// Import necessary modules for the application
import express from 'express' ; // Express framework for building web server applications and handling HTTP requests and responses
import axios from 'axios' ; // Axios is used to make HTTP requests to external APIs or services
import bodyParser from 'body-parser' ; // Middleware for parsing incoming request bodies, specifically for handling JSON data
import cmd from 'cmd-promise' ; // A module that allows execution of shell commands in a promise-based manner, making it easier to manage async operations
import cors from 'cors' ; // Middleware to enable Cross-Origin Resource Sharing, allowing resources to be requested from another domain
import cheerio from 'cheerio' ; // Cheerio is a server-side jQuery-like library for parsing and manipulating HTML content
import 'dotenv/config' ; // Loads environment variables from a .env file into process.env, allowing secure storage of sensitive information
import llamaTokenizer from 'llama-tokenizer-js' ; // A library for tokenizing text, which is crucial for managing the length of text inputs to the AI model
/**
 * System prompt that instructs the model how to analyze NGINX logs.
 *
 * It is placed as the first (`system`) message of every per-IP conversation
 * history created in this file. The text asks the model to prefix verified
 * problems with an ALERT marker, to use GENERAL for noteworthy non-alert
 * findings, and to wrap any IP it wants banned in a Markdown spoiler.
 * NOTE(review): whatever consumes those markers is not visible in this file.
 */
const prompt = `
You are a security AI responsible for analyzing web traffic from NGINX logs and blocking malicious IPs . Your task is to review the logs for potential attacks and issues . If you identify a verified problem , include [ ALERT ] followed by a detailed description of the issue in your response . Ensure your message is formatted in Markdown compatible with Discord .
* * Guidelines : * *
- Domains on the server : shells . lol , hehe . rest , dcord . rest , nodejs . lol , dht . rest , my - mc . link , ssh . surf , tcp . quest
- Use emojis to enhance communication .
- Do not report IP addresses for scraping or crawling .
- Ignore IPs : x . x . x . x , x . x . x . x , x . x . x . x , x . x . x . x . Do not mention these in reports .
- Ignore IP addresses with BOGONs such as 192.168 . 0.1 or 127.0 . 0.2 , etc .
- Avoid alerting for false positives or irregular activity .
- If there are no alerts but you have interesting findings , write : GENERAL followed by your insights in Markdown .
- Only send GENERAL messages for noteworthy events , not for routine traffic reports .
- In a GENERAL message , feel free to provide a long explainer on your deductions .
- Be decisive . If an IP is being malicious , block it . Do not monitor IPs for further traffic .
- Do not ban an IP address without a corresponding log entry , provide this in your response .
- Block all bot IPs and information scanners except Google .
- Provide proof and reasoning for each ban .
- DO NOT BAN AN IP THAT IS NOT IN A LOG EVER ! YOU MUST SEE THE IP ADDRESS !
- To ban an IP or flag it as a security risk , wrap it in a Markdown spoiler : || IPHERE ||
`;
// Express application instance and the port the server listens on.
const app = express();
const port = 3001;

// Enable CORS on every route using the default cors() options.
app.use(cors());

// Parse JSON request bodies; the size limit is raised to 50mb so large
// payloads are accepted.
app.use(bodyParser.json({ limit: '50mb' }));
// Tunables for request handling and conversation size.
const TIMEOUT_DURATION = 100000; // Milliseconds (100 s) before a chat request is answered with a timeout
const MAX_TOKENS = 8000; // Token budget for one conversation
const TOLERANCE = 100; // Safety margin subtracted from MAX_TOKENS when trimming

// In-memory conversation state: maps a client IP to its array of chat
// messages ({ role, content }). Nothing in this file persists it.
let conversationHistory = {};
/**
 * Builds the timestamp prefix used by the log statements in this file.
 *
 * @returns {string} The current date and time in the en-US locale, laid out
 *   as ` <date> [ <time> ] ` (padding spaces included).
 */
const getTimestamp = () => {
  const now = new Date();
  const date = now.toLocaleDateString('en-US');
  const time = now.toLocaleTimeString('en-US');
  return ` ${date} [ ${time} ] `;
};
/**
 * Middleware: resolves the caller's IP, stores it on `req.clientIp`, and makes
 * sure a conversation history (seeded with the system prompt) exists for it.
 *
 * The IP is taken from CF-Connecting-IP, X-Forwarded-For or X-Real-IP, in that
 * order, falling back to `req.ip`.
 * NOTE(review): these headers are client-controlled unless a trusted proxy
 * overwrites them, so the history key can be spoofed — confirm the deployment.
 */
app.use((req, res, next) => {
  const forwarded =
    req.headers['cf-connecting-ip'] ||
    req.headers['x-forwarded-for'] ||
    req.headers['x-real-ip'];

  // X-Forwarded-For can carry a whole proxy chain ("client, proxy1, proxy2").
  // Only the first entry identifies the client; using the raw value would give
  // the same client a different history for every distinct chain.
  const firstHop = forwarded ? forwarded.split(',')[0].trim() : '';
  req.clientIp = firstHop || req.ip;

  console.log(` ${getTimestamp()} [INFO] Incoming request from IP: ${req.clientIp} `);

  // First request from this IP: start its history with the system prompt.
  if (!conversationHistory[req.clientIp]) {
    console.log(` ${getTimestamp()} [INFO] Initializing conversation history for new IP: ${req.clientIp} `);
    conversationHistory[req.clientIp] = [{ role: 'system', content: prompt }];
  }

  next();
});
/**
 * Counts llama tokens across the user and assistant messages of a conversation.
 *
 * Messages with any other role (including `system`) contribute nothing, so a
 * list containing only system messages yields 0.
 *
 * @param {Array<{role: string, content: string}>} messages - Chat messages.
 * @returns {Promise<number>} Sum of token counts of the counted messages.
 */
async function countLlamaTokens(messages) {
  let totalTokens = 0;
  for (const { role, content } of messages) {
    if (role !== 'user' && role !== 'assistant') {
      continue;
    }
    totalTokens += llamaTokenizer.encode(content).length;
  }
  return totalTokens;
}
/**
 * Drops the oldest non-initial messages, in place, until the conversation fits
 * within `maxLength - tolerance` tokens (as measured by countLlamaTokens).
 *
 * The element at index 0 is never removed; trimming stops once only that
 * element is left, even if the budget is still exceeded.
 *
 * @param {Array<{role: string, content: string}>} messages - History to trim (mutated).
 * @param {number} maxLength - Total token budget.
 * @param {number} tolerance - Margin subtracted from the budget.
 * @returns {Promise<void>}
 */
async function trimConversationHistory(messages, maxLength, tolerance) {
  const budget = maxLength - tolerance;
  let tokenLength = await countLlamaTokens(messages);

  while (tokenLength > budget && messages.length > 1) {
    // Remove the oldest message after index 0 and subtract just its tokens;
    // the count is additive, so re-tokenizing the whole history is unnecessary.
    const [removed] = messages.splice(1, 1);
    tokenLength -= await countLlamaTokens([removed]);
    console.log(` ${getTimestamp()} [CLEANUP] Trimmed conversation history to ${tokenLength} tokens. `);
  }
}
/**
 * Fetches a web page and returns a plain-text summary of it.
 *
 * The summary contains the <title>, the meta description (when present), a
 * cleaned and length-limited copy of the body text (when present) and the URL.
 * The body text limit comes from the MAX_CONTENT_LENGTH environment variable
 * and defaults to 2000 characters.
 *
 * NOTE(review): `url` is fetched as given, with no allow-list and no request
 * timeout; if callers pass untrusted URLs this can reach internal hosts.
 *
 * @param {string} url - Address of the page to fetch.
 * @returns {Promise<string|null>} The summary, or null when fetching or
 *   parsing fails (the error is logged).
 */
async function scrapeWebPage(url) {
  console.log(` ${getTimestamp()} [INFO] Starting to scrape URL: ${url} `);
  try {
    const res = await axios.get(url);
    const $ = cheerio.load(res.data);

    const pageTitle = $('head title').text().trim();
    const pageDescription = $('head meta[name="description"]').attr('content');
    const pageContent = $('body').text().trim();

    let response = ` Title: ${pageTitle} \n `;
    if (pageDescription) {
      response += ` Description: ${pageDescription} \n `;
    }

    if (pageContent) {
      // Environment variables are strings; parse explicitly and fall back to
      // the default when the value is missing, non-numeric or not positive.
      const parsedLimit = Number.parseInt(process.env.MAX_CONTENT_LENGTH, 10);
      const maxContentLength = Number.isFinite(parsedLimit) && parsedLimit > 0 ? parsedLimit : 2000;

      // Round-trip the text through a detached <div> and collapse line breaks
      // and tabs into single spaces.
      let plainTextContent = $('<div>').html(pageContent).text().trim().replace(/[\r\n\t]+/g, ' ');

      // Strip code-like tokens (comment markers, braces, brackets, the
      // function/class keywords, hex and binary literals). The pattern is
      // global so every occurrence is removed, not only the first one.
      const codePattern = /\/\/|\/\*|\*\/|\{|\}|\[|\]|\bfunction\b|\bclass\b|\b0x[0-9A-Fa-f]+\b|\b0b[01]+\b/g;
      plainTextContent = plainTextContent.replace(codePattern, '');

      // Drop parenthesised asides together with the spaces around them.
      plainTextContent = plainTextContent.replace(/ *\([^)]*\) */g, '');

      if (plainTextContent.length > maxContentLength) {
        plainTextContent = plainTextContent.substring(0, maxContentLength) + '...';
      }
      response += ` Content: ${plainTextContent.trim()} `;
    }

    response += ` \n URL: ${url} `;
    console.log(` ${getTimestamp()} [INFO] Successfully scraped URL: ${url} `);
    return response;
  } catch (err) {
    // Failure is reported to the caller as null rather than rethrown.
    console.error(` ${getTimestamp()} [ERROR] Failed to scrape URL: ${url} `, err);
    return null;
  }
}
/**
 * Handles one chat request: records the message in the caller's history,
 * forwards the log lines that contain a `[Client a.b.c.d]` marker to the local
 * chat-completions endpoint, stores the reply and sends it to the client.
 *
 * Responses:
 *   - 200 with the assistant message on success;
 *   - 200 with an explanatory message when no line carries a client IP marker;
 *   - 408 when TIMEOUT_DURATION elapses before a response was sent;
 *   - 500 with the error message when anything throws.
 * Exactly one response is sent, and the timeout timer is always cleared.
 *
 * @param {{clientIp: string, body: {message: string}}} req - Request (only
 *   `clientIp` and `body.message` are read).
 * @param {object} res - Express response.
 * @returns {Promise<void>}
 */
async function processRequest(req, res) {
  const startTime = Date.now();
  const ip = req.clientIp;
  console.log(` ${getTimestamp()} [INFO] Handling chat request from IP: ${ip} `);

  // Answer with 408 if processing takes too long. The headersSent guard keeps
  // the timer from throwing when a response has already gone out.
  const timeoutHandle = setTimeout(() => {
    console.error(` ${getTimestamp()} [ERROR] Request timed out for IP: ${ip} `);
    if (!res.headersSent) {
      res.status(408).json({ message: "Request timed out" });
    }
  }, TIMEOUT_DURATION);

  try {
    console.log(` ${getTimestamp()} [INFO] Received user message: ${req.body.message} `);
    const userMessage = `${req.body.message} \n Date/Time: ${getTimestamp()} `;

    // The IP middleware normally creates the history; keep this so the
    // function also works when called without it.
    if (!conversationHistory[ip]) {
      console.log(` ${getTimestamp()} [INFO] Initializing conversation history for new IP: ${ip} `);
      conversationHistory[ip] = [{ role: 'system', content: prompt }];
    }

    conversationHistory[ip].push({ role: 'user', content: userMessage });
    await trimConversationHistory(conversationHistory[ip], MAX_TOKENS, TOLERANCE);

    // Keep only the log lines that carry a "[Client <ipv4>]" marker.
    const clientIpRegex = /\[Client (\d{1,3}\.){3}\d{1,3}\]/;
    const filteredLogLines = userMessage.split('\n').filter((line) => clientIpRegex.test(line));

    if (filteredLogLines.length === 0) {
      console.log(` ${getTimestamp()} [INFO] No valid client IP addresses found in the log. `);
      if (!res.headersSent) {
        res.json({ message: "No valid client IP addresses found in the log." });
      }
      return;
    }

    const filteredMessage = filteredLogLines.join('\n');

    // NOTE(review): the request carries the stored history (which already
    // holds the unfiltered message) plus the filtered lines as an extra user
    // message; the filtered message itself is not stored. Confirm intent.
    console.log(` ${getTimestamp()} [INFO] Sending request to llama API for response `);
    const response = await axios.post('http://127.0.0.1:8003/v1/chat/completions', {
      model: 'gpt-3.5-turbo',
      messages: [...conversationHistory[ip], { role: 'user', content: filteredMessage }],
    });

    const choice = response.data.choices[0];
    const assistantMessage = choice.message;
    conversationHistory[ip].push(assistantMessage);

    const usage = response.data.usage;
    console.log(` ${getTimestamp()} [INFO] Received response from llama API: ${assistantMessage.content} `);
    console.log(` ${getTimestamp()} [DEBUG] Finish Reason: ${choice.finish_reason} `);
    console.log(` ${getTimestamp()} [STATS] Usage: prompt_tokens= ${usage.prompt_tokens} , completion_tokens= ${usage.completion_tokens} , total_tokens= ${usage.total_tokens} `);

    // The 408 may already have been sent if the upstream call was slow.
    if (!res.headersSent) {
      res.json(assistantMessage);
    }
  } catch (error) {
    console.error(` ${getTimestamp()} [ERROR] An error occurred while handling chat request `, error);
    if (!res.headersSent) {
      res.status(500).json({ message: "An error occurred", error: error.message });
    }
  } finally {
    // Always stop the timer, including on the early-return path above.
    clearTimeout(timeoutHandle);
    const processingTime = ((Date.now() - startTime) / 1000).toFixed(2);
    console.log(` ${getTimestamp()} [STATS] Processing Time: ${processingTime} seconds `);
    console.log(` ${getTimestamp()} [INFO] Finished processing chat request for IP: ${ip} `);
  }
}
// Token cost of the system prompt, computed once. countLlamaTokens() skips
// system messages, so it cannot be used to measure the prompt.
const PROMPT_TOKEN_COUNT = llamaTokenizer.encode(prompt).length;

/**
 * POST /api/v1/chat
 *
 * Expects a JSON body `{ message: string }`. The message is truncated to the
 * token budget left after the system prompt and the safety margin, then handed
 * to processRequest, which sends the response.
 * Responds 400 when `message` is missing or not a string, and 500 when
 * tokenizing the message fails.
 */
app.post('/api/v1/chat', async (req, res) => {
  const messageContent = req.body && req.body.message;
  if (typeof messageContent !== 'string') {
    res.status(400).json({ message: 'Request body must contain a string "message" field' });
    return;
  }

  let trimmedMessageContent = messageContent;
  try {
    const encodedTokens = llamaTokenizer.encode(messageContent);
    const maxMessageTokens = MAX_TOKENS - PROMPT_TOKEN_COUNT - TOLERANCE;
    if (encodedTokens.length > maxMessageTokens) {
      // Cut at a token boundary and decode back to text.
      trimmedMessageContent = llamaTokenizer.decode(encodedTokens.slice(0, maxMessageTokens));
    }
  } catch (error) {
    // Without this, a tokenizer failure would reject the async handler and
    // leave the request without a response.
    console.error(` ${getTimestamp()} [ERROR] An error occurred while handling chat request `, error);
    res.status(500).json({ message: "An error occurred", error: error.message });
    return;
  }

  // processRequest reads only clientIp and body.message from the request.
  await processRequest({ ...req, body: { message: trimmedMessageContent } }, res);
});
/**
 * GET /api/v1/conversation-history
 *
 * Responds with the stored message array for the caller's IP (as resolved
 * into `req.clientIp` by the middleware registered earlier).
 */
app.get('/api/v1/conversation-history', (req, res) => {
  const { clientIp: ip } = req;
  console.log(` ${getTimestamp()} [INFO] Fetching conversation history for IP: ${ip} `);
  res.json(conversationHistory[ip]);
});
/**
 * POST /api/v1/restart-core
 *
 * Runs `docker restart llama-gpu-server` and responds with the command's
 * stdout, or with a 500 and the error message when the command fails.
 * NOTE(review): no authentication is applied to this route in this file and
 * CORS is enabled for all origins — confirm access is restricted elsewhere.
 */
app.post('/api/v1/restart-core', async (req, res) => {
  console.log(` ${getTimestamp()} [INFO] Restarting core service `);
  try {
    const out = await cmd(` docker restart llama-gpu-server `);
    console.log(` ${getTimestamp()} [INFO] Core service restarted `);
    res.json(out.stdout);
  } catch (err) {
    console.error(` ${getTimestamp()} [ERROR] Failed to restart core service `, err);
    res.status(500).json({ message: "An error occurred while restarting the core service", error: err.message });
  }
});
/**
 * POST /api/v1/reset-conversation
 *
 * Replaces the caller's conversation history with a fresh one that contains
 * only the system prompt, then confirms the reset in the response.
 */
app.post('/api/v1/reset-conversation', (req, res) => {
  const { clientIp: ip } = req;
  console.log(` ${getTimestamp()} [INFO] Resetting conversation history for IP: ${ip} `);

  conversationHistory[ip] = [{ role: 'system', content: prompt }];

  console.log(` ${getTimestamp()} [INFO] Conversation history reset for IP: ${ip} `);
  res.json({ message: `Conversation history reset for IP: ${ip}` });
});
// Start accepting connections on the configured port and log the address.
app.listen(port, () => {
  console.log(` ${getTimestamp()} [INFO] Server running at http://localhost: ${port} `);
});