Add detailed comments

2024-08-09 04:04:30 -04:00
parent 187abd1868
commit 7e75ea80a6
1 changed files with 128 additions and 85 deletions
--- a/ai_log.js
+++ b/ai_log.js
@@ -1,12 +1,14 @@
-import express from 'express';
-import axios from 'axios';
-import bodyParser from 'body-parser';
-import cmd from 'cmd-promise';
-import cors from 'cors';
-import cheerio from 'cheerio';
-import 'dotenv/config';
-import llamaTokenizer from 'llama-tokenizer-js';
+// Import necessary modules for the application
+import express from 'express'; // Express framework for building web server applications and handling HTTP requests and responses
+import axios from 'axios'; // Axios is used to make HTTP requests to external APIs or services
+import bodyParser from 'body-parser'; // Middleware for parsing incoming request bodies, specifically for handling JSON data
+import cmd from 'cmd-promise'; // A module that allows execution of shell commands in a promise-based manner, making it easier to manage async operations
+import cors from 'cors'; // Middleware to enable Cross-Origin Resource Sharing, allowing resources to be requested from another domain
+import cheerio from 'cheerio'; // Cheerio is a server-side jQuery-like library for parsing and manipulating HTML content
+import 'dotenv/config'; // Loads environment variables from a .env file into process.env, allowing secure storage of sensitive information
+import llamaTokenizer from 'llama-tokenizer-js'; // A library for tokenizing text, which is crucial for managing the length of text inputs to the AI model

+// Define a prompt that will guide the AI's behavior when analyzing NGINX logs for potential security issues
 const prompt = `
 You are a security AI responsible for analyzing web traffic from NGINX logs and blocking malicious IPs. Your task is to review the logs for potential attacks and issues. If you identify a verified problem, include [ALERT] followed by a detailed description of the issue in your response. Ensure your message is formatted in Markdown compatible with Discord.

@@ -28,209 +30,250 @@ You are a security AI responsible for analyzing web traffic from NGINX logs and
 - To ban an IP or flag it as a security risk, wrap it in a Markdown spoiler: ||IPHERE||
 `;

-const app = express();
-const port = 3001;
+// Initialize the Express application and define the port on which the server will run
+const app = express(); // Create an instance of an Express application
+const port = 3001; // Port number the server listens on

-app.use(cors()); // Enable CORS for all routes
+// Middleware to enable CORS for all routes
+app.use(cors()); // This allows the server to accept requests from any origin, useful for APIs that may be accessed by web applications from different domains

-// Set a larger limit for the request body
-app.use(bodyParser.json({ limit: '50mb' })); // Adjust the limit as needed
+// Set a larger limit for the request body to handle large data payloads
+app.use(bodyParser.json({ limit: '50mb' })); // The JSON body parser is configured with a 50MB limit, suitable for handling large JSON payloads

-const TIMEOUT_DURATION = 100000; // Timeout duration in milliseconds (100 seconds)
-const MAX_TOKENS = 8000; // Maximum tokens allowed by the model
-const TOLERANCE = 100; // Tolerance to avoid exceeding token limit
-let conversationHistory = {};
+// Define constants for the application, used to control various aspects of the server's behavior
+const TIMEOUT_DURATION = 100000; // The maximum time (in milliseconds) the server will wait before timing out a request, set to 100 seconds
+const MAX_TOKENS = 8000; // Maximum number of tokens (as counted by the llama tokenizer, not words) allowed in a conversation; this is the limit imposed by the model
+const TOLERANCE = 100; // A buffer value used to prevent exceeding the MAX_TOKENS limit, ensuring the conversation stays within safe bounds
+let conversationHistory = {}; // An object to store conversation history for each IP address, allowing the server to maintain context for each user

-// Helper function to get current timestamp
+// Helper function to get the current timestamp in a formatted string
 const getTimestamp = () => {
-    const now = new Date();
-    const date = now.toLocaleDateString('en-US');
-    const time = now.toLocaleTimeString('en-US');
-    return `${date} [${time}]`;
+    const now = new Date(); // Get the current date and time
+    const date = now.toLocaleDateString('en-US'); // Format the date in the US locale
+    const time = now.toLocaleTimeString('en-US'); // Format the time in the US locale
+    return `${date} [${time}]`; // Return the formatted date and time as a string
 };

-// Middleware to track conversation history by CF-Connecting-IP
+// Middleware to track conversation history based on the client's IP address
 app.use((req, res, next) => {
+    // Extract the client's IP address from the first header that is present (CF-Connecting-IP, X-Forwarded-For, X-Real-IP), or fall back to req.ip
    const ip = req.headers['cf-connecting-ip'] || req.headers['x-forwarded-for'] || req.headers['x-real-ip'] || req.ip;
-    req.clientIp = ip; // Store the IP in a request property
-    console.log(`${getTimestamp()} [INFO] Incoming request from IP: ${req.clientIp}`); // Log the IP address
+    req.clientIp = ip; // Store the client's IP address in the request object for easy access later

+    // Log the incoming request along with the client's IP address and current timestamp
+    console.log(`${getTimestamp()} [INFO] Incoming request from IP: ${req.clientIp}`);
+
+    // If this IP address has not been seen before, initialize a new conversation history for it
    if (!conversationHistory[req.clientIp]) {
        console.log(`${getTimestamp()} [INFO] Initializing conversation history for new IP: ${req.clientIp}`);
+        // Start the conversation with the predefined prompt that instructs the AI on how to analyze the logs
        conversationHistory[req.clientIp] = [
            { role: 'system', content: prompt }
        ];
    }
-    next();
+    next(); // Move on to the next middleware or route handler
 });

+// Function to count the number of tokens in a conversation history using the llama tokenizer
 async function countLlamaTokens(messages) {
-    let totalTokens = 0;
+    let totalTokens = 0; // Initialize a counter for the total number of tokens
    for (const message of messages) {
+        // Only count tokens for user and assistant messages, not system messages
        if (message.role === 'user' || message.role === 'assistant') {
-            const encodedTokens = llamaTokenizer.encode(message.content);
-            totalTokens += encodedTokens.length;
+            const encodedTokens = llamaTokenizer.encode(message.content); // Tokenize the message content
+            totalTokens += encodedTokens.length; // Add the number of tokens in the current message to the total
        }
    }
-    return totalTokens;
+    return totalTokens; // Return the total number of tokens
 }

+// Function to trim the conversation history to fit within the token limit
 async function trimConversationHistory(messages, maxLength, tolerance) {
-    let tokenLength = await countLlamaTokens(messages);
+    let tokenLength = await countLlamaTokens(messages); // Get the total number of tokens in the conversation
+    // Drop the oldest non-system message until the count is at most maxLength - tolerance or only the first (system) message remains
    while (tokenLength > maxLength - tolerance && messages.length > 1) {
-        messages.splice(1, 1); // Remove the oldest user/assistant message
-        tokenLength = await countLlamaTokens(messages);
+        messages.splice(1, 1); // Remove the oldest user/assistant message (the second item in the array)
+        tokenLength = await countLlamaTokens(messages); // Recalculate the total number of tokens after trimming
        console.log(`${getTimestamp()} [CLEANUP] Trimmed conversation history to ${tokenLength} tokens.`);
    }
 }

-// Function to scrape web page
+// Function to scrape a web page and extract its content
 async function scrapeWebPage(url) {
    console.log(`${getTimestamp()} [INFO] Starting to scrape URL: ${url}`);
    try {
+        // Perform an HTTP GET request to fetch the content of the specified URL
        const res = await axios.get(url);
-        const html = res.data;
-        const $ = cheerio.load(html);
+        const html = res.data; // Extract the HTML content from the response
+        const $ = cheerio.load(html); // Load the HTML into Cheerio for parsing and manipulation

-        // Extract page title, meta description and content
-        const pageTitle = $('head title').text().trim();
-        const pageDescription = $('head meta[name="description"]').attr('content');
-        const pageContent = $('body').text().trim();
+        // Extract specific elements from the HTML: the page title, meta description, and body content
+        const pageTitle = $('head title').text().trim(); // Get the text of the <title> tag
+        const pageDescription = $('head meta[name="description"]').attr('content'); // Get the content of the meta description
+        const pageContent = $('body').text().trim(); // Get all text content within the <body> tag

-        // Construct response message with page details
-        let response = `Title: ${pageTitle}\n`;
+        // Construct a response message with the extracted details
+        let response = `Title: ${pageTitle}\n`; // Start with the page title
        if (pageDescription) {
-            response += `Description: ${pageDescription}\n`;
+            response += `Description: ${pageDescription}\n`; // Add the meta description if it exists
        }
        if (pageContent) {
-            const MAX_CONTENT_LENGTH = process.env.MAX_CONTENT_LENGTH || 2000;
+            const MAX_CONTENT_LENGTH = process.env.MAX_CONTENT_LENGTH || 2000; // Set a maximum length for the content
+            // Re-parse the body text through Cheerio to strip any remaining markup, then collapse runs of newlines, carriage returns, and tabs into single spaces
            let plainTextContent = $('<div>').html(pageContent).text().trim().replace(/[\r\n\t]+/g, ' ');
+            // Define a regular expression pattern to identify code-like content
            const codePattern = /\/\/|\/\*|\*\/|\{|\}|\[|\]|\bfunction\b|\bclass\b|\b0x[0-9A-Fa-f]+\b|\b0b[01]+\b/;
-            const isCode = codePattern.test(plainTextContent);
+            const isCode = codePattern.test(plainTextContent); // Check if the content resembles code

            if (isCode) {
-                plainTextContent = plainTextContent.replace(codePattern, '');
+                plainTextContent = plainTextContent.replace(codePattern, ''); // Removes only the first code-like match, because codePattern has no global (g) flag
            }
+            // Further clean the content by removing text within parentheses
            plainTextContent = plainTextContent.replace(/ *\([^)]*\) */g, '');
+            // If the content is too long, truncate it and add an ellipsis
            if (plainTextContent.length > MAX_CONTENT_LENGTH) {
                plainTextContent = plainTextContent.substring(0, MAX_CONTENT_LENGTH) + '...';
            }
-            response += `Content: ${plainTextContent.trim()}`;
+            response += `Content: ${plainTextContent.trim()}`; // Add the cleaned and possibly truncated content to the response
        }
-        response += `\nURL: ${url}`;
+        response += `\nURL: ${url}`; // Include the original URL in the response

        console.log(`${getTimestamp()} [INFO] Successfully scraped URL: ${url}`);
-        return response;
+        return response; // Return the constructed response
    } catch (err) {
+        // If the scraping process fails, log an error with details and return null
        console.error(`${getTimestamp()} [ERROR] Failed to scrape URL: ${url}`, err);
        return null;
    }
 }

+// Function to process incoming requests, handle AI interactions, and return a response
 async function processRequest(req, res) {
-    const startTime = Date.now(); // Start time tracking
-    const ip = req.clientIp;
-    console.log(`${getTimestamp()} [INFO] Handling chat request from IP: ${ip}`); // Log the IP address
+    const startTime = Date.now(); // Record the start time of the request processing for performance tracking
+    const ip = req.clientIp; // Retrieve the client's IP address from the request object
+    console.log(`${getTimestamp()} [INFO] Handling chat request from IP: ${ip}`); // Log the request details

+    // Send a 408 response if processing exceeds TIMEOUT_DURATION; this only answers the client and does not cancel the in-flight work
    const timeout = setTimeout(() => {
        console.error(`${getTimestamp()} [ERROR] Request timed out for IP: ${ip}`);
-        res.status(408).json({ message: "Request timed out" });
+        res.status(408).json({ message: "Request timed out" }); // Send a timeout response if the processing takes too long
    }, TIMEOUT_DURATION);

    try {
-        let userMessage = req.body.message;
+        let userMessage = req.body.message; // Extract the user's message from the request body
        console.log(`${getTimestamp()} [INFO] Received user message: ${userMessage}`);
-        userMessage = req.body.message + `\nDate/Time:${getTimestamp()}`;
+        userMessage = req.body.message + `\nDate/Time:${getTimestamp()}`; // Append the current date and time to the user's message

+        // Initialize conversation history if it doesn't exist for the IP
        if (!conversationHistory[ip]) {
            console.log(`${getTimestamp()} [INFO] Initializing conversation history for new IP: ${ip}`);
-            conversationHistory[ip] = [{ role: 'system', content: prompt }];
+            conversationHistory[ip] = [{ role: 'system', content: prompt }]; // Start the conversation with the predefined prompt
        }
+        // Add the user's message to the conversation history for the IP
        conversationHistory[ip].push({ role: 'user', content: userMessage });

-        // Trim conversation history if it exceeds the token limit
+        // Trim the conversation history if it exceeds the token limit
        await trimConversationHistory(conversationHistory[ip], MAX_TOKENS, TOLERANCE);

+        // Split the user's message into individual log lines
        const logLines = userMessage.split('\n');
+        // Define a regex pattern to identify lines containing client IP addresses
        const clientIpRegex = /\[Client (\d{1,3}\.){3}\d{1,3}\]/;
+        // Filter the log lines to only include those with valid client IP addresses
        const filteredLogLines = logLines.filter(line => clientIpRegex.test(line));

+        // If no log line matches the [Client x.x.x.x] pattern, reply immediately (NOTE: the timeout set above is not cleared on this early-return path)
        if (filteredLogLines.length === 0) {
            console.log(`${getTimestamp()} [INFO] No valid client IP addresses found in the log.`);
            res.json({ message: "No valid client IP addresses found in the log." });
            return;
        }

+        // Join the filtered log lines back into a single string for processing
        const filteredMessage = filteredLogLines.join('\n');

+        // Send the request to the llama API for processing and response generation
        console.log(`${getTimestamp()} [INFO] Sending request to llama API for response`);
        const response = await axios.post('http://127.0.0.1:8003/v1/chat/completions', {
-            model: 'gpt-3.5-turbo',
-            messages: [...conversationHistory[ip], { role: 'user', content: filteredMessage }]
+            model: 'gpt-3.5-turbo', // Specify the AI model to use
+            messages: [...conversationHistory[ip], { role: 'user', content: filteredMessage }] // Include the conversation history and the filtered message
        });
+        // Extract the AI's response from the API's response data
        const assistantMessage = response.data.choices[0].message;
+        // Add the AI's response to the conversation history
        conversationHistory[ip].push(assistantMessage);

+        // Log the AI's response and additional details like the finish reason and token usage
        console.log(`${getTimestamp()} [INFO] Received response from llama API: ${assistantMessage.content}`);
        console.log(`${getTimestamp()} [DEBUG] Finish Reason: ${response.data.choices[0].finish_reason}`);
        console.log(`${getTimestamp()} [STATS] Usage: prompt_tokens=${response.data.usage.prompt_tokens}, completion_tokens=${response.data.usage.completion_tokens}, total_tokens=${response.data.usage.total_tokens}`);

-        clearTimeout(timeout);
-        res.json(assistantMessage);
+        clearTimeout(timeout); // Clear the timeout to prevent it from triggering
+        res.json(assistantMessage); // Send the AI's response back to the client
    } catch (error) {
+        // If an error occurs during request processing, log the error and send a 500 response
        console.error(`${getTimestamp()} [ERROR] An error occurred while handling chat request`, error);
-        clearTimeout(timeout);
-        res.status(500).json({ message: "An error occurred", error: error.message });
+        clearTimeout(timeout); // Clear the timeout to prevent it from triggering
+        res.status(500).json({ message: "An error occurred", error: error.message }); // Send an error response
    } finally {
-        const endTime = Date.now(); // End time tracking
-        const processingTime = ((endTime - startTime) / 1000).toFixed(2); // Calculate processing time in seconds
-        console.log(`${getTimestamp()} [STATS] Processing Time: ${processingTime} seconds`);
+        // Record the end time and calculate the total processing time in seconds
+        const endTime = Date.now();
+        const processingTime = ((endTime - startTime) / 1000).toFixed(2); // Convert milliseconds to seconds
+        console.log(`${getTimestamp()} [STATS] Processing Time: ${processingTime} seconds`); // Log the processing time
        console.log(`${getTimestamp()} [INFO] Finished processing chat request for IP: ${ip}`);
    }
 }

+// Route to handle incoming chat requests, trim the message content, and process the request
 app.post('/api/v1/chat', async (req, res) => {
    // Trim the incoming message to fit within token limits
-    const messageContent = req.body.message;
-    const encodedTokens = llamaTokenizer.encode(messageContent);
-    const MAX_MESSAGE_TOKENS = MAX_TOKENS - (await countLlamaTokens([{ role: 'system', content: prompt }])) - TOLERANCE;
+    const messageContent = req.body.message; // Get the user's message from the request body
+    const encodedTokens = llamaTokenizer.encode(messageContent); // Tokenize the message to determine its length in tokens
+    const MAX_MESSAGE_TOKENS = MAX_TOKENS - (await countLlamaTokens([{ role: 'system', content: prompt }])) - TOLERANCE; // Token budget for the user's message; NOTE: countLlamaTokens skips system-role messages, so the prompt term here is 0

+    // If the message exceeds the allowed token limit, trim it to fit
    let trimmedMessageContent = messageContent;
    if (encodedTokens.length > MAX_MESSAGE_TOKENS) {
-        trimmedMessageContent = llamaTokenizer.decode(encodedTokens.slice(0, MAX_MESSAGE_TOKENS));
+        trimmedMessageContent = llamaTokenizer.decode(encodedTokens.slice(0, MAX_MESSAGE_TOKENS)); // Truncate the message and decode it back to a string
    }

+    // Process the trimmed message and send the response
    await processRequest({ ...req, body: { message: trimmedMessageContent } }, res);
 });

+// Route to fetch the conversation history stored for the requesting client's IP address
 app.get('/api/v1/conversation-history', (req, res) => {
-    const ip = req.clientIp;
-    console.log(`${getTimestamp()} [INFO] Fetching conversation history for IP: ${ip}`); // Log the IP address
-    res.json(conversationHistory[ip]);
+    const ip = req.clientIp; // Get the client's IP address from the request object
+    console.log(`${getTimestamp()} [INFO] Fetching conversation history for IP: ${ip}`); // Log the request details
+    res.json(conversationHistory[ip]); // Send the conversation history for the IP as a JSON response
 });

+// Route to restart the core AI service by running "docker restart llama-gpu-server"
 app.post('/api/v1/restart-core', (req, res) => {
-    console.log(`${getTimestamp()} [INFO] Restarting core service`);
-    cmd(`docker restart llama-gpu-server`).then(out => {
-        console.log(`${getTimestamp()} [INFO] Core service restarted`);
-        res.json(out.stdout);
-    }).catch(err => {
-        console.error(`${getTimestamp()} [ERROR] Failed to restart core service`, err);
-        res.status(500).json({ message: "An error occurred while restarting the core service", error: err.message });
+    console.log(`${getTimestamp()} [INFO] Restarting core service`); // Log the restart action
+    cmd(`docker restart llama-gpu-server`).then(out => { // Execute a shell command to restart the Docker container running the AI model
+        console.log(`${getTimestamp()} [INFO] Core service restarted`); // Log the successful restart
+        res.json(out.stdout); // Send the output of the restart command back to the client
+    }).catch(err => { // Handle any errors that occur during the restart
+        console.error(`${getTimestamp()} [ERROR] Failed to restart core service`, err); // Log the error
+        res.status(500).json({ message: "An error occurred while restarting the core service", error: err.message }); // Send an error response
    });
 });

+// Route to reset the conversation history for the requesting client's IP address back to just the system prompt
 app.post('/api/v1/reset-conversation', (req, res) => {
-    const ip = req.clientIp;
-    console.log(`${getTimestamp()} [INFO] Resetting conversation history for IP: ${ip}`); // Log the IP address
+    const ip = req.clientIp; // Get the client's IP address from the request object
+    console.log(`${getTimestamp()} [INFO] Resetting conversation history for IP: ${ip}`); // Log the reset action

+    // Reset the conversation history to its initial state for the given IP
    conversationHistory[ip] = [
        { role: 'system', content: prompt }
    ];
-    console.log(`${getTimestamp()} [INFO] Conversation history reset for IP: ${ip}`);
-    res.json({ message: "Conversation history reset for IP: " + ip });
+    console.log(`${getTimestamp()} [INFO] Conversation history reset for IP: ${ip}`); // Log the successful reset
+    res.json({ message: "Conversation history reset for IP: " + ip }); // Send a confirmation message back to the client
 });

+// Start the Express server on the defined port, making the API available for requests
 app.listen(port, () => {
-    console.log(`${getTimestamp()} [INFO] Server running at http://localhost:${port}`);
+    console.log(`${getTimestamp()} [INFO] Server running at http://localhost:${port}`); // Log the server startup and its URL
 });