first commit

2024-08-09 03:24:50 -04:00
commit a26b2c030d
4 changed files with 343 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,2 @@
 package-lock.json
 node_modules
--- a/README.md
+++ b/README.md
@@ -0,0 +1,90 @@
 # AI NGINX Log Analysis 
 ## Overview
 This project is an Express.js-based server that sends NGINX log excerpts to a locally hosted, OpenAI-compatible chat-completions endpoint (requested under the model name `gpt-3.5-turbo`) and analyzes them for potential security threats. The server processes incoming requests, analyzes the content, and returns responses that include alerts or general insights. It also keeps a separate conversation history for each client IP address and contains a helper for scraping web pages.
 ## Features
 - **NGINX Log Analysis**: Analyzes web traffic logs to identify potential security threats and generate appropriate responses.
 - **Conversation History**: Maintains a history of interactions for each client IP, allowing for context-aware responses.
 - **Web Scraping**: Includes a helper that scrapes a web page and formats its title, description, and text content (the helper is not currently exposed through an endpoint).
 - **Token Management**: Limits the number of tokens used in the conversation to ensure responses are within the model's limits.
 - **Core Service Management**: Provides endpoints to restart the core GPT service and reset conversation histories.
 - **Cross-Origin Resource Sharing (CORS)**: Enabled for all routes, allowing for flexible API usage.
 ## Installation
 **Note**: A `llama-cpp-python` OpenAI Emulation Server is required alongside the backend server. The server in this code is configured to run on `127.0.0.1:8003`.
 1. Clone the repository:
   ```bash
   git clone https://github.com/your-repo.git
   cd your-repo
   ```
 2. Install dependencies:
   ```bash
   npm install
   ```
 3. Create a `.env` file in the project root with the following content:
   ```bash
   MAX_CONTENT_LENGTH=2000
   ```
 4. Start the server:
   ```bash
   node ai_log.js
   ```
 ## Endpoints
 ### 1. `/api/v1/chat`
 - **Method**: `POST`
 - **Description**: Processes a user message, analyzes it for security threats, and returns a response.
 - **Request Body**:
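  - `message`: The NGINX log text to be analyzed. At least one line must contain a `[Client <IPv4 address>]` tag; otherwise the server responds that no valid client IP addresses were found.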
 - **Response**: JSON object containing the response from the GPT model.
 ### 2. `/api/v1/conversation-history`
 - **Method**: `GET`
 - **Description**: Retrieves the conversation history for the requesting client's IP.
 - **Response**: JSON object containing the conversation history.
 ### 3. `/api/v1/restart-core`
 - **Method**: `POST`
 - **Description**: Restarts the core GPT service running in a Docker container.
 - **Response**: JSON object confirming the restart or detailing any errors.
 ### 4. `/api/v1/reset-conversation`
 - **Method**: `POST`
 - **Description**: Resets the conversation history for the requesting client's IP.
 - **Response**: JSON object confirming the reset.
 ## Environment Variables
 - `MAX_CONTENT_LENGTH`: The maximum length for the content extracted from web pages during scraping (default: 2000 characters).
 ## Usage
 1. Send a POST request to `/api/v1/chat` with a message to analyze web traffic logs.
 2. Use the `/api/v1/conversation-history` endpoint to fetch the chat history.
 3. Restart the core service using the `/api/v1/restart-core` endpoint if needed.
 4. Reset conversation history using the `/api/v1/reset-conversation` endpoint.
 ## Logging
 The server logs key actions, including incoming requests, conversation history management, and errors. Logs are timestamped and include IP addresses for traceability.
 ## Notes
 - Ensure the `llama-gpu-server` Docker container is running before starting the Express.js server.
 - Conversation history is stored in memory and will be lost when the server restarts. Consider implementing persistent storage if long-term history is required.
 ## Contributions
 Contributions are welcome! Please fork the repository and submit a pull request with your changes.
 This project leverages cutting-edge AI to enhance web security analysis, making it easier to identify and respond to threats in real-time.
--- a/ai_log.js
+++ b/ai_log.js
@@ -0,0 +1,236 @@
 import express from 'express';
 import axios from 'axios';
 import bodyParser from 'body-parser';
 import cmd from 'cmd-promise';
 import cors from 'cors';
 import cheerio from 'cheerio';
 import 'dotenv/config';
 import llamaTokenizer from 'llama-tokenizer-js';
 // System prompt stored as the first message of every per-IP conversation history.
 // It tells the model to review NGINX log lines, to prefix verified problems with
 // [ALERT], to use GENERAL for noteworthy non-alert findings, and to wrap any IP it
 // wants banned in a Markdown spoiler (||IP||). The text is sent to the model
 // verbatim, so edits here change model behaviour.
 const prompt = `
 You are a security AI responsible for analyzing web traffic from NGINX logs and blocking malicious IPs. Your task is to review the logs for potential attacks and issues. If you identify a verified problem, include [ALERT] followed by a detailed description of the issue in your response. Ensure your message is formatted in Markdown compatible with Discord.
 **Guidelines:**
 - Domains on the server: shells.lol, hehe.rest, dcord.rest, nodejs.lol, dht.rest, my-mc.link, ssh.surf, tcp.quest
 - Use emojis to enhance communication.
 - Do not report IP addresses for scraping or crawling.
 - Ignore IPs: x.x.x.x, x.x.x.x, x.x.x.x, x.x.x.x. Do not mention these in reports.
 - Ignore IP addresses with BOGONs such as 192.168.0.1 or 127.0.0.2, etc.
 - Avoid alerting for false positives or irregular activity.
 - If there are no alerts but you have interesting findings, write: GENERAL followed by your insights in Markdown.
 - Only send GENERAL messages for noteworthy events, not for routine traffic reports.
 - In a GENERAL message, feel free to provide a long explainer on your deductions.
 - Be decisive. If an IP is being malicious, block it. Do not monitor IPs for further traffic.
 - Do not ban an IP address without a corresponding log entry, provide this in your response.
 - Block all bot IPs and information scanners except Google.
 - Provide proof and reasoning for each ban.
 - DO NOT BAN AN IP THAT IS NOT IN A LOG EVER! YOU MUST SEE THE IP ADDRESS!
 - To ban an IP or flag it as a security risk, wrap it in a Markdown spoiler: ||IPHERE||
 `;
 // Express application instance and the TCP port it listens on.
 const app = express();
 const port = 3001;
 app.use(cors()); // Enable CORS for all routes
 // Set a larger limit for the request body
 app.use(bodyParser.json({ limit: '50mb' })); // Adjust the limit as needed
 const TIMEOUT_DURATION = 100000; // Timeout duration in milliseconds (100 seconds)
 const MAX_TOKENS = 8000; // Maximum tokens allowed by the model
 const TOLERANCE = 100; // Tolerance to avoid exceeding token limit
 // Per-client conversation histories, keyed by the client IP string. The key is
 // derived from request headers, so the map is created without a prototype:
 // with a plain `{}` a header value such as "constructor" or "__proto__" would
 // resolve to an inherited Object.prototype member, look "already initialised",
 // and make every later `.push()` on that entry throw.
 let conversationHistory = Object.create(null);
 // Builds the log prefix used throughout this file: the en-US date followed by
 // the en-US time of day in square brackets, both taken from one Date instance.
 const getTimestamp = () => {
    const current = new Date();
    const [datePart, timePart] = [
        current.toLocaleDateString('en-US'),
        current.toLocaleTimeString('en-US'),
    ];
    return datePart + ' [' + timePart + ']';
 };
 // Middleware to track conversation history by CF-Connecting-IP
 // The client address is taken from the first non-empty of CF-Connecting-IP,
 // X-Forwarded-For and X-Real-IP, falling back to req.ip. It is stored on
 // req.clientIp, and a history containing only the system prompt is created the
 // first time an address is seen.
 // NOTE(review): these headers are supplied by the caller unless a trusted proxy
 // overwrites them — confirm the deployment always sits behind such a proxy.
 app.use((req, res, next) => {
    const rawAddress = req.headers['cf-connecting-ip'] || req.headers['x-forwarded-for'] || req.headers['x-real-ip'] || req.ip;
    // X-Forwarded-For may carry a chain "client, proxy1, proxy2". Only the first
    // entry identifies the client; using the whole string would give the same
    // client a different history for every proxy path.
    const ip = typeof rawAddress === 'string'
        ? (rawAddress.split(',')[0].trim() || rawAddress)
        : rawAddress;
    req.clientIp = ip; // Store the IP in a request property
    console.log(`${getTimestamp()} [INFO] Incoming request from IP: ${req.clientIp}`); // Log the IP address
    if (!conversationHistory[req.clientIp]) {
        console.log(`${getTimestamp()} [INFO] Initializing conversation history for new IP: ${req.clientIp}`);
        conversationHistory[req.clientIp] = [
            { role: 'system', content: prompt }
        ];
    }
    next();
 });
 /**
  * Counts the llama tokens contained in a list of chat messages.
  *
  * Messages of every role are counted, including `system`: the system prompt
  * occupies model context like any other message, and the chat route calls this
  * function with only the system prompt to reserve room for it (a role filter
  * made that call always return 0).
  *
  * @param {Array<{role: string, content: string}>} messages - Chat messages to measure.
  * @returns {Promise<number>} Sum of the token counts of all string contents.
  */
 async function countLlamaTokens(messages) {
    let totalTokens = 0;
    for (const message of messages) {
        // Skip entries without textual content instead of letting the tokenizer throw.
        if (message && typeof message.content === 'string') {
            totalTokens += llamaTokenizer.encode(message.content).length;
        }
    }
    return totalTokens;
 }
 /**
  * Drops the oldest non-system messages, in place, until the history fits the
  * token budget.
  *
  * The message at index 0 (the system prompt) is never removed. Removal stops as
  * soon as the token count is at most `maxLength - tolerance` or only the first
  * message is left.
  *
  * @param {Array<{role: string, content: string}>} messages - History to trim (mutated).
  * @param {number} maxLength - Maximum number of tokens allowed.
  * @param {number} tolerance - Safety margin subtracted from `maxLength`.
  * @returns {Promise<void>}
  */
 async function trimConversationHistory(messages, maxLength, tolerance) {
    const budget = maxLength - tolerance;
    let tokenLength = await countLlamaTokens(messages);
    while (tokenLength > budget && messages.length > 1) {
        const [removed] = messages.splice(1, 1); // Remove the oldest user/assistant message
        // The total is a per-message sum, so subtracting the removed message's
        // count gives the same value as re-tokenizing the whole history.
        tokenLength -= await countLlamaTokens([removed]);
        console.log(`${getTimestamp()} [CLEANUP] Trimmed conversation history to ${tokenLength} tokens.`);
    }
 }
 // Function to scrape web page
 /**
  * Downloads a page and condenses it into a plain-text summary.
  *
  * The summary contains the page title, the meta description (when present), the
  * cleaned and length-limited body text (when present) and the URL.
  *
  * @param {string} url - Address of the page to fetch.
  * @returns {Promise<string|null>} The summary, or `null` when fetching or
  *   parsing fails (the error is logged).
  */
 async function scrapeWebPage(url) {
    console.log(`${getTimestamp()} [INFO] Starting to scrape URL: ${url}`);
    try {
        const res = await axios.get(url);
        const html = res.data;
        const $ = cheerio.load(html);
        // Extract page title, meta description and content
        const pageTitle = $('head title').text().trim();
        const pageDescription = $('head meta[name="description"]').attr('content');
        const pageContent = $('body').text().trim();
        // Construct response message with page details
        let response = `Title: ${pageTitle}\n`;
        if (pageDescription) {
            response += `Description: ${pageDescription}\n`;
        }
        if (pageContent) {
            // Environment variables are strings: parse the limit explicitly and fall
            // back to 2000 when it is unset or not a usable number (a non-numeric
            // value used to disable truncation silently).
            const configuredLimit = Number.parseInt(process.env.MAX_CONTENT_LENGTH, 10);
            const maxContentLength = Number.isNaN(configuredLimit) || configuredLimit < 0 ? 2000 : configuredLimit;
            let plainTextContent = $('<div>').html(pageContent).text().trim().replace(/[\r\n\t]+/g, ' ');
            // Strip every code-like token (comment markers, braces, brackets, the
            // `function`/`class` keywords, hex/binary literals). The global flag
            // matters: without it only the first occurrence was removed.
            const codePattern = /\/\/|\/\*|\*\/|\{|\}|\[|\]|\bfunction\b|\bclass\b|\b0x[0-9A-Fa-f]+\b|\b0b[01]+\b/g;
            plainTextContent = plainTextContent.replace(codePattern, '');
            // Drop parenthesised asides together with the spaces around them.
            plainTextContent = plainTextContent.replace(/ *\([^)]*\) */g, '');
            if (plainTextContent.length > maxContentLength) {
                plainTextContent = plainTextContent.substring(0, maxContentLength) + '...';
            }
            response += `Content: ${plainTextContent.trim()}`;
        }
        response += `\nURL: ${url}`;
        console.log(`${getTimestamp()} [INFO] Successfully scraped URL: ${url}`);
        return response;
    } catch (err) {
        console.error(`${getTimestamp()} [ERROR] Failed to scrape URL: ${url}`, err);
        return null;
    }
 }
 /**
  * Handles one chat request: records the user message in the caller's history,
  * forwards the log lines that carry a `[Client <IPv4>]` tag to the llama API and
  * returns the assistant's reply as JSON.
  *
  * Responses sent:
  *  - 200 with the assistant message on success;
  *  - 200 with an explanatory message when no line carries a client IP tag;
  *  - 408 when no response was produced within TIMEOUT_DURATION;
  *  - 500 when an error is thrown while handling the request.
  * Exactly one response is written per request: every write is guarded by
  * `res.headersSent`, and the timeout timer is always cleared on exit.
  *
  * @param {{clientIp: string, body: {message: string}}} req - Request carrying
  *   the client IP resolved by the middleware and the message to analyze.
  * @param {object} res - Express response object.
  * @returns {Promise<void>}
  */
 async function processRequest(req, res) {
    const startTime = Date.now(); // Start time tracking
    const ip = req.clientIp;
    console.log(`${getTimestamp()} [INFO] Handling chat request from IP: ${ip}`); // Log the IP address
    const timeout = setTimeout(() => {
        console.error(`${getTimestamp()} [ERROR] Request timed out for IP: ${ip}`);
        // Writing to an already finished response would throw inside the timer.
        if (!res.headersSent) {
            res.status(408).json({ message: "Request timed out" });
        }
    }, TIMEOUT_DURATION);
    try {
        let userMessage = req.body.message;
        console.log(`${getTimestamp()} [INFO] Received user message: ${userMessage}`);
        userMessage = req.body.message + `\nDate/Time:${getTimestamp()}`;
        if (!conversationHistory[ip]) {
            console.log(`${getTimestamp()} [INFO] Initializing conversation history for new IP: ${ip}`);
            conversationHistory[ip] = [{ role: 'system', content: prompt }];
        }
        conversationHistory[ip].push({ role: 'user', content: userMessage });
        // Trim conversation history if it exceeds the token limit
        await trimConversationHistory(conversationHistory[ip], MAX_TOKENS, TOLERANCE);
        // Keep only the log lines that carry a "[Client a.b.c.d]" tag.
        const logLines = userMessage.split('\n');
        const clientIpRegex = /\[Client (\d{1,3}\.){3}\d{1,3}\]/;
        const filteredLogLines = logLines.filter(line => clientIpRegex.test(line));
        if (filteredLogLines.length === 0) {
            console.log(`${getTimestamp()} [INFO] No valid client IP addresses found in the log.`);
            if (!res.headersSent) {
                res.json({ message: "No valid client IP addresses found in the log." });
            }
            return;
        }
        const filteredMessage = filteredLogLines.join('\n');
        console.log(`${getTimestamp()} [INFO] Sending request to llama API for response`);
        // NOTE(review): the history already ends with the raw user message, so the
        // log lines reach the model twice (raw + filtered), and the combined size
        // is not covered by the trimming above — confirm whether this is intended.
        const response = await axios.post('http://127.0.0.1:8003/v1/chat/completions', {
            model: 'gpt-3.5-turbo',
            messages: [...conversationHistory[ip], { role: 'user', content: filteredMessage }]
        });
        const assistantMessage = response.data.choices[0].message;
        conversationHistory[ip].push(assistantMessage);
        console.log(`${getTimestamp()} [INFO] Received response from llama API: ${assistantMessage.content}`);
        console.log(`${getTimestamp()} [DEBUG] Finish Reason: ${response.data.choices[0].finish_reason}`);
        console.log(`${getTimestamp()} [STATS] Usage: prompt_tokens=${response.data.usage.prompt_tokens}, completion_tokens=${response.data.usage.completion_tokens}, total_tokens=${response.data.usage.total_tokens}`);
        // The 408 may already have been sent if the model answered too late.
        if (!res.headersSent) {
            res.json(assistantMessage);
        }
    } catch (error) {
        console.error(`${getTimestamp()} [ERROR] An error occurred while handling chat request`, error);
        if (!res.headersSent) {
            res.status(500).json({ message: "An error occurred", error: error.message });
        }
    } finally {
        // Always cancel the timer, including on the early "no valid IP" return,
        // so it can never fire after the response has been completed.
        clearTimeout(timeout);
        const endTime = Date.now(); // End time tracking
        const processingTime = ((endTime - startTime) / 1000).toFixed(2); // Calculate processing time in seconds
        console.log(`${getTimestamp()} [STATS] Processing Time: ${processingTime} seconds`);
        console.log(`${getTimestamp()} [INFO] Finished processing chat request for IP: ${ip}`);
    }
 }
 // Token count of the system prompt; computed on the first chat request and
 // reused afterwards because the prompt never changes at runtime.
 let systemPromptTokenCount = null;
 // POST /api/v1/chat — validates the message, truncates it to the token budget
 // left after the system prompt and the safety tolerance, then hands it to
 // processRequest, which writes the response.
 app.post('/api/v1/chat', async (req, res) => {
    const messageContent = req.body && req.body.message;
    // The tokenizer throws on non-string input; inside an async handler that
    // becomes an unhandled rejection and the client never gets an answer.
    if (typeof messageContent !== 'string') {
        res.status(400).json({ message: "Request body must contain a string 'message' field" });
        return;
    }
    try {
        if (systemPromptTokenCount === null) {
            // Tokenize the prompt directly so the reserved budget is its real size.
            systemPromptTokenCount = llamaTokenizer.encode(prompt).length;
        }
        // Trim the incoming message to fit within token limits
        const encodedTokens = llamaTokenizer.encode(messageContent);
        const MAX_MESSAGE_TOKENS = MAX_TOKENS - systemPromptTokenCount - TOLERANCE;
        let trimmedMessageContent = messageContent;
        if (encodedTokens.length > MAX_MESSAGE_TOKENS) {
            trimmedMessageContent = llamaTokenizer.decode(encodedTokens.slice(0, MAX_MESSAGE_TOKENS));
        }
        await processRequest({ ...req, body: { message: trimmedMessageContent } }, res);
    } catch (error) {
        console.error(`${getTimestamp()} [ERROR] An error occurred while handling chat request`, error);
        if (!res.headersSent) {
            res.status(500).json({ message: "An error occurred", error: error.message });
        }
    }
 });
 // GET /api/v1/conversation-history — replies with the message list stored for
 // the caller's IP (as resolved by the IP-tracking middleware).
 app.get('/api/v1/conversation-history', ({ clientIp }, res) => {
    console.log(`${getTimestamp()} [INFO] Fetching conversation history for IP: ${clientIp}`); // Log the IP address
    const history = conversationHistory[clientIp];
    res.json(history);
 });
 // POST /api/v1/restart-core — runs `docker restart llama-gpu-server` and replies
 // with the command's stdout, or with a 500 and the error message on failure.
 app.post('/api/v1/restart-core', (req, res) => {
    const onRestarted = (result) => {
        console.log(`${getTimestamp()} [INFO] Core service restarted`);
        res.json(result.stdout);
    };
    const onRestartFailed = (failure) => {
        console.error(`${getTimestamp()} [ERROR] Failed to restart core service`, failure);
        res.status(500).json({ message: "An error occurred while restarting the core service", error: failure.message });
    };
    console.log(`${getTimestamp()} [INFO] Restarting core service`);
    cmd(`docker restart llama-gpu-server`).then(onRestarted).catch(onRestartFailed);
 });
 // POST /api/v1/reset-conversation — replaces the caller's history with a fresh
 // one that contains only the system prompt, then confirms the reset.
 app.post('/api/v1/reset-conversation', (req, res) => {
    const { clientIp } = req;
    console.log(`${getTimestamp()} [INFO] Resetting conversation history for IP: ${clientIp}`); // Log the IP address
    const freshHistory = [{ role: 'system', content: prompt }];
    conversationHistory[clientIp] = freshHistory;
    console.log(`${getTimestamp()} [INFO] Conversation history reset for IP: ${clientIp}`);
    res.json({ message: `Conversation history reset for IP: ${clientIp}` });
 });
 // Start the HTTP server on the configured port and log the local URL once it
 // is accepting connections.
 const announceListening = () => {
    console.log(`${getTimestamp()} [INFO] Server running at http://localhost:${port}`);
 };
 app.listen(port, announceListening);
--- a/package.json
+++ b/package.json
@@ -0,0 +1,15 @@
 {
    "type": "module",
    "dependencies": {
      "axios": "^1.7.2",
      "body-parser": "^1.20.2",
      "cheerio": "^1.0.0-rc.12",
      "cmd-promise": "^1.2.0",
      "cors": "^2.8.5",
      "dotenv": "^16.4.5",
      "express": "^4.19.2",
      "llama-tokenizer-js": "^1.2.2",
      "node-fetch": "^2.7.0"
    }
  }