From a26b2c030dbe00213da262d728fb47e9014e87f3 Mon Sep 17 00:00:00 2001 From: Raven Date: Fri, 9 Aug 2024 03:24:50 -0400 Subject: [PATCH] first commit --- .gitignore | 2 + README.md | 90 ++++++++++++++++++++ ai_log.js | 236 +++++++++++++++++++++++++++++++++++++++++++++++++++ package.json | 15 ++++ 4 files changed, 343 insertions(+) create mode 100644 .gitignore create mode 100644 README.md create mode 100644 ai_log.js create mode 100644 package.json diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..074ab52 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +package-lock.json +node_modules \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..a4d20a0 --- /dev/null +++ b/README.md @@ -0,0 +1,90 @@ + +# AI NGINX Log Analysis +## Overview + +This project is an Express.js-based server designed to interact with a GPT-3.5 model to analyze NGINX logs for potential security threats. The server processes incoming requests, analyzes the content, and provides responses that include alerts or general insights. The server can also scrape web pages and manage conversation histories per IP address. + +## Features + +- **NGINX Log Analysis**: Analyzes web traffic logs to identify potential security threats and generate appropriate responses. +- **Conversation History**: Maintains a history of interactions for each client IP, allowing for context-aware responses. +- **Web Scraping**: Scrapes web pages to extract and format relevant information. +- **Token Management**: Limits the number of tokens used in the conversation to ensure responses are within the model's limits. +- **Core Service Management**: Provides endpoints to restart the core GPT service and reset conversation histories. +- **Cross-Origin Resource Sharing (CORS)**: Enabled for all routes, allowing for flexible API usage. + +## Installation + +**Note**: A `llama-cpp-python` OpenAI Emulation Server is required alongside the backend server. 
The server in this code is configured to run on `127.0.0.1:8003`. + +1. Clone the repository: + ```bash + git clone https://github.com/your-repo.git + cd your-repo + ``` + +2. Install dependencies: + ```bash + npm install + ``` + +3. Create a `.env` file in the project root with the following content: + ```bash + MAX_CONTENT_LENGTH=2000 + ``` + +4. Start the server: + ```bash + npm start + ``` + +## Endpoints + +### 1. `/api/v1/chat` +- **Method**: `POST` +- **Description**: Processes a user message, analyzes it for security threats, and returns a response. +- **Request Body**: + - `message`: The message to be analyzed. +- **Response**: JSON object containing the response from the GPT model. + +### 2. `/api/v1/conversation-history` +- **Method**: `GET` +- **Description**: Retrieves the conversation history for the requesting client's IP. +- **Response**: JSON object containing the conversation history. + +### 3. `/api/v1/restart-core` +- **Method**: `POST` +- **Description**: Restarts the core GPT service running in a Docker container. +- **Response**: JSON object confirming the restart or detailing any errors. + +### 4. `/api/v1/reset-conversation` +- **Method**: `POST` +- **Description**: Resets the conversation history for the requesting client's IP. +- **Response**: JSON object confirming the reset. + +## Environment Variables + +- `MAX_CONTENT_LENGTH`: The maximum length for the content extracted from web pages during scraping (default: 2000 characters). + +## Usage + +1. Send a POST request to `/api/v1/chat` with a message to analyze web traffic logs. +2. Use the `/api/v1/conversation-history` endpoint to fetch the chat history. +3. Restart the core service using the `/api/v1/restart-core` endpoint if needed. +4. Reset conversation history using the `/api/v1/reset-conversation` endpoint. + +## Logging + +The server logs key actions, including incoming requests, conversation history management, and errors. 
Logs are timestamped and include IP addresses for traceability.
+
+## Notes
+
+- Ensure the `llama-gpu-server` Docker container is running before starting the Express.js server.
+- Conversation history is stored in memory and will be lost when the server restarts. Consider implementing persistent storage if long-term history is required.
+
+
+## Contributions
+
+Contributions are welcome! Please fork the repository and submit a pull request with your changes.
+
+This project leverages cutting-edge AI to enhance web security analysis, making it easier to identify and respond to threats in real-time.
\ No newline at end of file
diff --git a/ai_log.js b/ai_log.js
new file mode 100644
index 0000000..3b11e0e
--- /dev/null
+++ b/ai_log.js
@@ -0,0 +1,306 @@
+import express from 'express';
+import axios from 'axios';
+import bodyParser from 'body-parser';
+import cmd from 'cmd-promise';
+import cors from 'cors';
+// Namespace import: cheerio 1.0.0 (allowed by "^1.0.0-rc.12") has no default export.
+import * as cheerio from 'cheerio';
+import 'dotenv/config';
+import llamaTokenizer from 'llama-tokenizer-js';
+
+const prompt = `
+You are a security AI responsible for analyzing web traffic from NGINX logs and blocking malicious IPs. Your task is to review the logs for potential attacks and issues. If you identify a verified problem, include [ALERT] followed by a detailed description of the issue in your response. Ensure your message is formatted in Markdown compatible with Discord.
+
+**Guidelines:**
+- Domains on the server: shells.lol, hehe.rest, dcord.rest, nodejs.lol, dht.rest, my-mc.link, ssh.surf, tcp.quest
+- Use emojis to enhance communication.
+- Do not report IP addresses for scraping or crawling.
+- Ignore IPs: x.x.x.x, x.x.x.x, x.x.x.x, x.x.x.x. Do not mention these in reports.
+- Ignore IP addresses with BOGONs such as 192.168.0.1 or 127.0.0.2, etc.
+- Avoid alerting for false positives or irregular activity.
+- If there are no alerts but you have interesting findings, write: GENERAL followed by your insights in Markdown.
+- Only send GENERAL messages for noteworthy events, not for routine traffic reports.
+- In a GENERAL message, feel free to provide a long explainer on your deductions.
+- Be decisive. If an IP is being malicious, block it. Do not monitor IPs for further traffic.
+- Do not ban an IP address without a corresponding log entry, provide this in your response.
+- Block all bot IPs and information scanners except Google.
+- Provide proof and reasoning for each ban.
+- DO NOT BAN AN IP THAT IS NOT IN A LOG EVER! YOU MUST SEE THE IP ADDRESS!
+- To ban an IP or flag it as a security risk, wrap it in a Markdown spoiler: ||IPHERE||
+`;
+
+const app = express();
+const port = 3001;
+
+app.use(cors()); // Enable CORS for all routes
+
+// Set a larger limit for the request body
+app.use(bodyParser.json({ limit: '50mb' })); // Adjust the limit as needed
+
+const TIMEOUT_DURATION = 100000; // Timeout duration in milliseconds (100 seconds)
+const MAX_TOKENS = 8000; // Maximum tokens allowed by the model
+const TOLERANCE = 100; // Tolerance to avoid exceeding token limit
+
+// Conversation history per client, keyed by client IP. The key comes from
+// request headers, so a null-prototype object is used: on a plain object a key
+// such as "__proto__" would resolve to Object.prototype instead of a message list.
+const conversationHistory = Object.create(null);
+
+// Helper function to get current timestamp as "<en-US date> [<en-US time>]"
+const getTimestamp = () => {
+  const now = new Date();
+  const date = now.toLocaleDateString('en-US');
+  const time = now.toLocaleTimeString('en-US');
+  return `${date} [${time}]`;
+};
+
+/**
+ * Resolve the client IP used as the conversation-history key.
+ * Header precedence: CF-Connecting-IP, X-Forwarded-For, X-Real-IP, then req.ip.
+ * X-Forwarded-For may carry a comma-separated proxy chain; only its first
+ * entry is used so the key does not change with the proxy path.
+ *
+ * @param {import('express').Request} req - Incoming request.
+ * @returns {string|undefined} The resolved client IP.
+ */
+function resolveClientIp(req) {
+  const forwardedFor = req.headers['x-forwarded-for'];
+  const firstForwarded = typeof forwardedFor === 'string'
+    ? forwardedFor.split(',')[0].trim()
+    : undefined;
+  return req.headers['cf-connecting-ip'] || firstForwarded || req.headers['x-real-ip'] || req.ip;
+}
+
+// Middleware to track conversation history by client IP
+app.use((req, res, next) => {
+  req.clientIp = resolveClientIp(req); // Store the IP in a request property
+  console.log(`${getTimestamp()} [INFO] Incoming request from IP: ${req.clientIp}`); // Log the IP address
+
+  if (!conversationHistory[req.clientIp]) {
+    console.log(`${getTimestamp()} [INFO] Initializing conversation history for new IP: ${req.clientIp}`);
+    conversationHistory[req.clientIp] = [
+      { role: 'system', content: prompt }
+    ];
+  }
+  next();
+});
+
+/**
+ * Count the llama tokens used by the text content of a list of chat messages.
+ * Every role is counted, including the system prompt, because all of them are
+ * sent to the model and consume its context window.
+ *
+ * @param {Array<{role: string, content: string}>} messages - Chat messages.
+ * @returns {Promise<number>} Total token count across all messages.
+ */
+async function countLlamaTokens(messages) {
+  let totalTokens = 0;
+  for (const message of messages) {
+    if (typeof message.content === 'string') {
+      totalTokens += llamaTokenizer.encode(message.content).length;
+    }
+  }
+  return totalTokens;
+}
+
+/**
+ * Drop the oldest non-system messages, in place, until the history fits in
+ * maxLength - tolerance tokens or only the first (system) message is left.
+ *
+ * @param {Array<{role: string, content: string}>} messages - History to trim; mutated.
+ * @param {number} maxLength - Token limit of the model.
+ * @param {number} tolerance - Safety margin subtracted from maxLength.
+ * @returns {Promise<void>}
+ */
+async function trimConversationHistory(messages, maxLength, tolerance) {
+  let tokenLength = await countLlamaTokens(messages);
+  while (tokenLength > maxLength - tolerance && messages.length > 1) {
+    messages.splice(1, 1); // Remove the oldest user/assistant message
+    tokenLength = await countLlamaTokens(messages);
+    console.log(`${getTimestamp()} [CLEANUP] Trimmed conversation history to ${tokenLength} tokens.`);
+  }
+}
+
+/**
+ * Fetch a web page and summarise it as plain text: title, meta description,
+ * cleaned body text and the URL.
+ *
+ * @param {string} url - Page to fetch.
+ * @returns {Promise<string|null>} The summary, or null if the page could not be scraped.
+ */
+async function scrapeWebPage(url) {
+  console.log(`${getTimestamp()} [INFO] Starting to scrape URL: ${url}`);
+  try {
+    const res = await axios.get(url);
+    const html = res.data;
+    const $ = cheerio.load(html);
+
+    // Extract page title, meta description and content
+    const pageTitle = $('head title').text().trim();
+    const pageDescription = $('head meta[name="description"]').attr('content');
+    const pageContent = $('body').text().trim();
+
+    // Construct response message with page details
+    let response = `Title: ${pageTitle}\n`;
+    if (pageDescription) {
+      response += `Description: ${pageDescription}\n`;
+    }
+    if (pageContent) {
+      // Environment variables are strings; parse once and fall back to 2000.
+      const parsedLimit = Number.parseInt(process.env.MAX_CONTENT_LENGTH, 10);
+      const maxContentLength = Number.isNaN(parsedLimit) ? 2000 : parsedLimit;
+      let plainTextContent = $('<div>').html(pageContent).text().trim().replace(/[\r\n\t]+/g, ' ');
+      // Global flag so every code-like token is stripped, not just the first match.
+      const codePattern = /\/\/|\/\*|\*\/|\{|\}|\[|\]|\bfunction\b|\bclass\b|\b0x[0-9A-Fa-f]+\b|\b0b[01]+\b/g;
+      plainTextContent = plainTextContent.replace(codePattern, '');
+      // Drop parenthesised fragments together with the spaces around them.
+      plainTextContent = plainTextContent.replace(/ *\([^)]*\) */g, '');
+      if (plainTextContent.length > maxContentLength) {
+        plainTextContent = plainTextContent.substring(0, maxContentLength) + '...';
+      }
+      response += `Content: ${plainTextContent.trim()}`;
+    }
+    response += `\nURL: ${url}`;
+
+    console.log(`${getTimestamp()} [INFO] Successfully scraped URL: ${url}`);
+    return response;
+  } catch (err) {
+    console.error(`${getTimestamp()} [ERROR] Failed to scrape URL: ${url}`, err);
+    return null;
+  }
+}
+
+/**
+ * Handle one chat request: keep only log lines that contain "[Client <ipv4>]",
+ * append them to the caller's conversation history, ask the llama API for a
+ * completion and send it back. At most one response is sent per request, even
+ * when the timeout fires first.
+ *
+ * @param {{clientIp: string, body: {message: string}}} req - Request carrying the log text.
+ * @param {import('express').Response} res - Express response.
+ * @returns {Promise<void>}
+ */
+async function processRequest(req, res) {
+  const startTime = Date.now(); // Start time tracking
+  const ip = req.clientIp;
+  console.log(`${getTimestamp()} [INFO] Handling chat request from IP: ${ip}`); // Log the IP address
+
+  const timeout = setTimeout(() => {
+    console.error(`${getTimestamp()} [ERROR] Request timed out for IP: ${ip}`);
+    // The request may already have been answered; never send a second response.
+    if (!res.headersSent) {
+      res.status(408).json({ message: "Request timed out" });
+    }
+  }, TIMEOUT_DURATION);
+
+  try {
+    const rawMessage = req.body.message;
+    console.log(`${getTimestamp()} [INFO] Received user message: ${rawMessage}`);
+
+    // Only log lines that name a client IP are forwarded to the model.
+    const clientIpRegex = /\[Client (\d{1,3}\.){3}\d{1,3}\]/;
+    const filteredLogLines = rawMessage.split('\n').filter(line => clientIpRegex.test(line));
+
+    if (filteredLogLines.length === 0) {
+      console.log(`${getTimestamp()} [INFO] No valid client IP addresses found in the log.`);
+      res.json({ message: "No valid client IP addresses found in the log." });
+      return;
+    }
+
+    // Store the filtered lines (not the raw input) so the model sees each log
+    // line once and the token trimming below covers everything that is sent.
+    const userMessage = `${filteredLogLines.join('\n')}\nDate/Time:${getTimestamp()}`;
+
+    if (!conversationHistory[ip]) {
+      console.log(`${getTimestamp()} [INFO] Initializing conversation history for new IP: ${ip}`);
+      conversationHistory[ip] = [{ role: 'system', content: prompt }];
+    }
+    conversationHistory[ip].push({ role: 'user', content: userMessage });
+
+    // Trim conversation history if it exceeds the token limit
+    await trimConversationHistory(conversationHistory[ip], MAX_TOKENS, TOLERANCE);
+
+    console.log(`${getTimestamp()} [INFO] Sending request to llama API for response`);
+    const response = await axios.post('http://127.0.0.1:8003/v1/chat/completions', {
+      model: 'gpt-3.5-turbo',
+      messages: conversationHistory[ip]
+    });
+    const assistantMessage = response.data.choices[0].message;
+    conversationHistory[ip].push(assistantMessage);
+
+    console.log(`${getTimestamp()} [INFO] Received response from llama API: ${assistantMessage.content}`);
+    console.log(`${getTimestamp()} [DEBUG] Finish Reason: ${response.data.choices[0].finish_reason}`);
+    console.log(`${getTimestamp()} [STATS] Usage: prompt_tokens=${response.data.usage.prompt_tokens}, completion_tokens=${response.data.usage.completion_tokens}, total_tokens=${response.data.usage.total_tokens}`);
+
+    // Skip the reply if the timeout handler already answered with 408.
+    if (!res.headersSent) {
+      res.json(assistantMessage);
+    }
+  } catch (error) {
+    console.error(`${getTimestamp()} [ERROR] An error occurred while handling chat request`, error);
+    if (!res.headersSent) {
+      res.status(500).json({ message: "An error occurred", error: error.message });
+    }
+  } finally {
+    clearTimeout(timeout); // Covers every exit path, including the early return above
+    const endTime = Date.now(); // End time tracking
+    const processingTime = ((endTime - startTime) / 1000).toFixed(2); // Calculate processing time in seconds
+    console.log(`${getTimestamp()} [STATS] Processing Time: ${processingTime} seconds`);
+    console.log(`${getTimestamp()} [INFO] Finished processing chat request for IP: ${ip}`);
+  }
+}
+
+app.post('/api/v1/chat', async (req, res) => {
+  const messageContent = req.body?.message;
+  if (typeof messageContent !== 'string') {
+    // Reject early: tokenizing a missing message would throw here, and Express 4
+    // does not catch rejections from async handlers.
+    res.status(400).json({ message: "Request body must contain a 'message' string" });
+    return;
+  }
+
+  // Trim the incoming message to fit within token limits
+  const encodedTokens = llamaTokenizer.encode(messageContent);
+  const MAX_MESSAGE_TOKENS = MAX_TOKENS - (await countLlamaTokens([{ role: 'system', content: prompt }])) - TOLERANCE;
+
+  let trimmedMessageContent = messageContent;
+  if (encodedTokens.length > MAX_MESSAGE_TOKENS) {
+    trimmedMessageContent = llamaTokenizer.decode(encodedTokens.slice(0, MAX_MESSAGE_TOKENS));
+  }
+
+  await processRequest({ ...req, body: { message: trimmedMessageContent } }, res);
+});
+
+app.get('/api/v1/conversation-history', (req, res) => {
+  const ip = req.clientIp;
+  console.log(`${getTimestamp()} [INFO] Fetching conversation history for IP: ${ip}`); // Log the IP address
+  res.json(conversationHistory[ip]);
+});
+
+// NOTE(review): this endpoint runs a Docker command, has no authentication and CORS is
+// enabled for all origins; restrict access before exposing this server publicly.
+app.post('/api/v1/restart-core', (req, res) => {
+  console.log(`${getTimestamp()} [INFO] Restarting core service`);
+  cmd(`docker restart llama-gpu-server`).then(out => {
+    console.log(`${getTimestamp()} [INFO] Core service restarted`);
+    res.json(out.stdout);
+  }).catch(err => {
+    console.error(`${getTimestamp()} [ERROR] Failed to restart core service`, err);
+    res.status(500).json({ message: "An error occurred while restarting the core service", error: err.message });
+  });
+});
+
+app.post('/api/v1/reset-conversation', (req, res) => {
+  const ip = req.clientIp;
+  console.log(`${getTimestamp()} [INFO] Resetting conversation history for IP: ${ip}`); // Log the IP address
+
+  conversationHistory[ip] = [
+    { role: 'system', content: prompt }
+  ];
+  console.log(`${getTimestamp()} [INFO] Conversation history reset for IP: ${ip}`);
+  res.json({ message: "Conversation history reset for IP: " + ip });
+});
+
+app.listen(port, () => {
+  console.log(`${getTimestamp()} [INFO] Server running at http://localhost:${port}`);
+});
diff --git a/package.json b/package.json
new file mode 100644
index 0000000..d9e40aa
--- /dev/null
+++ b/package.json
@@ -0,0 +1,17 @@
+{
+  "type": "module",
+  "scripts": {
+    "start": "node ai_log.js"
+  },
+  "dependencies": {
+    "axios": "^1.7.2",
+    "body-parser": "^1.20.2",
+    "cheerio": "^1.0.0-rc.12",
+    "cmd-promise": "^1.2.0",
+    "cors": "^2.8.5",
+    "dotenv": "^16.4.5",
+    "express": "^4.19.2",
+    "llama-tokenizer-js": "^1.2.2",
+    "node-fetch": "^2.7.0"
+  }
+}
\ No newline at end of file