import express from 'express';
import axios from 'axios';
import bodyParser from 'body-parser';
import cmd from 'cmd-promise';
import cors from 'cors';
import cheerio from 'cheerio';
import 'dotenv/config';
import llamaTokenizer from 'llama-tokenizer-js';
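
// Optional .env configuration, loaded by the 'dotenv/config' import above. Only MAX_CONTENT_LENGTH
// is read from the environment in this file (see scrapeWebPage below); the value shown is an
// illustrative example, not a requirement:
//   MAX_CONTENT_LENGTH=2000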

const prompt = `
You are a security AI responsible for analyzing web traffic from NGINX logs and blocking malicious IPs. Your task is to review the logs for potential attacks and issues. If you identify a verified problem, include [ALERT] followed by a detailed description of the issue in your response. Ensure your message is formatted in Markdown compatible with Discord.

**Guidelines:**
- Domains on the server: shells.lol, hehe.rest, dcord.rest, nodejs.lol, dht.rest, my-mc.link, ssh.surf, tcp.quest
- Use emojis to enhance communication.
- Do not report IP addresses for scraping or crawling.
- Ignore IPs: x.x.x.x, x.x.x.x, x.x.x.x, x.x.x.x. Do not mention these in reports.
- Ignore IP addresses with BOGONs such as 192.168.0.1 or 127.0.0.2, etc.
- Avoid alerting for false positives or irregular activity.
- If there are no alerts but you have interesting findings, write: GENERAL followed by your insights in Markdown.
- Only send GENERAL messages for noteworthy events, not for routine traffic reports.
- In a GENERAL message, feel free to provide a long explainer on your deductions.
- Be decisive. If an IP is being malicious, block it. Do not monitor IPs for further traffic.
- Do not ban an IP address without a corresponding log entry; provide the log entry in your response.
- Block all bot IPs and information scanners except Google.
- Provide proof and reasoning for each ban.
- DO NOT BAN AN IP THAT IS NOT IN A LOG EVER! YOU MUST SEE THE IP ADDRESS!
- To ban an IP or flag it as a security risk, wrap it in a Markdown spoiler: ||IPHERE||
`;

const app = express();
const port = 3001;

app.use(cors()); // Enable CORS for all routes

// Set a larger limit for the request body
app.use(bodyParser.json({ limit: '50mb' })); // Adjust the limit as needed

const TIMEOUT_DURATION = 100000; // Timeout duration in milliseconds (100 seconds)
const MAX_TOKENS = 8000; // Maximum tokens allowed by the model
const TOLERANCE = 100; // Tolerance to avoid exceeding the token limit
let conversationHistory = {};
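
// Note: the effective history budget is MAX_TOKENS - TOLERANCE (7900 tokens here); see
// trimConversationHistory() and the /api/v1/chat handler below, which both trim against it.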

// Helper function to get current timestamp
const getTimestamp = () => {
  const now = new Date();
  const date = now.toLocaleDateString('en-US');
  const time = now.toLocaleTimeString('en-US');
  return `${date} [${time}]`;
};

// Middleware to track conversation history by CF-Connecting-IP
app.use((req, res, next) => {
  const ip = req.headers['cf-connecting-ip'] || req.headers['x-forwarded-for'] || req.headers['x-real-ip'] || req.ip;
  req.clientIp = ip; // Store the IP in a request property
  console.log(`${getTimestamp()} [INFO] Incoming request from IP: ${req.clientIp}`); // Log the IP address

  if (!conversationHistory[req.clientIp]) {
    console.log(`${getTimestamp()} [INFO] Initializing conversation history for new IP: ${req.clientIp}`);
    conversationHistory[req.clientIp] = [
      { role: 'system', content: prompt }
    ];
  }
  next();
});
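
// Note: x-forwarded-for and x-real-ip can be set by clients unless a trusted proxy overwrites them;
// when this server sits behind Cloudflare, cf-connecting-ip (checked first) is the reliable value.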

async function countLlamaTokens(messages) {
  let totalTokens = 0;
  for (const message of messages) {
    if (message.role === 'user' || message.role === 'assistant') {
      const encodedTokens = llamaTokenizer.encode(message.content);
      totalTokens += encodedTokens.length;
    }
  }
  return totalTokens;
}
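
// countLlamaTokens() only counts user/assistant messages; system messages are not counted.
// Illustrative usage:
//   await countLlamaTokens([{ role: 'user', content: 'hello' }]); // -> token count for "hello"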

async function trimConversationHistory(messages, maxLength, tolerance) {
  let tokenLength = await countLlamaTokens(messages);
  while (tokenLength > maxLength - tolerance && messages.length > 1) {
    messages.splice(1, 1); // Remove the oldest user/assistant message
    tokenLength = await countLlamaTokens(messages);
    console.log(`${getTimestamp()} [CLEANUP] Trimmed conversation history to ${tokenLength} tokens.`);
  }
}
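
// Trimming always preserves messages[0] (the system prompt) and drops the oldest user/assistant
// turns first, so long-running conversations shrink gracefully instead of overflowing the context.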

// Function to scrape web page
async function scrapeWebPage(url) {
  console.log(`${getTimestamp()} [INFO] Starting to scrape URL: ${url}`);
  try {
    const res = await axios.get(url);
    const html = res.data;
    const $ = cheerio.load(html);

    // Extract page title, meta description and content
    const pageTitle = $('head title').text().trim();
    const pageDescription = $('head meta[name="description"]').attr('content');
    const pageContent = $('body').text().trim();

    // Construct response message with page details
    let response = `Title: ${pageTitle}\n`;
    if (pageDescription) {
      response += `Description: ${pageDescription}\n`;
    }
    if (pageContent) {
      const MAX_CONTENT_LENGTH = process.env.MAX_CONTENT_LENGTH || 2000;
      let plainTextContent = $('<div>').html(pageContent).text().trim().replace(/[\r\n\t]+/g, ' ');
      const codePattern = /\/\/|\/\*|\*\/|\{|\}|\[|\]|\bfunction\b|\bclass\b|\b0x[0-9A-Fa-f]+\b|\b0b[01]+\b/;
      const isCode = codePattern.test(plainTextContent);

      if (isCode) {
        plainTextContent = plainTextContent.replace(codePattern, '');
      }
      plainTextContent = plainTextContent.replace(/ *\([^)]*\) */g, '');
      if (plainTextContent.length > MAX_CONTENT_LENGTH) {
        plainTextContent = plainTextContent.substring(0, MAX_CONTENT_LENGTH) + '...';
      }
      response += `Content: ${plainTextContent.trim()}`;
    }
    response += `\nURL: ${url}`;

    console.log(`${getTimestamp()} [INFO] Successfully scraped URL: ${url}`);
    return response;
  } catch (err) {
    console.error(`${getTimestamp()} [ERROR] Failed to scrape URL: ${url}`, err);
    return null;
  }
}
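
// Note: scrapeWebPage() is not wired to any route in this file. Hypothetical usage, if you want to
// pull a page summary into a stored conversation:
//   const summary = await scrapeWebPage('https://example.com');
//   if (summary) conversationHistory[someIp].push({ role: 'user', content: summary }); // someIp is a placeholder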

async function processRequest(req, res) {
  const startTime = Date.now(); // Start time tracking
  const ip = req.clientIp;
  console.log(`${getTimestamp()} [INFO] Handling chat request from IP: ${ip}`); // Log the IP address

  const timeout = setTimeout(() => {
    console.error(`${getTimestamp()} [ERROR] Request timed out for IP: ${ip}`);
    if (!res.headersSent) {
      res.status(408).json({ message: "Request timed out" });
    }
  }, TIMEOUT_DURATION);

  try {
    let userMessage = req.body.message;
    console.log(`${getTimestamp()} [INFO] Received user message: ${userMessage}`);
    userMessage += `\nDate/Time:${getTimestamp()}`;

    if (!conversationHistory[ip]) {
      console.log(`${getTimestamp()} [INFO] Initializing conversation history for new IP: ${ip}`);
      conversationHistory[ip] = [{ role: 'system', content: prompt }];
    }
    conversationHistory[ip].push({ role: 'user', content: userMessage });

    // Trim conversation history if it exceeds the token limit
    await trimConversationHistory(conversationHistory[ip], MAX_TOKENS, TOLERANCE);

    const logLines = userMessage.split('\n');
    const clientIpRegex = /\[Client (\d{1,3}\.){3}\d{1,3}\]/;
    const filteredLogLines = logLines.filter(line => clientIpRegex.test(line));

    if (filteredLogLines.length === 0) {
      console.log(`${getTimestamp()} [INFO] No valid client IP addresses found in the log.`);
      clearTimeout(timeout); // Stop the pending 408 before replying
      if (!res.headersSent) {
        res.json({ message: "No valid client IP addresses found in the log." });
      }
      return;
    }

    const filteredMessage = filteredLogLines.join('\n');

    console.log(`${getTimestamp()} [INFO] Sending request to llama API for response`);
    const response = await axios.post('http://127.0.0.1:8003/v1/chat/completions', {
      model: 'gpt-3.5-turbo',
      messages: [...conversationHistory[ip], { role: 'user', content: filteredMessage }]
    });
    const assistantMessage = response.data.choices[0].message;
    conversationHistory[ip].push(assistantMessage);

    console.log(`${getTimestamp()} [INFO] Received response from llama API: ${assistantMessage.content}`);
    console.log(`${getTimestamp()} [DEBUG] Finish Reason: ${response.data.choices[0].finish_reason}`);
    console.log(`${getTimestamp()} [STATS] Usage: prompt_tokens=${response.data.usage.prompt_tokens}, completion_tokens=${response.data.usage.completion_tokens}, total_tokens=${response.data.usage.total_tokens}`);

    clearTimeout(timeout);
    if (!res.headersSent) { // Avoid a double response if the timeout already fired
      res.json(assistantMessage);
    }
  } catch (error) {
    console.error(`${getTimestamp()} [ERROR] An error occurred while handling chat request`, error);
    clearTimeout(timeout);
    if (!res.headersSent) {
      res.status(500).json({ message: "An error occurred", error: error.message });
    }
  } finally {
    const endTime = Date.now(); // End time tracking
    const processingTime = ((endTime - startTime) / 1000).toFixed(2); // Calculate processing time in seconds
    console.log(`${getTimestamp()} [STATS] Processing Time: ${processingTime} seconds`);
    console.log(`${getTimestamp()} [INFO] Finished processing chat request for IP: ${ip}`);
  }
}
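
// processRequest() forwards only log lines containing a "[Client x.x.x.x]" tag (see clientIpRegex);
// everything else in the submitted message is dropped before the model call. Illustrative log line
// (format assumed, adjust to your NGINX log_format):
//   2024/01/01 12:00:00 [warn] upstream timed out [Client 203.0.113.7] "GET /admin HTTP/1.1"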

app.post('/api/v1/chat', async (req, res) => {
  // Trim the incoming message to fit within token limits.
  // countLlamaTokens() skips system messages, so measure the system prompt directly here.
  const messageContent = req.body.message;
  const encodedTokens = llamaTokenizer.encode(messageContent);
  const systemPromptTokens = llamaTokenizer.encode(prompt).length;
  const MAX_MESSAGE_TOKENS = MAX_TOKENS - systemPromptTokens - TOLERANCE;

  let trimmedMessageContent = messageContent;
  if (encodedTokens.length > MAX_MESSAGE_TOKENS) {
    trimmedMessageContent = llamaTokenizer.decode(encodedTokens.slice(0, MAX_MESSAGE_TOKENS));
  }

  await processRequest({ ...req, body: { message: trimmedMessageContent } }, res);
});
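
// Illustrative client call (assumes the server is reachable on localhost:3001; nginxLogChunk is a
// placeholder for a string of NGINX log lines):
//   const { data } = await axios.post('http://localhost:3001/api/v1/chat', { message: nginxLogChunk });
//   console.log(data.content); // assistant reply, Markdown formatted for Discord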

app.get('/api/v1/conversation-history', (req, res) => {
  const ip = req.clientIp;
  console.log(`${getTimestamp()} [INFO] Fetching conversation history for IP: ${ip}`); // Log the IP address
  res.json(conversationHistory[ip]);
});

app.post('/api/v1/restart-core', (req, res) => {
  console.log(`${getTimestamp()} [INFO] Restarting core service`);
  cmd(`docker restart llama-gpu-server`).then(out => {
    console.log(`${getTimestamp()} [INFO] Core service restarted`);
    res.json(out.stdout);
  }).catch(err => {
    console.error(`${getTimestamp()} [ERROR] Failed to restart core service`, err);
    res.status(500).json({ message: "An error occurred while restarting the core service", error: err.message });
  });
});
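
// Note: /api/v1/restart-core assumes a Docker container named "llama-gpu-server" exists on the host
// and that this Node process may run `docker restart`; the endpoint has no authentication in this
// file, so it should only be exposed on a trusted network.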

app.post('/api/v1/reset-conversation', (req, res) => {
  const ip = req.clientIp;
  console.log(`${getTimestamp()} [INFO] Resetting conversation history for IP: ${ip}`); // Log the IP address

  conversationHistory[ip] = [
    { role: 'system', content: prompt }
  ];
  console.log(`${getTimestamp()} [INFO] Conversation history reset for IP: ${ip}`);
  res.json({ message: "Conversation history reset for IP: " + ip });
});

app.listen(port, () => {
  console.log(`${getTimestamp()} [INFO] Server running at http://localhost:${port}`);
});