llama-cpp-python-djs-bot/llamabot.js

import "dotenv/config.js";
import fetch from 'node-fetch';
import { emptyResponses } from './assets/emptyMessages.js';
import { resetResponses, userResetMessages } from './assets/resetMessages.js';
import { errorMessages, busyResponses } from './assets/errorMessages.js';
import cpuStat from 'cpu-stat';
import os from 'os';
import smi from 'node-nvidia-smi';
import llamaTokenizer from 'llama-tokenizer-js'

import {
    Client,
    GatewayIntentBits,
    ActivityType,
    Partials
} from 'discord.js';

// Gateway intents requested for this bot: direct messages, guild metadata,
// moderation events, guild messages and message content.
const gatewayIntents = [
    GatewayIntentBits.DirectMessages,
    GatewayIntentBits.Guilds,
    GatewayIntentBits.GuildModeration,
    GatewayIntentBits.GuildMessages,
    GatewayIntentBits.MessageContent,
];

// The single Discord client shared by every handler in this file.
const client = new Client({
    intents: gatewayIntents,
    partials: [Partials.Channel],
});

// Channel IDs the bot is allowed to answer in, read from the comma-separated
// CHANNEL_IDS variable in the .env file. Whitespace around each ID is trimmed
// and empty entries are dropped, so "123, 456" matches both channels and a
// missing variable yields an empty list instead of a startup TypeError.
const channelIDs = (process.env.CHANNEL_IDS ?? '')
    .split(',')
    .map((id) => id.trim())
    .filter((id) => id.length > 0);

if (channelIDs.length === 0) {
    console.warn('CHANNEL_IDS is empty or not set; the bot will not respond in any channel.');
}

// Per-user conversation state, keyed by Discord user ID.
const conversations = new Map();

// Holds the "please wait" progress message while a request is being processed.
let botMessage;


// Mark a user's conversation as busy or idle. While any conversation is busy
// the message handler turns away every other request.
//
// userId - Discord user ID used as the key in the conversations map.
// isBusy - true while a request for this user is being processed.
//
// When the user has no entry yet, one is created with an empty `messages`
// array as well as the flag: the message handler reads
// `conversation.messages.length` on whatever it finds in the map, so an entry
// without `messages` would make the user's next message throw.
function setBusy(userId, isBusy) {
    if (conversations.has(userId)) {
        conversations.get(userId).busy = isBusy;
        return;
    }
    conversations.set(userId, {
        messages: [],
        busy: isBusy
    });
}

// Report whether at least one stored conversation is flagged busy.
// As a side effect, the bot presence is switched to busy when one is found.
function isAnyConversationBusy() {
    const someoneIsBusy = [...conversations.values()].some((entry) => entry.busy);
    if (!someoneIsBusy) {
        return false;
    }
    setPresenceBusy();
    return true;
}

// Show the bot as "do not disturb" with a "Processing a Request" activity.
function setPresenceBusy() {
    const busyActivity = {
        name: `Processing a Request`,
        type: ActivityType.Playing
    };
    const busyPresence = {
        activities: [busyActivity],
        status: 'dnd',
    };
    client.user.setPresence(busyPresence);
}


// Show the bot as online with a "Ready for Request" activity.
function setPresenceOnline() {
    const readyActivity = {
        name: `Ready for Request`,
        type: ActivityType.Playing
    };
    const readyPresence = {
        activities: [readyActivity],
        status: 'online',
    };
    client.user.setPresence(readyPresence);
}


// Runs a single time when the client emits 'ready':
// log it and set the presence to online.
client.once('ready', function onReady() {
    console.log('Bot is ready.');
    setPresenceOnline();
});

// Handle every message Discord delivers to the bot.
//
// Flow: ignore messages outside the configured channels or from bots, turn
// away requests while another one is in flight, handle the reset commands,
// then append the user's message to their history, ask the model for a reply
// and post it (split into several messages when it exceeds the size limit).
//
// message - the discord.js message object passed to the 'messageCreate' event.
client.on('messageCreate', async (message) => {

    // Pick a random entry from one of the canned-response arrays.
    const pickRandom = (array) => array[Math.floor(Math.random() * array.length)];

    // Resolve after `ms` milliseconds; used to pace overflow chunks.
    const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

    // Cut `text` into pieces of at most `limit` characters. Slicing (rather
    // than a `.{1,n}` regex) keeps line breaks inside the chunks. Pieces that
    // contain only whitespace are skipped since they carry nothing to send.
    const splitIntoChunks = (text, limit) => {
        const chunks = [];
        for (let start = 0; start < text.length; start += limit) {
            const chunk = text.slice(start, start + limit);
            if (chunk.trim()) {
                chunks.push(chunk);
            }
        }
        return chunks;
    };

    // Send a random message from any array to the channel. A failed send is
    // logged rather than thrown so it cannot become an unhandled rejection.
    async function sendRand(array) {
        try {
            await message.channel.send(pickRandom(array));
        } catch (err) {
            console.error('Failed to send channel message:', err);
        }
    }

    // Send a random Direct Message from any array to the author. A failed
    // send is logged rather than thrown.
    async function sendRandDM(array) {
        try {
            await message.author.send(pickRandom(array));
        } catch (err) {
            console.error('Failed to send direct message:', err);
        }
    }

    // Only respond in the specified channels
    if (!channelIDs.includes(message.channel.id)) {
        return;
    }

    // Always ignore bots!
    if (message.author.bot) return;

    // Only one request is processed at a time. isAnyConversationBusy() also
    // switches the presence to busy when it finds a busy conversation.
    if (isAnyConversationBusy()) {
        try {
            await message.delete();
        } catch (err) {
            console.error('Failed to delete message while busy:', err);
        }
        await sendRandDM(busyResponses);
        return;
    }

    const userID = message.author.id;

    // If a user needs a reset, we delete their entry from the MAP
    if (message.content === '!reset' || message.content === '!r') {
        conversations.delete(userID);
        await sendRand(userResetMessages);
        return;
    }

    // Reuse the stored conversation when it has a message history; an entry
    // that only carries a busy flag is replaced by a fresh conversation.
    const stored = conversations.get(userID);
    const conversation = Array.isArray(stored?.messages)
        ? stored
        : { messages: [], busy: false };

    // A new conversation starts with a default chat flow that gives the
    // model its context: the init prompt, the user's name and a greeting.
    if (conversation.messages.length === 0) {
        conversation.messages.push({
            role: 'user',
            content: ` ${process.env.INIT_PROMPT}`
        });
        conversation.messages.push({
            role: 'user',
            content: ` User name: ${message.author.username}.`
        });
        conversation.messages.push({
            role: 'assistant',
            content: ` Hello, ${message.author.username}, how may I help you?`
        });
    }

    // Append user message to conversation history
    conversation.messages.push({
        role: 'user',
        content: ` ${message.cleanContent}`
    });

    // Store the conversation before flagging it busy so that setBusy()
    // updates this very object instead of creating a separate map entry.
    conversations.set(userID, conversation);

    try {
        // Lock out all other requests while this one is processed.
        setPresenceBusy();
        setBusy(userID, true);

        // Lets start generating the response
        const response = await generateResponse(conversation, message);

        // Strip every "@" from what is posted so model output cannot ping anyone.
        const safeResponse = typeof response === 'string' ? response.replaceAll('@', '') : '';

        if (safeResponse.trim()) {
            // Only non-empty replies become part of the history.
            conversation.messages.push({
                role: 'assistant',
                content: response
            });

            const limit = 1980;

            if (safeResponse.length > limit) {
                // The reply does not fit in one message, so it is sent in
                // chunks. Replies of 40 chunks or more are refused to avoid
                // API abuse.
                const chunks = splitIntoChunks(safeResponse, limit);
                if (chunks.length >= 40) {
                    await message.channel.send("Response chunks too large. Try again");
                    return;
                }

                // Send the chunks in order, pausing OVERFLOW_DELAY seconds
                // (default 3) between them. Each send is awaited, so the bot
                // stays busy until the whole reply has been delivered and
                // send errors reach the catch block below.
                const delayMs = (Number(process.env.OVERFLOW_DELAY) || 3) * 1000;
                for (let i = 0; i < chunks.length; i++) {
                    if (i > 0) {
                        await sleep(delayMs);
                    }
                    await message.channel.send(chunks[i]);
                }
            } else {
                await message.channel.send(safeResponse);
            }

            // Print the current conversation as it stands
            console.log(conversation);
        } else {
            // Empty reply: tell the user and reset their conversation. The
            // conversation is not stored again afterwards, so the reset sticks.
            await sendRand(emptyResponses);
            conversations.delete(userID);
            await sendRand(resetResponses);
        }
    } catch (err) {
        // If we have any errors lets send a response
        console.error(err);
        await sendRand(errorMessages);
    } finally {
        // We are done! Go back online and release the lock. The flag is also
        // cleared on the object itself; setBusy() is only called while the
        // user still has an entry so a conversation deleted above is not
        // re-created in the map.
        setPresenceOnline();
        conversation.busy = false;
        if (conversations.has(userID)) {
            setBusy(userID, false);
        }
    }
});

// Import cheerio for scraping
import cheerio from 'cheerio';

// Count the llama tokens used by the user and assistant messages in `messages`.
function countLlamaTokens(messages) {
    let totalTokens = 0;
    for (const entry of messages) {
        if (entry.role === 'user' || entry.role === 'assistant') {
            totalTokens += llamaTokenizer.encode(String(entry.content ?? '')).length;
        }
    }
    return totalTokens;
}

// Return a copy of `messages` that fits in `maxTokens`, dropping the OLDEST
// messages first. The first `preambleCount` messages (the context the message
// handler seeds: init prompt, user name, greeting) and the final message (the
// request being answered) are never removed. `messages` itself is not modified.
function trimMessagesToBudget(messages, maxTokens, preambleCount = 3) {
    const trimmed = [...messages];
    const costs = trimmed.map((entry) => countLlamaTokens([entry]));
    let total = costs.reduce((sum, cost) => sum + cost, 0);

    const firstRemovable = Math.min(preambleCount, trimmed.length);
    while (total > maxTokens && trimmed.length - 1 > firstRemovable) {
        total -= costs[firstRemovable];
        trimmed.splice(firstRemovable, 1);
        costs.splice(firstRemovable, 1);
    }
    return trimmed;
}

// Fetch `url` and condense the page into a "Title / Description / Content /
// URL" text block. `signal` aborts the download.
// NOTE(review): the URL comes straight from a user's message, so the bot will
// fetch whatever host it names - consider restricting the allowed targets.
async function summarizeUrl(url, signal) {
    const res = await fetch(url, { signal });
    const html = await res.text();
    const $ = cheerio.load(html);

    // Extract page title, meta description and content
    const pageTitle = $('head title').text().trim();
    const pageDescription = $('head meta[name="description"]').attr('content');
    const pageContent = $('body').text().trim();

    let summary = `Title: ${pageTitle}\n`;
    if (pageDescription) {
        summary += `Description: ${pageDescription}\n`;
    }
    if (pageContent) {
        let plainTextContent = $('<div>').html(pageContent).text().trim().replace(/[\r\n\t]+/g, ' ');
        // Remove every code-looking token (the pattern is global, so all
        // occurrences are dropped, not just the first).
        plainTextContent = plainTextContent.replace(
            /\/\/|\/\*|\*\/|\{|\}|\[|\]|\bfunction\b|\bclass\b|\b0x[0-9A-Fa-f]+\b|\b0b[01]+\b/g,
            ''
        );
        // Remove anything enclosed in brackets JUNK DATA
        plainTextContent = plainTextContent.replace(/ *\([^)]*\) */g, '');
        // Keep only as much content as MAX_CONTENT_LENGTH allows; when the
        // variable is not set the content is left at full length.
        const maxContentLength = Number(process.env.MAX_CONTENT_LENGTH);
        if (Number.isFinite(maxContentLength) && maxContentLength >= 0
            && plainTextContent.length > maxContentLength) {
            plainTextContent = plainTextContent.substring(0, maxContentLength) + '...';
        }
        summary += `Content: ${plainTextContent.trim()}\n`;
    }
    summary += `URL: ${url}`;
    return summary;
}

// Build the "please wait" embed. `gpuStats` is optional; when present its
// three readings are listed between the memory and time fields.
function buildProgressEmbed(systemLoad, elapsedSeconds, gpuStats) {
    const freeMemory = os.freemem() / 1024 / 1024 / 1024;
    const totalMemory = os.totalmem() / 1024 / 1024 / 1024;
    const usedMemory = totalMemory - freeMemory;

    const fields = [
        { name: 'System Load', value: `${systemLoad.toFixed(2)}%` },
        { name: 'Memory Usage', value: `${usedMemory.toFixed(2)} GB / ${totalMemory.toFixed(2)} GB` },
    ];
    if (gpuStats) {
        fields.push(
            { name: 'GPU Utilization', value: `${gpuStats.gpuUtilization}` },
            { name: 'Memory Utilization', value: `${gpuStats.memoryUtilization}` },
            { name: 'GPU Temperature', value: `${gpuStats.gpuTemp}` },
        );
    }
    fields.push({ name: 'Time', value: `~${elapsedSeconds} seconds.` });

    return {
        color: 0x0099ff,
        title: 'Please wait.. I am thinking...',
        fields,
    };
}

// Produce the model's reply for `conversation`.
//
// conversation - object with a `messages` array of { role, content } entries;
//                the last entry is the request being answered. When the
//                Discord message contains URLs, a summary of each page is
//                appended to that last entry.
// message      - the Discord message that triggered the request; its channel
//                receives the temporary "please wait" embed.
//
// Returns the text of the first completion choice (may be empty or null when
// the API returns no content). Rejects when a URL cannot be read, the API
// request fails or is aborted after 15 minutes, or the API returns no choices.
// The progress message, its refresh interval and the abort timer are always
// cleaned up, whether the request succeeds or fails.
async function generateResponse(conversation, message) {
    // One abort controller covers page downloads and the completion request.
    const controller = new AbortController();
    const timeout = setTimeout(() => {
        controller.abort();
    }, 900000);

    // Seconds between progress updates; 7 when REFRESH_INTERVAL is not set.
    const refreshSeconds = Number(process.env.REFRESH_INTERVAL) || 7;
    // GPU readings are only requested when GPU is set to a non-zero value.
    const useGpu = Number(process.env.GPU ?? '0') !== 0;

    let time = 0;
    let finished = false;        // set once the request is over
    let sendingProgress = false; // true while a progress send/edit is in flight
    let refreshInterval;

    // Create the progress message on first use and edit it afterwards. Never
    // throws; Discord errors are logged.
    const publishProgress = async (embedData) => {
        if (finished || sendingProgress) {
            return;
        }
        sendingProgress = true;
        try {
            if (!botMessage) {
                const sentMessage = await message.channel.send({ embeds: [embedData] });
                if (finished) {
                    // The request ended while the send was in flight; do not
                    // leave an orphaned "please wait" message behind.
                    await sentMessage.delete();
                } else {
                    botMessage = sentMessage;
                }
            } else {
                await botMessage.edit({ embeds: [embedData] });
            }
        } catch (err) {
            console.error('Failed to update progress message:', err);
        } finally {
            sendingProgress = false;
        }
    };

    // Sample the system load (and GPU readings when enabled) and publish it.
    const showSystemLoad = () => {
        time += refreshSeconds;
        cpuStat.usagePercent((err, percent) => {
            if (err) {
                console.log(err);
                return;
            }
            if (finished) {
                return;
            }
            if (!useGpu) {
                void publishProgress(buildProgressEmbed(percent, time));
                return;
            }
            smi((smiErr, data) => {
                if (smiErr) {
                    // Handle error if smi function fails
                    console.error(smiErr);
                    return;
                }
                // NOTE(review): presumably `gpu` is an array when several
                // cards are installed - confirm; the first one is used then.
                const gpuLog = data?.nvidia_smi_log?.gpu;
                const gpu = Array.isArray(gpuLog) ? gpuLog[0] : gpuLog;
                if (!gpu) {
                    console.error('nvidia-smi returned no GPU data');
                    return;
                }
                void publishProgress(buildProgressEmbed(percent, time, {
                    gpuUtilization: gpu.utilization?.gpu_util,
                    memoryUtilization: gpu.utilization?.memory_util,
                    gpuTemp: gpu.temperature?.gpu_temp,
                }));
            });
        });
    };

    try {
        // Begin web scraper if a https:// OR http:// URL is detected
        const urls = message.content.match(/(https?:\/\/[^\s]+)/g);
        if (urls) {
            // If there are multiple URLs, process them one by one
            for (const url of urls) {
                let pageSummary;
                try {
                    pageSummary = await summarizeUrl(url, controller.signal);
                } catch (err) {
                    console.error(err);
                    // Surface the failure to the caller, which reports it to the user.
                    throw new Error(`Could not read ${url}`, { cause: err });
                }

                // Append a new line and the page summary to the last message
                const lastMessageIndex = conversation.messages.length - 1;
                conversation.messages[lastMessageIndex].content += "\n" + pageSummary;

                console.log("A URL was provided, response: " + pageSummary);
            }
        }

        // Show the first progress update right away, then refresh periodically.
        showSystemLoad();
        refreshInterval = setInterval(showSystemLoad, refreshSeconds * 1000);

        const totalTokens = countLlamaTokens(conversation.messages);
        console.log(`Total Llama tokens: ${totalTokens}`);

        // Remove older messages if necessary so the prompt fits the budget
        const maxLength = 1800;
        const tolerance = 25; // allow for some flexibility in the token length
        const messagesCopy = trimMessagesToBudget(conversation.messages, maxLength + tolerance);
        if (messagesCopy.length !== conversation.messages.length) {
            console.log((totalTokens - countLlamaTokens(messagesCopy)) + " removed \nAfter Resize: " + countLlamaTokens(messagesCopy));
        }

        // Sending request to our API
        const response = await fetch(`http://${process.env.ROOT_IP}:${process.env.ROOT_PORT}/v1/chat/completions`, {
            method: 'POST',
            headers: {
                'accept': 'application/json',
                'Content-Type': 'application/json'
            },
            body: JSON.stringify({
                messages: messagesCopy,
                max_tokens: Number(process.env.MAX_TOKENS),
                repeat_penalty: Number(process.env.REPEAT_PENALTY)
            }),
            signal: controller.signal
        });

        if (!response.ok) {
            throw new Error(`Completion API responded with status ${response.status}`);
        }

        const responseData = await response.json();
        console.log(JSON.stringify(responseData));

        const choice = responseData?.choices?.[0];
        if (!choice?.message) {
            throw new Error(`Completion API returned no choices: ${JSON.stringify(responseData)}`);
        }

        const responseText = choice.message.content;
        console.log(responseText);
        return responseText;

    } finally {
        // Stop every timer and remove the "please wait" message, whatever
        // the outcome of the request was.
        finished = true;
        clearTimeout(timeout);
        clearInterval(refreshInterval);
        time = 0;

        const progressMessage = botMessage;
        botMessage = null;
        if (progressMessage) {
            try {
                await progressMessage.delete();
            } catch (err) {
                console.log("Error deleting message: ", err);
            }
        }
    }
}

// Log in with the bot token taken from THE_TOKEN in the .env file. A failed
// login is reported and ends the process with a non-zero exit code instead of
// surfacing as an unhandled promise rejection.
client.login(process.env.THE_TOKEN).catch((err) => {
    console.error('Discord login failed:', err);
    process.exit(1);
});