Containerize bot and server in one stack.

2023-04-11 19:44:11 -07:00
15 changed files with 34 additions and 1377 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,2 +1,3 @@
 .env
 node_modules/
+package-lock.json
--- a/Dockerfile.gpu
+++ b/Dockerfile.gpu
@ -1,23 +0,0 @@
-FROM nvidia/cuda:12.1.1-devel-ubuntu20.04
-ENV DEBIAN_FRONTEND noninteractive
-
-WORKDIR /app
-
-RUN apt update
-
-RUN apt install sudo curl -y
-
-RUN apt-get install -y ca-certificates curl gnupg
-RUN sudo mkdir -p /etc/apt/keyrings
-RUN curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key | sudo gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg
-ENV NODE_MAJOR=18
-RUN echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_$NODE_MAJOR.x nodistro main" | sudo tee /etc/apt/sources.list.d/nodesource.list
-RUN apt-get update
-RUN apt-get install nodejs -y
-
-COPY package*.json ./
-RUN npm install --omit=dev
-
-COPY . .
-
-CMD node llamabot.js
--- a/README.md
+++ b/README.md
@ -32,9 +32,6 @@ Define a generateResponse function that sends a request to the GPT-3 API to gene

 Call the generateResponse function within the messageCreate event listener function.

-![demo](https://media.discordapp.net/attachments/562897071326101515/1095738407826767922/image.png?width=1038&height=660 "demo")
-
-
 # Backend REQUIRED

 The HTTP Server from https://abetlen.github.io/llama-cpp-python/ is required to use this bot.
@ -51,7 +48,7 @@ python3 -m llama_cpp.server

 Navigate to http://localhost:8000/docs to see the OpenAPI documentation.

-# Static Usage
+# Usage

 1) Use ```npm i ```

@ -63,41 +60,5 @@ Navigate to http://localhost:8000/docs to see the OpenAPI documentation.

 6) Run the bot ```node llamabot.js	```

-# Docker Compose 
-This will automatically configure the API for you as well as the bot in two separate containers within a stack.
-
-1. `git clone https://git.ssh.surf/snxraven/llama-cpp-python-djs-bot.git`
-
-
-2. `cp default.env .env`
-
-3. Set DATA_DIR in .env to the exact location of your model files.
-
-4. Edit docker-compose.yaml MODEL to ensure the correct model bin is set
-
-5. `docker compose up -d`
-
-
-# Docker Compose with GPU
-This will automatically configure the API that supports cuBLAS and GPU inference for you as well as the bot in two separate containers within a stack.
-
-NOTE: Caching for GPU has been fixed.
-
-1. `git clone https://git.ssh.surf/snxraven/llama-cpp-python-djs-bot.git` - Clone the repo
-
-2. `mv docker-compose.yml docker-compose.nogpu.yml; mv docker-compose.gpu.yml docker-compose.yml;` - Move nongpu compose out of the way, Enable GPU Support
-
-3. `mv Dockerfile Dockerfile.nongpu; mv Dockerfile.gpu Dockerfile;` - Move nongpu Dockerfile out of the way, enable GPU Support
-
-3. `cp default.gpu.env .env` - Copy the default GPU .env to its proper location
-
-4. Set DATA_DIR in .env to the exact location of your model files.
-
-5. Edit docker-compose.yaml MODEL to ensure the correct model bin is set
-
-6. Set N_GPU_LAYERS to the number of layers you would like to offload to the GPU
-
-7. `docker compose up -d`
-

 Want to make this better? Issue a pull request!
--- a/default.env
+++ b/default.env
@ -1,58 +1,5 @@
-# Discord Token
-THE_TOKEN = ""
-
-# The channel IDs the bot will operate in, separated by commas
-CHANNEL_IDS = 
-
-# The INIT prompt for all conversations.
-INIT_PROMPT = "Assistant name: ChatAI. You code, write and provide any information without any mistakes. You can read basic text from URLs if a user sends a user via scraping."
-
-# Loading Embed Refresh Timing (in seconds)
-REFRESH_INTERVAL=2
-
-# When a message is too large for Discord we chunk the response into separate messages.
-# To ensure we do not rate limit the bot we send these at a delay interval.
-# DEFAULT: 3 a good setting is between 3 and 7 seconds.
-OVERFLOW_DELAY=3
-
-# Max Content to fetch from given URLs
-MAX_CONTENT_LENGTH=2000
-
-# Max tokens for Generations
-MAX_TOKENS = 1499
-
-# ROOT_IP is only used when running the bot without docker compose
-ROOT_IP = 127.0.0.1
-
-# PORT  is only used when running the bot without docker compose
+THE_TOKEN = "DISCORD_TOKEN_HERE"
+CHANNEL_IDS = 1094494101631680653,1094628334727614605
+ROOT_IP = 192.168.0.15
 ROOT_PORT = 8000
-
-# Directory to your models (llama.cpp specific settings)
-DATA_DIR = /Users/username/code/models
-
-# Enable Experimental Message Caches (Limited to single session)
-# Cache will use ~1.4 GB or MORE of RAM. ONLY ENABLE IF YOUR SYSTEM CAN HANDLE THIS.
-CACHE = 1
-
-CACHE_TYPE = "disk"
-
-# Set number of threads to use, currently, a standard thread will utilize 1 whole core
-# I usually set this to somewhere between all the cores I physically have and 2 cores fewer, to leave room for other processes.
-N_THREADS = 10
-
-# Always use MMAP unless you know what you are doing
-#USE_MMAP=1
-
-# Only use MLOCK if you know what it does!
-USE_MLOCK=0
-
-# The higher the number the more hard core.
-REPEAT_PENALTY=1
-
-# GPU SPECIFIC SETTINGS BELOW
-
-GPU=0
-
-N_GPU_LAYERS=32
-
-PYTHONUNBUFFERED=1
+DATA_DIR = /home/USERNAME/weights
--- a/docker-compose.gpu.yml
+++ b/docker-compose.gpu.yml
@ -1,34 +0,0 @@
-version: '3.9'
-
-services:
-  backend:
-    container_name: llama-gpu-server
-    restart: unless-stopped
-    build:
-      context: ./gpu-server
-    env_file: .env
-    volumes:
-      - ${DATA_DIR}:/usr/src/app/models
-    environment:
-      - HOST=llama-gpu-server
-      - MODEL=./models/ggml-vic7b-q5_1.bin.1
-      - NVIDIA_VISIBLE_DEVICES=all
-    runtime: nvidia
-    
-  frontend:
-    container_name: llama-djs-bot
-    restart: unless-stopped
-    build:
-      context: .
-    depends_on:
-      - backend
-    environment:
-      - THE_TOKEN
-      - REFRESH_INTERVAL
-      - CHANNEL_IDS
-      - GPU
-      - ROOT_IP=llama-gpu-server
-      - ROOT_PORT=8000
-      - INIT_PROMPT='Assistant Name':' ChatAI. You code, write and provide any information without any mistakes.'
-      - NVIDIA_VISIBLE_DEVICES=all
-    runtime: nvidia
--- a/docker-compose.yml
+++ b/docker-compose.yml
@ -8,10 +8,10 @@ services:
      context: ./server
    env_file: .env
    volumes:
-      - ${DATA_DIR}:/usr/src/app/models
+      - ${DATA_DIR}/weights:/usr/src/app/models
    environment:
      - HOST=llama-python-server
-      - MODEL=./models/vicuna-7b-1.1.ggmlv3.q6_K.bin
+      - MODEL=./models/gpt4-x-alpaca-13b-native-4bit-128g.bin
  llama-python-djs-bot:
    container_name: llama-python-djs-bot
    restart: unless-stopped
@ -21,9 +21,6 @@ services:
      - llama-python-server
    environment:
      - THE_TOKEN
-      - GPU
-      - REFRESH_INTERVAL
      - CHANNEL_IDS
      - ROOT_IP=llama-python-server
      - ROOT_PORT=8000
-      - INIT_PROMPT='Assistant Name':' ChatAI. You code, write and provide any information without any mistakes.'
--- a/gpu-server/Dockerfile
+++ b/gpu-server/Dockerfile
@ -1,19 +0,0 @@
-FROM nvidia/cuda:12.1.1-devel-ubuntu20.04
-
-# Install the deps
-ENV DEBIAN_FRONTEND=noninteractive
-ENV TZ=Etc/GMT
-RUN apt-get update && apt-get install -y --no-install-recommends python3 python3-pip git cmake build-essential
-# Get llama-cpp-python
-WORKDIR /usr/src
-
-WORKDIR /usr/src/app 
-
-# Build llama-cpp-python w/CuBLAS
-RUN CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 pip install llama-cpp-python[server]
-
-# We need to set the host to 0.0.0.0 to allow outside access
-ENV HOST 0.0.0.0
-
-# Run the server
-CMD python3 -m llama_cpp.server
--- a/huggingface-config/Dockerfile
+++ b/huggingface-config/Dockerfile
@ -1,48 +0,0 @@
-FROM ubuntu:latest
-
-RUN apt update
-
-RUN DEBIAN_FRONTEND=noninteractive  apt install curl sudo -y
-
-RUN apt-get install -y ca-certificates curl gnupg
-RUN sudo mkdir -p /etc/apt/keyrings
-RUN curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key | sudo gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg
-ENV NODE_MAJOR=18
-RUN echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_$NODE_MAJOR.x nodistro main" | sudo tee /etc/apt/sources.list.d/nodesource.list
-RUN apt-get update
-RUN apt-get install nodejs -y 
-
-RUN DEBIAN_FRONTEND=noninteractive  apt install nodejs -y
-
-RUN npm i pm2 -g
-
-RUN mkdir -p /code/.pm2
-
-RUN mkdir -p /.pm2
-
-RUN chmod 0777 /code/.pm2 
-
-RUN chmod 0777 /.pm2
-
-RUN DEBIAN_FRONTEND=noninteractive  apt install wget python3 python3-pip -y
-
-WORKDIR /code
-
-RUN pip install --no-cache-dir llama-cpp-python[server]
-
-COPY . .
-
-RUN npm i
-
-ENV HOST localhost
-ENV PORT 7860
-ENV MODEL=/code/mistral-7b-instruct-v0.1.Q2_K.gguf
-ENV CACHE=1
-ENV USE_MLOCK=0
-ENV REPEAT_PENALTY=1
-ENV MODEL=/code/mistral-7b-instruct-v0.1.Q2_K.gguf
-ENV PM2_HOME=/code/.pm2
-
-RUN wget -q -O mistral-7b-instruct-v0.1.Q2_K.gguf "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q2_K.gguf?download=true"
-
-CMD /bin/bash /code/start.sh
--- a/huggingface-config/README.md
+++ b/huggingface-config/README.md
@ -1,17 +0,0 @@
-# How to Deploy on Hugging Face
-
-1) Move the "assets" directory and llamabot.js, package.json files into your HuggingFace repo directory.
-
-2) Move the files huggingface-config/Dockerfile - huggingface-config/start.sh - huggingface-config/startServices.json into the root of the hugging face repo.
-
-3) Edit the Dockerfile as needed; it is set to automatically download Mistral 7B Instruct v0.1 (Q2_K GGUF)
-
-3) Move default.env into your repo as .env and edit for your needs
-
-4) Push the changes
-
-You should then see the bot being built and deployed on HuggingFace
-
-pm2 log will run automatically so you can see frontend and backend logs.
-
-PLEASE NOTE: Your hugging face repo should remain private!
--- a/huggingface-config/start.sh
+++ b/huggingface-config/start.sh
@ -1,6 +0,0 @@
-#!/bin/bash
-# This is the main process of the container and will stay alive as long as pm2 log is running.
-
-pm2 start startServices.json
-
-pm2 log
--- a/huggingface-config/startServices.json
+++ b/huggingface-config/startServices.json
@ -1,14 +0,0 @@
-{
-    "apps": [
-        {
-            "name": "Discord-Bot",
-            "script": "node /code/llamabot.js",
-            "args" : ""
-        },
-        {
-            "name": "AI-API",
-            "script": "python3 -m llama_cpp.server",
-            "args" : ""
-        }
-    ]
-}
--- a/llamabot.js
+++ b/llamabot.js
@ -3,17 +3,8 @@ import fetch from 'node-fetch';
 import { emptyResponses } from './assets/emptyMessages.js';
 import { resetResponses, userResetMessages } from './assets/resetMessages.js';
 import { errorMessages, busyResponses } from './assets/errorMessages.js';
-import cpuStat from 'cpu-stat';
-import os from 'os';
-import smi from 'node-nvidia-smi';
-import llamaTokenizer from 'llama-tokenizer-js'

-import {
-    Client,
-    GatewayIntentBits,
-    ActivityType,
-    Partials
-} from 'discord.js';
+import { Client, GatewayIntentBits, ActivityType, Partials } from 'discord.js';

 const client = new Client({
    intents: [
@ -29,14 +20,8 @@ const client = new Client({
 // Grab ChannelIDs from the .env file
 const channelIDs = process.env.CHANNEL_IDS.split(',');

-// Store Conversations in a MAP
 const conversations = new Map();

-let botMessage; // define a variable to hold the message object
-
-
-// Set busy function this allows us to set our bot into busy mode
-// locking out all other tasks until the current one is complete
 function setBusy(userId, isBusy) {
    if (conversations.has(userId)) {
        conversations.get(userId).busy = isBusy;
@ -47,8 +32,6 @@ function setBusy(userId, isBusy) {
    }
 }

-// General check, if any conversation is busy
-// If yes, flag it and let us know
 function isAnyConversationBusy() {
    for (const conversation of conversations.values()) {
        if (conversation.busy) {
@ -59,7 +42,6 @@ function isAnyConversationBusy() {
    return false;
 }

-// Setting our presence to busy within the bot's status
 function setPresenceBusy() {
    client.user.setPresence({
        activities: [{
@ -70,8 +52,6 @@ function setPresenceBusy() {
    });
 }

-
-// Setting our presence to ready within the bot's status
 function setPresenceOnline() {
    client.user.setPresence({
        activities: [{
@ -83,23 +63,18 @@ function setPresenceOnline() {
 }


-// When we have logged in to discord api
-// Set precence to online.
 client.once('ready', () => {
    console.log('Bot is ready.');
    setPresenceOnline()
 });

-// When a message is sent within discord, lets handle it.
 client.on('messageCreate', async (message) => {

-    // Function to send a random message from any array
    async function sendRand(array) {
        const arrayChoice = array[Math.floor(Math.random() * array.length)];
        await message.channel.send(arrayChoice); // give a notification of reset using a human like response.
    }

-    // Function to send a random Direct Message from any array
    async function sendRandDM(array) {
        const arrayChoice = array[Math.floor(Math.random() * array.length)];
        await message.author.send(arrayChoice); // give a notification of reset using a human like response.
@ -110,8 +85,7 @@ client.on('messageCreate', async (message) => {
    return;
    }

-    // Always ignore bots!
-    if (message.author.bot) return;
+    if (message.author.bot) return; // Ignore messages from bots

    // Check if any conversation is busy
    if (isAnyConversationBusy()) {
@ -121,61 +95,46 @@ client.on('messageCreate', async (message) => {
        sendRandDM(busyResponses);
        return;
    }
-
-    // Set user ID and get our conversation.
    const userID = message.author.id;
    let conversation = conversations.get(userID) || {
        messages: [],
        busy: false
    };

-    // If we do not have a conversation, lets generate one.
-    // This requires a chatflow for the API.
-    // Its better to have a default beginning conversation
-    // Providing context for the AI Model.
    if (conversation.messages.length === 0) {
        conversation.messages.push({
            role: 'user',
-            content: ` ${process.env.INIT_PROMPT}`
+            content: `Your name is rAi, you code, write and provide any information without any mistakes.`
        });
        conversation.messages.push({
            role: 'user',
-            content: ` User name: ${message.author.username}.`
+            content: `My name is ${message.author.username}.`
        });
        conversation.messages.push({
            role: 'assistant',
-            content: ` Hello, ${message.author.username}, how may I help you?`
+            content: `Hello, ${message.author.username}, how may I help you?`
        });
    }

-    // If a user needs a reset, we delete their MAP
    if (message.content === '!reset' || message.content === '!r') {
        conversations.delete(userID); // Delete user's conversation map if they request reset
        sendRand(userResetMessages)
        return;
    }

-    // Begin processing our conversation, this is our main work flow.
-
    // Append user message to conversation history
    conversation.messages.push({
        role: 'user',
-        content: ` ${message.cleanContent}`
+        content: message.cleanContent
    });

    try {
-
-        // Now we have our conversation set up
-        // Lets set precence to busy
-        // We also will set our conversations MAP to busy
-        // Locking out all other tasks
        setPresenceBusy()
        setBusy(message.author.id, true);

-        // Lets start generating the response
-        const response = await generateResponse(conversation, message);
+        const response = await generateResponse(conversation);

-        // Append bot message to conversation history when it is ready
+        // Append bot message to conversation history
        conversation.messages.push({
            role: 'assistant',
            content: response
@ -183,31 +142,8 @@ client.on('messageCreate', async (message) => {

        if (response && response.trim()) {
            // Send response to user if it's not empty
-            const limit = 1980;
-
-            // if we are over the discord char limit we need chunks...
-            if (response.length > limit) {
-
-                // We are going to check all of the message chunks if our response is too large for discord.
-                // We can extend our message size using chunks, the issue? 
-                // Users can abuse this feature, so we reject responses of 40 or more chunks to avoid API abuse.
-                const chunks = response.match(new RegExp(`.{1,${limit}}`, "g"));
-                if (chunks.length >= 40) return await message.channel.send("Response chunks too large. Try again");
-
-                // If we do not have too many chunks, let's send each one using our overflow delay
-                for (let i = 0; i < chunks.length; i++) {
-                    setTimeout(() => {
-                        message.channel.send(chunks[i]);
-                    }, i * (process.env.OVERFLOW_DELAY || 3) * 1000); // delay of 3 seconds between each chunk to save on API requests
-                }
-
-            } else {
-                // We are good to go message is not too large for discord, send the response
-                await message.channel.send(response.replace("@", ""));
-            }
-            // We have completed our task, lets go online
+            await message.channel.send(response);
            setPresenceOnline()
-            // set our conversation MAP to not busy
            setBusy(message.author.id, false);
        } else {
            // Handle empty response here
@ -217,270 +153,27 @@ client.on('messageCreate', async (message) => {
            setPresenceOnline()
            conversation.busy = false;
        }
-
        conversations.set(userID, conversation); // Update user's conversation map in memory
-
-        // Print the current conversation as it stands
-        console.log(conversation)
    } catch (err) {
-        // If we have any errors lets send a response
        console.error(err);
-        return sendRand(errorMessages)
+        sendRand(errorMessages)
    } finally {
-        // We are done! Lets finish up going online
        setPresenceOnline()
        setBusy(message.author.id, false);
    }
 });

-// Import cheerio for scraping
-import cheerio from 'cheerio';
-
-async function generateResponse(conversation, message) {
-    // Begin web scraper if a https:// OR http:// URL is detected
-    // Check if message contains a URL
-    const urlRegex = /(https?:\/\/[^\s]+)/g;
-    // Match our REGEX
-    const urls = message.content.match(urlRegex);
-
-    if (urls) {
-        // If there are multiple URLs, process them one by one
-        for (const url of urls) {
-            try {
-                const res = await fetch(url);
-                const html = await res.text();
-                const $ = cheerio.load(html);
-
-                // Extract page title, meta description and content
-                const pageTitle = $('head title').text().trim();
-                const pageDescription = $('head meta[name="description"]').attr('content');
-                const pageContent = $('body').text().trim();
-
-                // Construct response message with page details
-                let response = `Title: ${pageTitle}\n`;
-                if (pageDescription) {
-                    response += `Description: ${pageDescription}\n`;
-                }
-                if (pageContent) {
-                    // Lets check for content and grab only the amount as configured.
-                    const MAX_CONTENT_LENGTH = process.env.MAX_CONTENT_LENGTH;
-                    let plainTextContent = $('<div>').html(pageContent).text().trim().replace(/[\r\n\t]+/g, ' ');
-                    // Clean up code remove it from processing
-                    const codePattern = /\/\/|\/\*|\*\/|\{|\}|\[|\]|\bfunction\b|\bclass\b|\b0x[0-9A-Fa-f]+\b|\b0b[01]+\b/;
-                    const isCode = codePattern.test(plainTextContent);
-
-                    if (isCode) {
-                        plainTextContent = plainTextContent.replace(codePattern, '');
-                    }
-                    // Remove anything enclosed in brackets JUNK DATA
-                    plainTextContent = plainTextContent.replace(/ *\([^)]*\) */g, '');
-                    if (plainTextContent.length > MAX_CONTENT_LENGTH) {
-                        plainTextContent = plainTextContent.substring(0, MAX_CONTENT_LENGTH) + '...';
-                    }
-                    response += `Content: ${plainTextContent.trim()}`;
-                }
-                response += `URL: ${url}`;
-
-                // Get the index of the last message in the array
-                const lastMessageIndex = conversation.messages.length - 1;
-
-                // Append a new line and the new content to the existing content of the last message
-                conversation.messages[lastMessageIndex].content += "\n" + response;
-
-                console.log("A URL was provided, response: " + response)
-
-            } catch (err) {
-                console.error(err);
-                return sendRand(errorMessages);
-            }
-        }
-    }
-
-    // We need an abort controller to stop our progress message editor
+async function generateResponse(conversation) {
    const controller = new AbortController();
-    // Set our timeout for the controller
    const timeout = setTimeout(() => {
        controller.abort();
    }, 900000);

-    // Copy our messages from MAP
    const messagesCopy = [...conversation.messages]; // create a copy of the messages array

-    let time = 0
-    // define a function that shows the system load percentage and updates the message
-    const showSystemLoad = async () => {
-        // Configure our inital time
-        time = Number(time) + Number(process.env.REFRESH_INTERVAL);
-        // Get system stats
-        cpuStat.usagePercent(function (err, percent, seconds) {
-            if (err) {
-                return console.log(err);
-            }
-            // Setting out system stat vars
-            const systemLoad = percent;
-            const freeMemory = os.freemem() / 1024 / 1024 / 1024;
-            const totalMemory = os.totalmem() / 1024 / 1024 / 1024;
-            const usedMemory = totalMemory - freeMemory;
-
-            // lets build some embed data
-            let embedData;
-
-            // If we have NO GPU config lets send system stats only
-            if (process.env.GPU == 0) {
-                embedData = {
-                    color: 0x0099ff,
-                    title: 'Please wait.. I am thinking...',
-                    fields: [
-                        {
-                            name: 'System Load',
-                            value: `${systemLoad.toFixed(2)}%`,
-                        },
-                        {
-                            name: 'Memory Usage',
-                            value: `${usedMemory.toFixed(2)} GB / ${totalMemory.toFixed(2)} GB`,
-                        },
-                        {
-                            name: 'Time',
-                            value: `~${time} seconds.`,
-                        },
-                    ],
-                };
-                // if the message object doesn't exist, create it
-                if (!botMessage) {
-                    (async () => {
-                        if (time == 0) return
-                        botMessage = await message.channel.send({ embeds: [embedData] });
-                    })();
-                } else {
-                    (async () => {
-                        if (!isAnyConversationBusy()) {
-                            botMessage.delete()
-                        } else {
-                            await botMessage.edit({ embeds: [embedData] }); // otherwise, update the message
-                        }
-                    })();
-                }
-            } else {
-                // If we do have GPU=1 lets send some card info too!
-                smi(function (err, data) {
-                    if (err) {
-                        // Handle error if smi function fails
-                        console.error(err);
-                        return;
-                    }
-
-                    let utilization = data.nvidia_smi_log.gpu.utilization;
-                    let gpuUtilization = utilization.gpu_util;
-                    let memoryUtilization = utilization.memory_util;
-                    let gpuTemp = data.nvidia_smi_log.gpu.temperature.gpu_temp;
-
-                    // These are not used until nvidia-docker fixes their support
-                    let gpuTarget = data.nvidia_smi_log.gpu.temperature.gpu_target_temperature;
-                    let gpuFanSpeed = data.nvidia_smi_log.gpu.fan_speed;
-                    embedData = {
-                        color: 0x0099ff,
-                        title: 'Please wait.. I am thinking...',
-                        fields: [
-                            {
-                                name: 'System Load',
-                                value: `${systemLoad.toFixed(2)}%`,
-                            },
-                            {
-                                name: 'Memory Usage',
-                                value: `${usedMemory.toFixed(2)} GB / ${totalMemory.toFixed(2)} GB`,
-                            },
-                            {
-                                name: 'GPU Utilization',
-                                value: `${gpuUtilization}`,
-                            },
-                            {
-                                name: 'Memory Utilization',
-                                value: `${memoryUtilization}`,
-                            },
-                            {
-                                name: 'GPU Temperature',
-                                value: `${gpuTemp}`,
-                            },
-                            {
-                                name: 'Time',
-                                value: `~${time} seconds.`,
-                            },
-                        ],
-                    };
-                    // if the message object doesn't exist, create it
-                    if (!botMessage) {
-                        (async () => {
-                            if (time == 0) return
-                            botMessage = await message.channel.send({ embeds: [embedData] });
-                        })();
-                    } else {
-                        (async () => {
-                            if (!isAnyConversationBusy()) {
-                                botMessage.delete()
-                            } else {
-                                await botMessage.edit({ embeds: [embedData] }); // otherwise, update the message
-                            }
-                        })();
-                    }
-                })
-            }
-        });
-    };
+    console.log(conversation)

    try {
-
-        // call the function initially
-        await showSystemLoad();
-
-        // Grab the REFRESH_INTERVAL from ENV if not exist, lets use 7 (seconds)
-        const refreshInterval = setInterval(showSystemLoad, (process.env.REFRESH_INTERVAL || 7) * 1000);
-
-        function countLlamaTokens(messages) {
-            let totalTokens = 0;
-
-            for (const message of messages) {
-                if (message.role === 'user' || message.role === 'assistant') {
-                    const encodedTokens = llamaTokenizer.encode(message.content);
-                    totalTokens += encodedTokens.length;
-                }
-            }
-
-            return totalTokens;
-        }
-
-        let totalTokens = countLlamaTokens(messagesCopy);
-        console.log(`Total Llama tokens: ${totalTokens}`);
-        let tokenLength = totalTokens
-
-        // Remove older conversations if necessary
-        const maxLength = 1800;
-        const tolerance = 25; // allow for some flexibility in the token length
-        if (tokenLength > maxLength + tolerance) {
-            const diff = tokenLength - (maxLength + tolerance);
-            let removedTokens = 0;
-
-            // Iterate over the messages in reverse order
-            for (let i = messagesCopy.length - 1; i >= 0; i--) {
-                const message = messagesCopy[i];
-                const messageTokens = countLlamaTokens([message]);
-
-                // Check if the current message plus the tokens in the message is less than or equal to the diff
-                if (removedTokens + messageTokens <= diff) {
-                    messagesCopy.splice(i, 1);
-                    removedTokens += messageTokens;
-                    console.log(removedTokens + " removed \nAfter Resize: " + countLlamaTokens(messagesCopy));
-                } else {
-                    // Remove more than one message if necessary to bring the total length below the maximum allowed length
-                    const messagesToRemove = Math.floor(diff / messageTokens);
-                    for (let j = 0; j < messagesToRemove; j++) {
-                        messagesCopy.splice(i, 1);
-                        removedTokens += messageTokens;
-                    }
-                    break;
-                }
-            }
-        }
-        // Sending request to our API
        const response = await fetch(`http://${process.env.ROOT_IP}:${process.env.ROOT_PORT}/v1/chat/completions`, {
            method: 'POST',
            headers: {
@ -488,42 +181,25 @@ async function generateResponse(conversation, message) {
                'Content-Type': 'application/json'
            },
            body: JSON.stringify({
-                messages: messagesCopy,
-                max_tokens: Number(process.env.MAX_TOKENS), // add the max_tokens parameter here
-                repeat_penalty: Number(process.env.REPEAT_PENALTY)
-
+                messages: messagesCopy // use the copy of the messages array
            }),
            signal: controller.signal
        });

        const responseData = await response.json();
-        console.log(JSON.stringify(responseData));
+        console.log(JSON.stringify(responseData))
        const choice = responseData.choices[0];

-        const responseText = choice.message.content;
-
-        // clear the interval, replace the "please wait" message with the response, and update the message
-        console.log(responseText);
-        try {
-        if (time > 2) {
-        await botMessage.delete();
-        clearInterval(refreshInterval);
-        botMessage = null;
-        console.log("Time limit reached. Message deleted.");
-       }
-      } catch (err) {
-      console.log("Error deleting message: ", err);
-     }
-
-
-        return responseText;
+        // Remove "user None:" and any text after it from the response
+        const responseText = choice.message.content.trim();
+        const startIndex = responseText.indexOf('user None:');
+        const sanitizedResponse = startIndex === -1 ? responseText : responseText.substring(0, startIndex);

+        return sanitizedResponse;
    } catch (err) {
        throw err;
    } finally {
        clearTimeout(timeout);
-        botMessage = null;
-        time = 0
    }
 }

--- a/package-lock.json
+++ b/package-lock.json
@ -1,757 +0,0 @@
-{
-  "name": "llama-cpp-python-djs-bot",
-  "version": "1.0.0",
-  "lockfileVersion": 3,
-  "requires": true,
-  "packages": {
-    "": {
-      "name": "llama-cpp-python-djs-bot",
-      "version": "1.0.0",
-      "license": "ISC",
-      "dependencies": {
-        "cheerio": "^1.0.0-rc.12",
-        "cpu-stat": "^2.0.1",
-        "discord.js": "^14.9.0",
-        "dotenv": "^16.0.3",
-        "gpt-tokenizer": "^2.1.1",
-        "llama-tokenizer-js": "^1.0.0",
-        "node-fetch": "^3.3.1",
-        "node-nvidia-smi": "^1.0.0",
-        "os": "^0.1.2",
-        "tiktoken": "^1.0.10"
-      }
-    },
-    "node_modules/@discordjs/builders": {
-      "version": "1.6.3",
-      "resolved": "https://registry.npmjs.org/@discordjs/builders/-/builders-1.6.3.tgz",
-      "integrity": "sha512-CTCh8NqED3iecTNuiz49mwSsrc2iQb4d0MjMdmS/8pb69Y4IlzJ/DIy/p5GFlgOrFbNO2WzMHkWKQSiJ3VNXaw==",
-      "dependencies": {
-        "@discordjs/formatters": "^0.3.1",
-        "@discordjs/util": "^0.3.1",
-        "@sapphire/shapeshift": "^3.8.2",
-        "discord-api-types": "^0.37.41",
-        "fast-deep-equal": "^3.1.3",
-        "ts-mixer": "^6.0.3",
-        "tslib": "^2.5.0"
-      },
-      "engines": {
-        "node": ">=16.9.0"
-      }
-    },
-    "node_modules/@discordjs/collection": {
-      "version": "1.5.1",
-      "resolved": "https://registry.npmjs.org/@discordjs/collection/-/collection-1.5.1.tgz",
-      "integrity": "sha512-aWEc9DCf3TMDe9iaJoOnO2+JVAjeRNuRxPZQA6GVvBf+Z3gqUuWYBy2NWh4+5CLYq5uoc3MOvUQ5H5m8CJBqOA==",
-      "engines": {
-        "node": ">=16.9.0"
-      }
-    },
-    "node_modules/@discordjs/formatters": {
-      "version": "0.3.1",
-      "resolved": "https://registry.npmjs.org/@discordjs/formatters/-/formatters-0.3.1.tgz",
-      "integrity": "sha512-M7X4IGiSeh4znwcRGcs+49B5tBkNDn4k5bmhxJDAUhRxRHTiFAOTVUNQ6yAKySu5jZTnCbSvTYHW3w0rAzV1MA==",
-      "dependencies": {
-        "discord-api-types": "^0.37.41"
-      },
-      "engines": {
-        "node": ">=16.9.0"
-      }
-    },
-    "node_modules/@discordjs/rest": {
-      "version": "1.7.1",
-      "resolved": "https://registry.npmjs.org/@discordjs/rest/-/rest-1.7.1.tgz",
-      "integrity": "sha512-Ofa9UqT0U45G/eX86cURQnX7gzOJLG2oC28VhIk/G6IliYgQF7jFByBJEykPSHE4MxPhqCleYvmsrtfKh1nYmQ==",
-      "dependencies": {
-        "@discordjs/collection": "^1.5.1",
-        "@discordjs/util": "^0.3.0",
-        "@sapphire/async-queue": "^1.5.0",
-        "@sapphire/snowflake": "^3.4.2",
-        "discord-api-types": "^0.37.41",
-        "file-type": "^18.3.0",
-        "tslib": "^2.5.0",
-        "undici": "^5.22.0"
-      },
-      "engines": {
-        "node": ">=16.9.0"
-      }
-    },
-    "node_modules/@discordjs/util": {
-      "version": "0.3.1",
-      "resolved": "https://registry.npmjs.org/@discordjs/util/-/util-0.3.1.tgz",
-      "integrity": "sha512-HxXKYKg7vohx2/OupUN/4Sd02Ev3PBJ5q0gtjdcvXb0ErCva8jNHWfe/v5sU3UKjIB/uxOhc+TDOnhqffj9pRA==",
-      "engines": {
-        "node": ">=16.9.0"
-      }
-    },
-    "node_modules/@discordjs/ws": {
-      "version": "0.8.3",
-      "resolved": "https://registry.npmjs.org/@discordjs/ws/-/ws-0.8.3.tgz",
-      "integrity": "sha512-hcYtppanjHecbdNyCKQNH2I4RP9UrphDgmRgLYrATEQF1oo4sYSve7ZmGsBEXSzH72MO2tBPdWSThunbxUVk0g==",
-      "dependencies": {
-        "@discordjs/collection": "^1.5.1",
-        "@discordjs/rest": "^1.7.1",
-        "@discordjs/util": "^0.3.1",
-        "@sapphire/async-queue": "^1.5.0",
-        "@types/ws": "^8.5.4",
-        "@vladfrangu/async_event_emitter": "^2.2.1",
-        "discord-api-types": "^0.37.41",
-        "tslib": "^2.5.0",
-        "ws": "^8.13.0"
-      },
-      "engines": {
-        "node": ">=16.9.0"
-      }
-    },
-    "node_modules/@sapphire/async-queue": {
-      "version": "1.5.0",
-      "resolved": "https://registry.npmjs.org/@sapphire/async-queue/-/async-queue-1.5.0.tgz",
-      "integrity": "sha512-JkLdIsP8fPAdh9ZZjrbHWR/+mZj0wvKS5ICibcLrRI1j84UmLMshx5n9QmL8b95d4onJ2xxiyugTgSAX7AalmA==",
-      "engines": {
-        "node": ">=v14.0.0",
-        "npm": ">=7.0.0"
-      }
-    },
-    "node_modules/@sapphire/shapeshift": {
-      "version": "3.9.2",
-      "resolved": "https://registry.npmjs.org/@sapphire/shapeshift/-/shapeshift-3.9.2.tgz",
-      "integrity": "sha512-YRbCXWy969oGIdqR/wha62eX8GNHsvyYi0Rfd4rNW6tSVVa8p0ELiMEuOH/k8rgtvRoM+EMV7Csqz77YdwiDpA==",
-      "dependencies": {
-        "fast-deep-equal": "^3.1.3",
-        "lodash": "^4.17.21"
-      },
-      "engines": {
-        "node": ">=v14.0.0",
-        "npm": ">=7.0.0"
-      }
-    },
-    "node_modules/@sapphire/snowflake": {
-      "version": "3.5.1",
-      "resolved": "https://registry.npmjs.org/@sapphire/snowflake/-/snowflake-3.5.1.tgz",
-      "integrity": "sha512-BxcYGzgEsdlG0dKAyOm0ehLGm2CafIrfQTZGWgkfKYbj+pNNsorZ7EotuZukc2MT70E0UbppVbtpBrqpzVzjNA==",
-      "engines": {
-        "node": ">=v14.0.0",
-        "npm": ">=7.0.0"
-      }
-    },
-    "node_modules/@tokenizer/token": {
-      "version": "0.3.0",
-      "resolved": "https://registry.npmjs.org/@tokenizer/token/-/token-0.3.0.tgz",
-      "integrity": "sha512-OvjF+z51L3ov0OyAU0duzsYuvO01PH7x4t6DJx+guahgTnBHkhJdG7soQeTSFLWN3efnHyibZ4Z8l2EuWwJN3A=="
-    },
-    "node_modules/@types/node": {
-      "version": "20.3.0",
-      "resolved": "https://registry.npmjs.org/@types/node/-/node-20.3.0.tgz",
-      "integrity": "sha512-cumHmIAf6On83X7yP+LrsEyUOf/YlociZelmpRYaGFydoaPdxdt80MAbu6vWerQT2COCp2nPvHdsbD7tHn/YlQ=="
-    },
-    "node_modules/@types/ws": {
-      "version": "8.5.5",
-      "resolved": "https://registry.npmjs.org/@types/ws/-/ws-8.5.5.tgz",
-      "integrity": "sha512-lwhs8hktwxSjf9UaZ9tG5M03PGogvFaH8gUgLNbN9HKIg0dvv6q+gkSuJ8HN4/VbyxkuLzCjlN7GquQ0gUJfIg==",
-      "dependencies": {
-        "@types/node": "*"
-      }
-    },
-    "node_modules/@vladfrangu/async_event_emitter": {
-      "version": "2.2.2",
-      "resolved": "https://registry.npmjs.org/@vladfrangu/async_event_emitter/-/async_event_emitter-2.2.2.tgz",
-      "integrity": "sha512-HIzRG7sy88UZjBJamssEczH5q7t5+axva19UbZLO6u0ySbYPrwzWiXBcC0WuHyhKKoeCyneH+FvYzKQq/zTtkQ==",
-      "engines": {
-        "node": ">=v14.0.0",
-        "npm": ">=7.0.0"
-      }
-    },
-    "node_modules/boolbase": {
-      "version": "1.0.0",
-      "resolved": "https://registry.npmjs.org/boolbase/-/boolbase-1.0.0.tgz",
-      "integrity": "sha512-JZOSA7Mo9sNGB8+UjSgzdLtokWAky1zbztM3WRLCbZ70/3cTANmQmOdR7y2g+J0e2WXywy1yS468tY+IruqEww=="
-    },
-    "node_modules/busboy": {
-      "version": "1.6.0",
-      "resolved": "https://registry.npmjs.org/busboy/-/busboy-1.6.0.tgz",
-      "integrity": "sha512-8SFQbg/0hQ9xy3UNTB0YEnsNBbWfhf7RtnzpL7TkBiTBRfrQ9Fxcnz7VJsleJpyp6rVLvXiuORqjlHi5q+PYuA==",
-      "dependencies": {
-        "streamsearch": "^1.1.0"
-      },
-      "engines": {
-        "node": ">=10.16.0"
-      }
-    },
-    "node_modules/cheerio": {
-      "version": "1.0.0-rc.12",
-      "resolved": "https://registry.npmjs.org/cheerio/-/cheerio-1.0.0-rc.12.tgz",
-      "integrity": "sha512-VqR8m68vM46BNnuZ5NtnGBKIE/DfN0cRIzg9n40EIq9NOv90ayxLBXA8fXC5gquFRGJSTRqBq25Jt2ECLR431Q==",
-      "dependencies": {
-        "cheerio-select": "^2.1.0",
-        "dom-serializer": "^2.0.0",
-        "domhandler": "^5.0.3",
-        "domutils": "^3.0.1",
-        "htmlparser2": "^8.0.1",
-        "parse5": "^7.0.0",
-        "parse5-htmlparser2-tree-adapter": "^7.0.0"
-      },
-      "engines": {
-        "node": ">= 6"
-      },
-      "funding": {
-        "url": "https://github.com/cheeriojs/cheerio?sponsor=1"
-      }
-    },
-    "node_modules/cheerio-select": {
-      "version": "2.1.0",
-      "resolved": "https://registry.npmjs.org/cheerio-select/-/cheerio-select-2.1.0.tgz",
-      "integrity": "sha512-9v9kG0LvzrlcungtnJtpGNxY+fzECQKhK4EGJX2vByejiMX84MFNQw4UxPJl3bFbTMw+Dfs37XaIkCwTZfLh4g==",
-      "dependencies": {
-        "boolbase": "^1.0.0",
-        "css-select": "^5.1.0",
-        "css-what": "^6.1.0",
-        "domelementtype": "^2.3.0",
-        "domhandler": "^5.0.3",
-        "domutils": "^3.0.1"
-      },
-      "funding": {
-        "url": "https://github.com/sponsors/fb55"
-      }
-    },
-    "node_modules/cpu-stat": {
-      "version": "2.0.1",
-      "resolved": "https://registry.npmjs.org/cpu-stat/-/cpu-stat-2.0.1.tgz",
-      "integrity": "sha512-bC4ts/0IjYfNV6Dc7F2NauWM0tip0fneZjRek8HqX2ZERC4oSt6dmV+GTN1mfE9OKbLAppv58M2PVzKLGB731w=="
-    },
-    "node_modules/css-select": {
-      "version": "5.1.0",
-      "resolved": "https://registry.npmjs.org/css-select/-/css-select-5.1.0.tgz",
-      "integrity": "sha512-nwoRF1rvRRnnCqqY7updORDsuqKzqYJ28+oSMaJMMgOauh3fvwHqMS7EZpIPqK8GL+g9mKxF1vP/ZjSeNjEVHg==",
-      "dependencies": {
-        "boolbase": "^1.0.0",
-        "css-what": "^6.1.0",
-        "domhandler": "^5.0.2",
-        "domutils": "^3.0.1",
-        "nth-check": "^2.0.1"
-      },
-      "funding": {
-        "url": "https://github.com/sponsors/fb55"
-      }
-    },
-    "node_modules/css-what": {
-      "version": "6.1.0",
-      "resolved": "https://registry.npmjs.org/css-what/-/css-what-6.1.0.tgz",
-      "integrity": "sha512-HTUrgRJ7r4dsZKU6GjmpfRK1O76h97Z8MfS1G0FozR+oF2kG6Vfe8JE6zwrkbxigziPHinCJ+gCPjA9EaBDtRw==",
-      "engines": {
-        "node": ">= 6"
-      },
-      "funding": {
-        "url": "https://github.com/sponsors/fb55"
-      }
-    },
-    "node_modules/data-uri-to-buffer": {
-      "version": "4.0.1",
-      "resolved": "https://registry.npmjs.org/data-uri-to-buffer/-/data-uri-to-buffer-4.0.1.tgz",
-      "integrity": "sha512-0R9ikRb668HB7QDxT1vkpuUBtqc53YyAwMwGeUFKRojY/NWKvdZ+9UYtRfGmhqNbRkTSVpMbmyhXipFFv2cb/A==",
-      "engines": {
-        "node": ">= 12"
-      }
-    },
-    "node_modules/discord-api-types": {
-      "version": "0.37.43",
-      "resolved": "https://registry.npmjs.org/discord-api-types/-/discord-api-types-0.37.43.tgz",
-      "integrity": "sha512-bBhDWU3TF9KADxR/mHp1K4Bvu/LRtFQdGyBjADu4e66F3ZnD4kp12W/SJCttIaCcMXzPV3sfty6eDGRNRph51Q=="
-    },
-    "node_modules/discord.js": {
-      "version": "14.11.0",
-      "resolved": "https://registry.npmjs.org/discord.js/-/discord.js-14.11.0.tgz",
-      "integrity": "sha512-CkueWYFQ28U38YPR8HgsBR/QT35oPpMbEsTNM30Fs8loBIhnA4s70AwQEoy6JvLcpWWJO7GY0y2BUzZmuBMepQ==",
-      "dependencies": {
-        "@discordjs/builders": "^1.6.3",
-        "@discordjs/collection": "^1.5.1",
-        "@discordjs/formatters": "^0.3.1",
-        "@discordjs/rest": "^1.7.1",
-        "@discordjs/util": "^0.3.1",
-        "@discordjs/ws": "^0.8.3",
-        "@sapphire/snowflake": "^3.4.2",
-        "@types/ws": "^8.5.4",
-        "discord-api-types": "^0.37.41",
-        "fast-deep-equal": "^3.1.3",
-        "lodash.snakecase": "^4.1.1",
-        "tslib": "^2.5.0",
-        "undici": "^5.22.0",
-        "ws": "^8.13.0"
-      },
-      "engines": {
-        "node": ">=16.9.0"
-      }
-    },
-    "node_modules/dom-serializer": {
-      "version": "2.0.0",
-      "resolved": "https://registry.npmjs.org/dom-serializer/-/dom-serializer-2.0.0.tgz",
-      "integrity": "sha512-wIkAryiqt/nV5EQKqQpo3SToSOV9J0DnbJqwK7Wv/Trc92zIAYZ4FlMu+JPFW1DfGFt81ZTCGgDEabffXeLyJg==",
-      "dependencies": {
-        "domelementtype": "^2.3.0",
-        "domhandler": "^5.0.2",
-        "entities": "^4.2.0"
-      },
-      "funding": {
-        "url": "https://github.com/cheeriojs/dom-serializer?sponsor=1"
-      }
-    },
-    "node_modules/domelementtype": {
-      "version": "2.3.0",
-      "resolved": "https://registry.npmjs.org/domelementtype/-/domelementtype-2.3.0.tgz",
-      "integrity": "sha512-OLETBj6w0OsagBwdXnPdN0cnMfF9opN69co+7ZrbfPGrdpPVNBUj02spi6B1N7wChLQiPn4CSH/zJvXw56gmHw==",
-      "funding": [
-        {
-          "type": "github",
-          "url": "https://github.com/sponsors/fb55"
-        }
-      ]
-    },
-    "node_modules/domhandler": {
-      "version": "5.0.3",
-      "resolved": "https://registry.npmjs.org/domhandler/-/domhandler-5.0.3.tgz",
-      "integrity": "sha512-cgwlv/1iFQiFnU96XXgROh8xTeetsnJiDsTc7TYCLFd9+/WNkIqPTxiM/8pSd8VIrhXGTf1Ny1q1hquVqDJB5w==",
-      "dependencies": {
-        "domelementtype": "^2.3.0"
-      },
-      "engines": {
-        "node": ">= 4"
-      },
-      "funding": {
-        "url": "https://github.com/fb55/domhandler?sponsor=1"
-      }
-    },
-    "node_modules/domutils": {
-      "version": "3.1.0",
-      "resolved": "https://registry.npmjs.org/domutils/-/domutils-3.1.0.tgz",
-      "integrity": "sha512-H78uMmQtI2AhgDJjWeQmHwJJ2bLPD3GMmO7Zja/ZZh84wkm+4ut+IUnUdRa8uCGX88DiVx1j6FRe1XfxEgjEZA==",
-      "dependencies": {
-        "dom-serializer": "^2.0.0",
-        "domelementtype": "^2.3.0",
-        "domhandler": "^5.0.3"
-      },
-      "funding": {
-        "url": "https://github.com/fb55/domutils?sponsor=1"
-      }
-    },
-    "node_modules/dotenv": {
-      "version": "16.1.4",
-      "resolved": "https://registry.npmjs.org/dotenv/-/dotenv-16.1.4.tgz",
-      "integrity": "sha512-m55RtE8AsPeJBpOIFKihEmqUcoVncQIwo7x9U8ZwLEZw9ZpXboz2c+rvog+jUaJvVrZ5kBOeYQBX5+8Aa/OZQw==",
-      "engines": {
-        "node": ">=12"
-      },
-      "funding": {
-        "url": "https://github.com/motdotla/dotenv?sponsor=1"
-      }
-    },
-    "node_modules/entities": {
-      "version": "4.5.0",
-      "resolved": "https://registry.npmjs.org/entities/-/entities-4.5.0.tgz",
-      "integrity": "sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw==",
-      "engines": {
-        "node": ">=0.12"
-      },
-      "funding": {
-        "url": "https://github.com/fb55/entities?sponsor=1"
-      }
-    },
-    "node_modules/fast-deep-equal": {
-      "version": "3.1.3",
-      "resolved": "https://registry.npmjs.org/fast-deep-equal/-/fast-deep-equal-3.1.3.tgz",
-      "integrity": "sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q=="
-    },
-    "node_modules/fetch-blob": {
-      "version": "3.2.0",
-      "resolved": "https://registry.npmjs.org/fetch-blob/-/fetch-blob-3.2.0.tgz",
-      "integrity": "sha512-7yAQpD2UMJzLi1Dqv7qFYnPbaPx7ZfFK6PiIxQ4PfkGPyNyl2Ugx+a/umUonmKqjhM4DnfbMvdX6otXq83soQQ==",
-      "funding": [
-        {
-          "type": "github",
-          "url": "https://github.com/sponsors/jimmywarting"
-        },
-        {
-          "type": "paypal",
-          "url": "https://paypal.me/jimmywarting"
-        }
-      ],
-      "dependencies": {
-        "node-domexception": "^1.0.0",
-        "web-streams-polyfill": "^3.0.3"
-      },
-      "engines": {
-        "node": "^12.20 || >= 14.13"
-      }
-    },
-    "node_modules/file-type": {
-      "version": "18.5.0",
-      "resolved": "https://registry.npmjs.org/file-type/-/file-type-18.5.0.tgz",
-      "integrity": "sha512-yvpl5U868+V6PqXHMmsESpg6unQ5GfnPssl4dxdJudBrr9qy7Fddt7EVX1VLlddFfe8Gj9N7goCZH22FXuSQXQ==",
-      "dependencies": {
-        "readable-web-to-node-stream": "^3.0.2",
-        "strtok3": "^7.0.0",
-        "token-types": "^5.0.1"
-      },
-      "engines": {
-        "node": ">=14.16"
-      },
-      "funding": {
-        "url": "https://github.com/sindresorhus/file-type?sponsor=1"
-      }
-    },
-    "node_modules/formdata-polyfill": {
-      "version": "4.0.10",
-      "resolved": "https://registry.npmjs.org/formdata-polyfill/-/formdata-polyfill-4.0.10.tgz",
-      "integrity": "sha512-buewHzMvYL29jdeQTVILecSaZKnt/RJWjoZCF5OW60Z67/GmSLBkOFM7qh1PI3zFNtJbaZL5eQu1vLfazOwj4g==",
-      "dependencies": {
-        "fetch-blob": "^3.1.2"
-      },
-      "engines": {
-        "node": ">=12.20.0"
-      }
-    },
-    "node_modules/gpt-tokenizer": {
-      "version": "2.1.1",
-      "resolved": "https://registry.npmjs.org/gpt-tokenizer/-/gpt-tokenizer-2.1.1.tgz",
-      "integrity": "sha512-WlX+vj6aPaZ71U6Bf18fem+5k58zlgh2a4nbc7KHy6aGVIyq3nCh709b/8momu34sV/5t/SpzWi8LayWD9uyDw==",
-      "dependencies": {
-        "rfc4648": "^1.5.2"
-      }
-    },
-    "node_modules/htmlparser2": {
-      "version": "8.0.2",
-      "resolved": "https://registry.npmjs.org/htmlparser2/-/htmlparser2-8.0.2.tgz",
-      "integrity": "sha512-GYdjWKDkbRLkZ5geuHs5NY1puJ+PXwP7+fHPRz06Eirsb9ugf6d8kkXav6ADhcODhFFPMIXyxkxSuMf3D6NCFA==",
-      "funding": [
-        "https://github.com/fb55/htmlparser2?sponsor=1",
-        {
-          "type": "github",
-          "url": "https://github.com/sponsors/fb55"
-        }
-      ],
-      "dependencies": {
-        "domelementtype": "^2.3.0",
-        "domhandler": "^5.0.3",
-        "domutils": "^3.0.1",
-        "entities": "^4.4.0"
-      }
-    },
-    "node_modules/ieee754": {
-      "version": "1.2.1",
-      "resolved": "https://registry.npmjs.org/ieee754/-/ieee754-1.2.1.tgz",
-      "integrity": "sha512-dcyqhDvX1C46lXZcVqCpK+FtMRQVdIMN6/Df5js2zouUsqG7I6sFxitIC+7KYK29KdXOLHdu9zL4sFnoVQnqaA==",
-      "funding": [
-        {
-          "type": "github",
-          "url": "https://github.com/sponsors/feross"
-        },
-        {
-          "type": "patreon",
-          "url": "https://www.patreon.com/feross"
-        },
-        {
-          "type": "consulting",
-          "url": "https://feross.org/support"
-        }
-      ]
-    },
-    "node_modules/inherits": {
-      "version": "2.0.4",
-      "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz",
-      "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ=="
-    },
-    "node_modules/llama-tokenizer-js": {
-      "version": "1.0.0",
-      "resolved": "https://registry.npmjs.org/llama-tokenizer-js/-/llama-tokenizer-js-1.0.0.tgz",
-      "integrity": "sha512-O2FsnoXwOsbrM91bd5iX6cPDJvKwvIRghwhhdgGJr4rxy3Ap9QznORqIJHjTqwy9JF1jiqP6sARo0pB6ojW/Cg=="
-    },
-    "node_modules/lodash": {
-      "version": "4.17.21",
-      "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz",
-      "integrity": "sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg=="
-    },
-    "node_modules/lodash.snakecase": {
-      "version": "4.1.1",
-      "resolved": "https://registry.npmjs.org/lodash.snakecase/-/lodash.snakecase-4.1.1.tgz",
-      "integrity": "sha512-QZ1d4xoBHYUeuouhEq3lk3Uq7ldgyFXGBhg04+oRLnIz8o9T65Eh+8YdroUwn846zchkA9yDsDl5CVVaV2nqYw=="
-    },
-    "node_modules/node-domexception": {
-      "version": "1.0.0",
-      "resolved": "https://registry.npmjs.org/node-domexception/-/node-domexception-1.0.0.tgz",
-      "integrity": "sha512-/jKZoMpw0F8GRwl4/eLROPA3cfcXtLApP0QzLmUT/HuPCZWyB7IY9ZrMeKw2O/nFIqPQB3PVM9aYm0F312AXDQ==",
-      "funding": [
-        {
-          "type": "github",
-          "url": "https://github.com/sponsors/jimmywarting"
-        },
-        {
-          "type": "github",
-          "url": "https://paypal.me/jimmywarting"
-        }
-      ],
-      "engines": {
-        "node": ">=10.5.0"
-      }
-    },
-    "node_modules/node-fetch": {
-      "version": "3.3.1",
-      "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-3.3.1.tgz",
-      "integrity": "sha512-cRVc/kyto/7E5shrWca1Wsea4y6tL9iYJE5FBCius3JQfb/4P4I295PfhgbJQBLTx6lATE4z+wK0rPM4VS2uow==",
-      "dependencies": {
-        "data-uri-to-buffer": "^4.0.0",
-        "fetch-blob": "^3.1.4",
-        "formdata-polyfill": "^4.0.10"
-      },
-      "engines": {
-        "node": "^12.20.0 || ^14.13.1 || >=16.0.0"
-      },
-      "funding": {
-        "type": "opencollective",
-        "url": "https://opencollective.com/node-fetch"
-      }
-    },
-    "node_modules/node-nvidia-smi": {
-      "version": "1.0.0",
-      "resolved": "https://registry.npmjs.org/node-nvidia-smi/-/node-nvidia-smi-1.0.0.tgz",
-      "integrity": "sha512-vjgWll2M/FWihNPKKbhWKSfbk57WrpVv1XOR5iuIxIHNieTUuJ0HrDIZj5gsJ63dx2eTbm/dWm+fWIPfYm+D3Q==",
-      "dependencies": {
-        "xml2js": "^0.4.17"
-      },
-      "bin": {
-        "node-nvidia-smi": "node-nvidia-smi.js"
-      }
-    },
-    "node_modules/nth-check": {
-      "version": "2.1.1",
-      "resolved": "https://registry.npmjs.org/nth-check/-/nth-check-2.1.1.tgz",
-      "integrity": "sha512-lqjrjmaOoAnWfMmBPL+XNnynZh2+swxiX3WUE0s4yEHI6m+AwrK2UZOimIRl3X/4QctVqS8AiZjFqyOGrMXb/w==",
-      "dependencies": {
-        "boolbase": "^1.0.0"
-      },
-      "funding": {
-        "url": "https://github.com/fb55/nth-check?sponsor=1"
-      }
-    },
-    "node_modules/os": {
-      "version": "0.1.2",
-      "resolved": "https://registry.npmjs.org/os/-/os-0.1.2.tgz",
-      "integrity": "sha512-ZoXJkvAnljwvc56MbvhtKVWmSkzV712k42Is2mA0+0KTSRakq5XXuXpjZjgAt9ctzl51ojhQWakQQpmOvXWfjQ=="
-    },
-    "node_modules/parse5": {
-      "version": "7.1.2",
-      "resolved": "https://registry.npmjs.org/parse5/-/parse5-7.1.2.tgz",
-      "integrity": "sha512-Czj1WaSVpaoj0wbhMzLmWD69anp2WH7FXMB9n1Sy8/ZFF9jolSQVMu1Ij5WIyGmcBmhk7EOndpO4mIpihVqAXw==",
-      "dependencies": {
-        "entities": "^4.4.0"
-      },
-      "funding": {
-        "url": "https://github.com/inikulin/parse5?sponsor=1"
-      }
-    },
-    "node_modules/parse5-htmlparser2-tree-adapter": {
-      "version": "7.0.0",
-      "resolved": "https://registry.npmjs.org/parse5-htmlparser2-tree-adapter/-/parse5-htmlparser2-tree-adapter-7.0.0.tgz",
-      "integrity": "sha512-B77tOZrqqfUfnVcOrUvfdLbz4pu4RopLD/4vmu3HUPswwTA8OH0EMW9BlWR2B0RCoiZRAHEUu7IxeP1Pd1UU+g==",
-      "dependencies": {
-        "domhandler": "^5.0.2",
-        "parse5": "^7.0.0"
-      },
-      "funding": {
-        "url": "https://github.com/inikulin/parse5?sponsor=1"
-      }
-    },
-    "node_modules/peek-readable": {
-      "version": "5.0.0",
-      "resolved": "https://registry.npmjs.org/peek-readable/-/peek-readable-5.0.0.tgz",
-      "integrity": "sha512-YtCKvLUOvwtMGmrniQPdO7MwPjgkFBtFIrmfSbYmYuq3tKDV/mcfAhBth1+C3ru7uXIZasc/pHnb+YDYNkkj4A==",
-      "engines": {
-        "node": ">=14.16"
-      },
-      "funding": {
-        "type": "github",
-        "url": "https://github.com/sponsors/Borewit"
-      }
-    },
-    "node_modules/readable-stream": {
-      "version": "3.6.2",
-      "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.6.2.tgz",
-      "integrity": "sha512-9u/sniCrY3D5WdsERHzHE4G2YCXqoG5FTHUiCC4SIbr6XcLZBY05ya9EKjYek9O5xOAwjGq+1JdGBAS7Q9ScoA==",
-      "dependencies": {
-        "inherits": "^2.0.3",
-        "string_decoder": "^1.1.1",
-        "util-deprecate": "^1.0.1"
-      },
-      "engines": {
-        "node": ">= 6"
-      }
-    },
-    "node_modules/readable-web-to-node-stream": {
-      "version": "3.0.2",
-      "resolved": "https://registry.npmjs.org/readable-web-to-node-stream/-/readable-web-to-node-stream-3.0.2.tgz",
-      "integrity": "sha512-ePeK6cc1EcKLEhJFt/AebMCLL+GgSKhuygrZ/GLaKZYEecIgIECf4UaUuaByiGtzckwR4ain9VzUh95T1exYGw==",
-      "dependencies": {
-        "readable-stream": "^3.6.0"
-      },
-      "engines": {
-        "node": ">=8"
-      },
-      "funding": {
-        "type": "github",
-        "url": "https://github.com/sponsors/Borewit"
-      }
-    },
-    "node_modules/rfc4648": {
-      "version": "1.5.2",
-      "resolved": "https://registry.npmjs.org/rfc4648/-/rfc4648-1.5.2.tgz",
-      "integrity": "sha512-tLOizhR6YGovrEBLatX1sdcuhoSCXddw3mqNVAcKxGJ+J0hFeJ+SjeWCv5UPA/WU3YzWPPuCVYgXBKZUPGpKtg=="
-    },
-    "node_modules/safe-buffer": {
-      "version": "5.2.1",
-      "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz",
-      "integrity": "sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==",
-      "funding": [
-        {
-          "type": "github",
-          "url": "https://github.com/sponsors/feross"
-        },
-        {
-          "type": "patreon",
-          "url": "https://www.patreon.com/feross"
-        },
-        {
-          "type": "consulting",
-          "url": "https://feross.org/support"
-        }
-      ]
-    },
-    "node_modules/sax": {
-      "version": "1.2.4",
-      "resolved": "https://registry.npmjs.org/sax/-/sax-1.2.4.tgz",
-      "integrity": "sha512-NqVDv9TpANUjFm0N8uM5GxL36UgKi9/atZw+x7YFnQ8ckwFGKrl4xX4yWtrey3UJm5nP1kUbnYgLopqWNSRhWw=="
-    },
-    "node_modules/streamsearch": {
-      "version": "1.1.0",
-      "resolved": "https://registry.npmjs.org/streamsearch/-/streamsearch-1.1.0.tgz",
-      "integrity": "sha512-Mcc5wHehp9aXz1ax6bZUyY5afg9u2rv5cqQI3mRrYkGC8rW2hM02jWuwjtL++LS5qinSyhj2QfLyNsuc+VsExg==",
-      "engines": {
-        "node": ">=10.0.0"
-      }
-    },
-    "node_modules/string_decoder": {
-      "version": "1.3.0",
-      "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.3.0.tgz",
-      "integrity": "sha512-hkRX8U1WjJFd8LsDJ2yQ/wWWxaopEsABU1XfkM8A+j0+85JAGppt16cr1Whg6KIbb4okU6Mql6BOj+uup/wKeA==",
-      "dependencies": {
-        "safe-buffer": "~5.2.0"
-      }
-    },
-    "node_modules/strtok3": {
-      "version": "7.0.0",
-      "resolved": "https://registry.npmjs.org/strtok3/-/strtok3-7.0.0.tgz",
-      "integrity": "sha512-pQ+V+nYQdC5H3Q7qBZAz/MO6lwGhoC2gOAjuouGf/VO0m7vQRh8QNMl2Uf6SwAtzZ9bOw3UIeBukEGNJl5dtXQ==",
-      "dependencies": {
-        "@tokenizer/token": "^0.3.0",
-        "peek-readable": "^5.0.0"
-      },
-      "engines": {
-        "node": ">=14.16"
-      },
-      "funding": {
-        "type": "github",
-        "url": "https://github.com/sponsors/Borewit"
-      }
-    },
-    "node_modules/tiktoken": {
-      "version": "1.0.10",
-      "resolved": "https://registry.npmjs.org/tiktoken/-/tiktoken-1.0.10.tgz",
-      "integrity": "sha512-gF8ndTCNu7WcRFbl1UUWaFIB4CTXmHzS3tRYdyUYF7x3C6YR6Evoao4zhKDmWIwv2PzNbzoQMV8Pxt+17lEDbA=="
-    },
-    "node_modules/token-types": {
-      "version": "5.0.1",
-      "resolved": "https://registry.npmjs.org/token-types/-/token-types-5.0.1.tgz",
-      "integrity": "sha512-Y2fmSnZjQdDb9W4w4r1tswlMHylzWIeOKpx0aZH9BgGtACHhrk3OkT52AzwcuqTRBZtvvnTjDBh8eynMulu8Vg==",
-      "dependencies": {
-        "@tokenizer/token": "^0.3.0",
-        "ieee754": "^1.2.1"
-      },
-      "engines": {
-        "node": ">=14.16"
-      },
-      "funding": {
-        "type": "github",
-        "url": "https://github.com/sponsors/Borewit"
-      }
-    },
-    "node_modules/ts-mixer": {
-      "version": "6.0.3",
-      "resolved": "https://registry.npmjs.org/ts-mixer/-/ts-mixer-6.0.3.tgz",
-      "integrity": "sha512-k43M7uCG1AkTyxgnmI5MPwKoUvS/bRvLvUb7+Pgpdlmok8AoqmUaZxUUw8zKM5B1lqZrt41GjYgnvAi0fppqgQ=="
-    },
-    "node_modules/tslib": {
-      "version": "2.5.3",
-      "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.5.3.tgz",
-      "integrity": "sha512-mSxlJJwl3BMEQCUNnxXBU9jP4JBktcEGhURcPR6VQVlnP0FdDEsIaz0C35dXNGLyRfrATNofF0F5p2KPxQgB+w=="
-    },
-    "node_modules/undici": {
-      "version": "5.22.1",
-      "resolved": "https://registry.npmjs.org/undici/-/undici-5.22.1.tgz",
-      "integrity": "sha512-Ji2IJhFXZY0x/0tVBXeQwgPlLWw13GVzpsWPQ3rV50IFMMof2I55PZZxtm4P6iNq+L5znYN9nSTAq0ZyE6lSJw==",
-      "dependencies": {
-        "busboy": "^1.6.0"
-      },
-      "engines": {
-        "node": ">=14.0"
-      }
-    },
-    "node_modules/util-deprecate": {
-      "version": "1.0.2",
-      "resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz",
-      "integrity": "sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw=="
-    },
-    "node_modules/web-streams-polyfill": {
-      "version": "3.2.1",
-      "resolved": "https://registry.npmjs.org/web-streams-polyfill/-/web-streams-polyfill-3.2.1.tgz",
-      "integrity": "sha512-e0MO3wdXWKrLbL0DgGnUV7WHVuw9OUvL4hjgnPkIeEvESk74gAITi5G606JtZPp39cd8HA9VQzCIvA49LpPN5Q==",
-      "engines": {
-        "node": ">= 8"
-      }
-    },
-    "node_modules/ws": {
-      "version": "8.13.0",
-      "resolved": "https://registry.npmjs.org/ws/-/ws-8.13.0.tgz",
-      "integrity": "sha512-x9vcZYTrFPC7aSIbj7sRCYo7L/Xb8Iy+pW0ng0wt2vCJv7M9HOMy0UoN3rr+IFC7hb7vXoqS+P9ktyLLLhO+LA==",
-      "engines": {
-        "node": ">=10.0.0"
-      },
-      "peerDependencies": {
-        "bufferutil": "^4.0.1",
-        "utf-8-validate": ">=5.0.2"
-      },
-      "peerDependenciesMeta": {
-        "bufferutil": {
-          "optional": true
-        },
-        "utf-8-validate": {
-          "optional": true
-        }
-      }
-    },
-    "node_modules/xml2js": {
-      "version": "0.4.23",
-      "resolved": "https://registry.npmjs.org/xml2js/-/xml2js-0.4.23.tgz",
-      "integrity": "sha512-ySPiMjM0+pLDftHgXY4By0uswI3SPKLDw/i3UXbnO8M/p28zqexCUoPmQFrYD+/1BzhGJSs2i1ERWKJAtiLrug==",
-      "dependencies": {
-        "sax": ">=0.6.0",
-        "xmlbuilder": "~11.0.0"
-      },
-      "engines": {
-        "node": ">=4.0.0"
-      }
-    },
-    "node_modules/xmlbuilder": {
-      "version": "11.0.1",
-      "resolved": "https://registry.npmjs.org/xmlbuilder/-/xmlbuilder-11.0.1.tgz",
-      "integrity": "sha512-fDlsI/kFEx7gLvbecc0/ohLG50fugQp8ryHzMTuW9vSa1GJ0XYWKnhsUx7oie3G98+r56aTQIUB4kht42R3JvA==",
-      "engines": {
-        "node": ">=4.0"
-      }
-    }
-  }
-}
--- a/package.json
+++ b/package.json
@ -10,15 +10,8 @@
  "author": "",
  "license": "ISC",
  "dependencies": {
-    "cheerio": "^1.0.0-rc.12",
-    "cpu-stat": "^2.0.1",
    "discord.js": "^14.9.0",
    "dotenv": "^16.0.3",
-    "gpt-tokenizer": "^2.1.1",
-    "llama-tokenizer-js": "^1.0.0",
-    "node-fetch": "^3.3.1",
-    "node-nvidia-smi": "^1.0.0",
-    "os": "^0.1.2",
-    "tiktoken": "^1.0.10"
+    "node-fetch": "^3.3.1"
  }
 }
--- a/server/Dockerfile
+++ b/server/Dockerfile
@ -1,11 +1,15 @
-FROM ubuntu:latest
+FROM python:bullseye

-RUN apt-get update; \
+# Install the compiler toolchain needed to build llama-cpp-python's native
+# extension. Chain with && so a failed `apt-get update` aborts the build, and
+# drop the apt lists in the same layer so they do not persist in the image.
+RUN apt-get update && \
     apt-get install -y --no-install-recommends \
-    build-essential cmake python3 python3-pip
+    build-essential && \
+    rm -rf /var/lib/apt/lists/*

 WORKDIR /usr/src/app

-RUN CMAKE_ARGS="-DLLAMA_METAL=on" FORCE_CMAKE=1 pip install llama-cpp-python[server]
+RUN pip install --no-cache-dir "llama-cpp-python[server]"

-CMD python3 -m llama_cpp.server
+# Exec form: the server runs as PID 1 and receives SIGTERM from `docker stop`.
+CMD ["python3", "-m", "llama_cpp.server"]