Fix up NodeJS install GPU side

Fix up HF config - Needs testing
fix nodejs install for HuggingFace
2023-11-14 18:54:17 -05:00 · 2023-11-14 18:21:35 -05:00 · 2023-11-14 18:19:39 -05:00 · 2023-11-14 18:18:37 -05:00 · 2023-08-21 21:15:10 -04:00 · 2023-08-15 01:41:39 -04:00
11 changed files with 1126 additions and 67 deletions
--- a/Dockerfile.gpu
+++ b/Dockerfile.gpu
@ -0,0 +1,23 @@
 # Image for the Discord bot (llamabot.js) used by the GPU compose stack.
 FROM nvidia/cuda:12.1.1-devel-ubuntu20.04
 # Make a RUN step fail when any command of a pipeline fails (the curl | gpg pipe below).
 SHELL ["/bin/bash", "-o", "pipefail", "-c"]
 # Build-time only: keeps apt non-interactive without persisting the setting in the runtime environment.
 ARG DEBIAN_FRONTEND=noninteractive
 # Node.js major release line installed from the NodeSource repository.
 ARG NODE_MAJOR=18
 WORKDIR /app
 # Register the NodeSource repository and install Node.js in one layer, so a cached
 # "apt-get update" can never be paired with a newer install step, and drop the apt
 # lists in the same layer. The build already runs as root, so sudo is not needed.
 RUN apt-get update \
     && apt-get install -y --no-install-recommends ca-certificates curl gnupg \
     && mkdir -p /etc/apt/keyrings \
     && curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key \
         | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg \
     && echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_${NODE_MAJOR}.x nodistro main" \
         > /etc/apt/sources.list.d/nodesource.list \
     && apt-get update \
     && apt-get install -y --no-install-recommends nodejs \
     && rm -rf /var/lib/apt/lists/*
 # Copy the manifests first so the dependency layer stays cached until they change.
 COPY package*.json ./
 # npm ci installs exactly what package-lock.json pins and fails if it disagrees with package.json.
 RUN npm ci --omit=dev \
     && npm cache clean --force
 COPY . .
 # Exec form runs node directly instead of through "/bin/sh -c".
 CMD ["node", "llamabot.js"]
--- a/README.md
+++ b/README.md
@ -78,4 +78,26 @@ This will automatically configure the API for you as well as the bot in two sepe
 5. `docker compose up -d`
 # Docker Compose with GPU
 This will automatically configure the API that supports cuBLAS and GPU inference for you as well as the bot in two separate containers within a stack.
 NOTE: Caching for GPU has been fixed.
 1. `git clone https://git.ssh.surf/snxraven/llama-cpp-python-djs-bot.git` - Clone the repo
 2. `mv docker-compose.yml docker-compose.nogpu.yml; mv docker-compose.gpu.yml docker-compose.yml;` - Move nongpu compose out of the way, Enable GPU Support
 3. `mv Dockerfile Dockerfile.nongpu; mv Dockerfile.gpu Dockerfile;` - Move nongpu Dockerfile out of the way, enable GPU Support
 4. `cp default.gpu.env .env` - Copy the default GPU .env to its proper location
 5. Set DATA_DIR in .env to the exact location of your model files.
 6. Edit the MODEL entry in docker-compose.yml to ensure the correct model bin is set
 7. Set N_GPU_LAYERS to the number of layers you would like to offload to the GPU
 8. `docker compose up -d`
 Want to make this better? Issue a pull request!
--- a/default.env
+++ b/default.env
@ -1,14 +1,14 @@
 # Discord Token
-THE_TOKEN = "DISCORD_TOKEN_HERE"
+THE_TOKEN = ""
 # The Channel IDs the bot will operate in, separated by commas
-CHANNEL_IDS = 1094494101631680653,1094628334727614605
+CHANNEL_IDS = 
 # The INIT prompt for all conversations.
-INIT_PROMPT = "Assistant name: ChatAI. You code, write and provide any information without any mistakes."
+INIT_PROMPT = "Assistant name: ChatAI. You code, write and provide any information without any mistakes. You can read basic text from URLs if a user sends a user via scraping."
 # Loading Embed Refresh Timing
-REFRESH_INTERVAL=10
+REFRESH_INTERVAL=2
 # When a message is too large for discord we chunk the response into separate messages.
 # To ensure we do not rate limit the bot we send these at a delay interval.
@ -19,30 +19,40 @@ OVERFLOW_DELAY=3
 MAX_CONTENT_LENGTH=2000
 # Max tokens for Generations
-MAX_TOKENS = 1024
+MAX_TOKENS = 1499
 # ROOT_IP is only used when running the bot without docker compose
-ROOT_IP = 192.168.0.15
+ROOT_IP = 127.0.0.1
 # PORT  is only used when running the bot without docker compose
 ROOT_PORT = 8000
 # Directory to your models (llama.cpp specific settings)
-DATA_DIR = /home/USERNAME/weights
+DATA_DIR = /Users/username/code/models
 # Enable Experimental Message Caches (Limited to single session)
 # Cache will use ~1.4 GB or MORE of RAM. ONLY ENABLE IF YOUR SYSTEM CAN HANDLE THIS.
 CACHE = 1
 CACHE_TYPE = "disk"
 # Set number of threads to use, currently, a standard thread will utilize 1 whole core
 # I usually set this to the number of cores I physically have, OR 2 cores less to allow for other processes.
-N_THREADS = 4
+N_THREADS = 10
 # Always use MMAP unless you know what you are doing
-USE_MMAP=1
+#USE_MMAP=1
 # Only use MLOCK if you know what it does!
 USE_MLOCK=0
 # The higher the number the more hard core.
 REPEAT_PENALTY=1
 # GPU SPECIFIC SETTINGS BELOW
 GPU=0
 N_GPU_LAYERS=32
 PYTHONUNBUFFERED=1
--- a/docker-compose.gpu.yml
+++ b/docker-compose.gpu.yml
@ -0,0 +1,34 @@
 # Compose stack for GPU inference: the API server (backend) and the Discord bot (frontend).
 version: '3.9'
 services:
  # API server, built from ./gpu-server.
  backend:
    container_name: llama-gpu-server
    restart: unless-stopped
    build:
      context: ./gpu-server
    # Load the settings from .env into the container environment.
    env_file: .env
    volumes:
      # Mount the host directory named by DATA_DIR at /usr/src/app/models.
      - ${DATA_DIR}:/usr/src/app/models
    # Values set here take precedence over the same keys coming from env_file.
    environment:
      # Same name as this service's container_name.
      - HOST=llama-gpu-server
      # Relative path; the volume above is mounted at /usr/src/app/models.
      - MODEL=./models/ggml-vic7b-q5_1.bin.1
      - NVIDIA_VISIBLE_DEVICES=all
    runtime: nvidia
  # Discord bot, built from the repository root.
  frontend:
    container_name: llama-djs-bot
    restart: unless-stopped
    build:
      context: .
    depends_on:
      - backend
    environment:
      # Keys without a value are passed through from the environment compose runs in.
      - THE_TOKEN
      - REFRESH_INTERVAL
      - CHANNEL_IDS
      - GPU
      # Address the backend by its container name.
      - ROOT_IP=llama-gpu-server
      - ROOT_PORT=8000
      # NOTE(review): the quote characters below look like they become part of the value — confirm this is intended.
      - INIT_PROMPT='Assistant Name':' ChatAI. You code, write and provide any information without any mistakes.'
      - NVIDIA_VISIBLE_DEVICES=all
    runtime: nvidia
--- a/docker-compose.yml
+++ b/docker-compose.yml
@ -11,7 +11,7 @@ services:
      - ${DATA_DIR}:/usr/src/app/models
    environment:
      - HOST=llama-python-server
-      - MODEL=./models/ggml-vic7b-q4_0.bin
+      - MODEL=./models/vicuna-7b-1.1.ggmlv3.q6_K.bin
  llama-python-djs-bot:
    container_name: llama-python-djs-bot
    restart: unless-stopped
@ -21,6 +21,7 @@ services:
      - llama-python-server
    environment:
      - THE_TOKEN
      - GPU
      - REFRESH_INTERVAL
      - CHANNEL_IDS
      - ROOT_IP=llama-python-server
--- a/gpu-server/Dockerfile
+++ b/gpu-server/Dockerfile
@ -0,0 +1,19 @@
 # API server image: llama-cpp-python built with cuBLAS on top of the CUDA devel image.
 FROM nvidia/cuda:12.1.1-devel-ubuntu20.04
 # Build-time only: keeps apt non-interactive without persisting the setting in the runtime environment.
 ARG DEBIAN_FRONTEND=noninteractive
 ENV TZ=Etc/GMT
 # Install the deps needed to build llama-cpp-python and drop the apt lists in the same layer.
 RUN apt-get update \
     && apt-get install -y --no-install-recommends \
         build-essential \
         cmake \
         git \
         python3 \
         python3-pip \
     && rm -rf /var/lib/apt/lists/*
 # WORKDIR creates missing parent directories, so a separate /usr/src step is not needed.
 WORKDIR /usr/src/app
 # Build llama-cpp-python w/CuBLAS; the requirement is quoted so the shell never globs the [server] extra.
 RUN CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 pip install --no-cache-dir "llama-cpp-python[server]"
 # We need to set the host to 0.0.0.0 to allow outside access
 ENV HOST=0.0.0.0
 # Run the server (exec form: python3 is started directly instead of through "/bin/sh -c").
 CMD ["python3", "-m", "llama_cpp.server"]
--- a/huggingface-config/Dockerfile
+++ b/huggingface-config/Dockerfile
@ -4,7 +4,13 @@ RUN apt update
 RUN DEBIAN_FRONTEND=noninteractive  apt install curl sudo -y
-RUN curl -fsSL https://deb.nodesource.com/setup_18.x | sudo -E bash - 
+RUN apt-get install -y ca-certificates curl gnupg
 RUN sudo mkdir -p /etc/apt/keyrings
 RUN curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key | sudo gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg
 ENV NODE_MAJOR=18
 RUN echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_$NODE_MAJOR.x nodistro main" | sudo tee /etc/apt/sources.list.d/nodesource.list
 RUN apt-get update
 RUN apt-get install nodejs -y 
 RUN DEBIAN_FRONTEND=noninteractive  apt install nodejs -y
@ -30,13 +36,13 @@ RUN npm i
 ENV HOST localhost
 ENV PORT 7860
-ENV MODEL=/code/ggml-vic7b-q4_0.bin
+ENV MODEL=/code/mistral-7b-instruct-v0.1.Q2_K.gguf
 ENV CACHE=1
 ENV USE_MLOCK=0
 ENV REPEAT_PENALTY=1
-ENV MODEL=/code/ggml-vic7b-q4_0.bin
+ENV MODEL=/code/mistral-7b-instruct-v0.1.Q2_K.gguf
 ENV PM2_HOME=/code/.pm2
-RUN wget -q https://huggingface.co/eachadea/ggml-vicuna-7b-1.1/resolve/main/ggml-vic7b-q4_0.bin
+RUN wget -q -O mistral-7b-instruct-v0.1.Q2_K.gguf "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q2_K.gguf?download=true"
 CMD /bin/bash /code/start.sh
--- a/llamabot.js
+++ b/llamabot.js
@ -5,6 +5,8 @@ import { resetResponses, userResetMessages } from './assets/resetMessages.js';
 import { errorMessages, busyResponses } from './assets/errorMessages.js';
 import cpuStat from 'cpu-stat';
 import os from 'os';
 import smi from 'node-nvidia-smi';
 import llamaTokenizer from 'llama-tokenizer-js'
 import {
    Client,
@ -27,8 +29,14 @@ const client = new Client({
 // Grab ChannelIDs from the .env file
 const channelIDs = process.env.CHANNEL_IDS.split(',');
 // Store Conversations in a MAP
 const conversations = new Map();
 let botMessage; // define a variable to hold the message object
 // Set busy function this allows us to set our bot into busy mode
 // locking out all other tasks until the current one is complete
 function setBusy(userId, isBusy) {
    if (conversations.has(userId)) {
        conversations.get(userId).busy = isBusy;
@ -39,6 +47,8 @@ function setBusy(userId, isBusy) {
    }
 }
 // General check, if any conversation is busy
 // If yes, flag it and let us know
 function isAnyConversationBusy() {
    for (const conversation of conversations.values()) {
        if (conversation.busy) {
@ -49,6 +59,7 @@ function isAnyConversationBusy() {
    return false;
 }
 // Setting our presence to busy within the bot's status
 function setPresenceBusy() {
    client.user.setPresence({
        activities: [{
@ -59,6 +70,8 @@ function setPresenceBusy() {
    });
 }
 // Setting our presence to ready within the bot's status
 function setPresenceOnline() {
    client.user.setPresence({
        activities: [{
@ -70,18 +83,23 @@ function setPresenceOnline() {
 }
 // When we have logged in to discord api
 // Set presence to online.
 client.once('ready', () => {
    console.log('Bot is ready.');
    setPresenceOnline()
 });
 // When a message is sent within discord, lets handle it.
 client.on('messageCreate', async (message) => {
    // Function to send a random message from any array
    async function sendRand(array) {
        const arrayChoice = array[Math.floor(Math.random() * array.length)];
        await message.channel.send(arrayChoice); // give a notification of reset using a human like response.
    }
    // Function to send a random Direct Message from any array
    async function sendRandDM(array) {
        const arrayChoice = array[Math.floor(Math.random() * array.length)];
        await message.author.send(arrayChoice); // give a notification of reset using a human like response.
@ -92,7 +110,8 @@ client.on('messageCreate', async (message) => {
        return;
    }
-    if (message.author.bot) return; // Ignore messages from bots
+    // Always ignore bots!
    if (message.author.bot) return;
    // Check if any conversation is busy
    if (isAnyConversationBusy()) {
@ -102,12 +121,18 @@ client.on('messageCreate', async (message) => {
        sendRandDM(busyResponses);
        return;
    }
    // Set user ID and get our conversation.
    const userID = message.author.id;
    let conversation = conversations.get(userID) || {
        messages: [],
        busy: false
    };
    // If we do not have a conversation, lets generate one.
    // This requires a chatflow for the API.
    // Its better to have a default beginning conversation
    // Providing context for the AI Model.
    if (conversation.messages.length === 0) {
        conversation.messages.push({
            role: 'user',
@ -123,12 +148,15 @@ client.on('messageCreate', async (message) => {
        });
    }
    // If a user needs a reset, we delete their MAP
    if (message.content === '!reset' || message.content === '!r') {
        conversations.delete(userID); // Delete user's conversation map if they request reset
        sendRand(userResetMessages)
        return;
    }
    // Begin processing our conversation, this is our main work flow.
    // Append user message to conversation history
    conversation.messages.push({
        role: 'user',
@ -136,12 +164,18 @@ client.on('messageCreate', async (message) => {
    });
    try {
        // Now we have our conversation set up
        // Lets set presence to busy
        // We also will set our conversations MAP to busy
        // Locking out all other tasks
        setPresenceBusy()
        setBusy(message.author.id, true);
        // Lets start generating the response
        const response = await generateResponse(conversation, message);
-        // Append bot message to conversation history
+        // Append bot message to conversation history when it is ready
        conversation.messages.push({
            role: 'assistant',
            content: response
@ -154,22 +188,26 @@ client.on('messageCreate', async (message) => {
            // if we are over the discord char limit we need chunks...
            if (response.length > limit) {
-              
+                // We are going to check all of the message chunks if our response is too large for discord.
                // We can extend our message size using chunks, the issue? 
                // Users can abuse this feature, so we refuse responses of 40 or more chunks to avoid API abuse.
                const chunks = response.match(new RegExp(`.{1,${limit}}`, "g"));
-                if (chunks.length  >= 15) return await message.channel.send("Response chunks too large. Try again");
+                if (chunks.length >= 40) return await message.channel.send("Response chunks too large. Try again");
                // If we do not have too many chunks, lets send each one using our overflow delay
                for (let i = 0; i < chunks.length; i++) {
                    setTimeout(() => {
                        message.channel.send(chunks[i]);
                    }, i * (process.env.OVERFLOW_DELAY || 3) * 1000); // delay of 3 seconds between each chunk to save on API requests
                }
            } else {
-                // We are good to go, send the response
+                // We are good to go message is not too large for discord, send the response
                await message.channel.send(response.replace("@", ""));
            }
-
+            // We have completed our task, lets go online
            setPresenceOnline()
            // set our conversation MAP to not busy
            setBusy(message.author.id, false);
        } else {
            // Handle empty response here
@ -179,24 +217,30 @@ client.on('messageCreate', async (message) => {
            setPresenceOnline()
            conversation.busy = false;
        }
        conversations.set(userID, conversation); // Update user's conversation map in memory
        console.log(conversation)
        conversations.set(userID, conversation); // Update user's conversation map in memory
        // Print the current conversation as it stands
        console.log(conversation)
    } catch (err) {
        // If we have any errors lets send a response
        console.error(err);
        return sendRand(errorMessages)
    } finally {
        // We are done! Lets finish up going online
        setPresenceOnline()
        setBusy(message.author.id, false);
    }
 });
 // Import cheerio for scraping
 import cheerio from 'cheerio';
 async function generateResponse(conversation, message) {
-
+    // Begin web scraper if a https:// OR http:// URL is detected
    // Check if message contains a URL
    const urlRegex = /(https?:\/\/[^\s]+)/g;
    // Match our REGEX
    const urls = message.content.match(urlRegex);
    if (urls) {
@ -218,15 +262,17 @@ async function generateResponse(conversation, message) {
                    response += `Description: ${pageDescription}\n`;
                }
                if (pageContent) {
                    // Lets check for content and grab only the amount as configured.
                    const MAX_CONTENT_LENGTH = process.env.MAX_CONTENT_LENGTH;
                    let plainTextContent = $('<div>').html(pageContent).text().trim().replace(/[\r\n\t]+/g, ' ');
                    // Clean up code remove it from processing
                    const codePattern = /\/\/|\/\*|\*\/|\{|\}|\[|\]|\bfunction\b|\bclass\b|\b0x[0-9A-Fa-f]+\b|\b0b[01]+\b/;
                    const isCode = codePattern.test(plainTextContent);
                    if (isCode) {
                        plainTextContent = plainTextContent.replace(codePattern, '');
                    }
-                    // Remove anything enclosed in brackets
+                    // Remove anything enclosed in brackets JUNK DATA
                    plainTextContent = plainTextContent.replace(/ *\([^)]*\) */g, '');
                    if (plainTextContent.length > MAX_CONTENT_LENGTH) {
                        plainTextContent = plainTextContent.substring(0, MAX_CONTENT_LENGTH) + '...';
@ -249,65 +295,192 @@ async function generateResponse(conversation, message) {
            }
        }
    }
    // We need an abort controller to stop our progress message editor
    const controller = new AbortController();
    // Set our timeout for the controller
    const timeout = setTimeout(() => {
        controller.abort();
    }, 900000);
    // Copy our messages from MAP
    const messagesCopy = [...conversation.messages]; // create a copy of the messages array
    let botMessage; // define a variable to hold the message object
    let time = 0
    // define a function that shows the system load percentage and updates the message
    const showSystemLoad = async () => {
        // Configure our initial time
        time = Number(time) + Number(process.env.REFRESH_INTERVAL);
        // Get system stats
        cpuStat.usagePercent(function (err, percent, seconds) {
            if (err) {
                return console.log(err);
            }
-
+            // Setting our system stat vars
            const systemLoad = percent;
            const freeMemory = os.freemem() / 1024 / 1024 / 1024;
            const totalMemory = os.totalmem() / 1024 / 1024 / 1024;
            const usedMemory = totalMemory - freeMemory;
-            const embedData = {
+            // lets build some embed data
-                color: 0x0099ff,
+            let embedData;
                title: 'Please wait.. I am thinking...',
                fields: [
                    {
                        name: 'System Load',
                        value: `${systemLoad.toFixed(2)}%`,
                    },
                    {
                        name: 'Memory Usage',
                        value: `${usedMemory.toFixed(2)} GB / ${totalMemory.toFixed(2)} GB`,
                    },
                    {
                        name: 'Time',
                        value: `~${time} seconds.`,
                    },
                ],
            };
-            // if the message object doesn't exist, create it
+            // If we have NO GPU config lets send system stats only
-            if (!botMessage) {
+            if (process.env.GPU == 0) {
-                (async () => {
+                embedData = {
-                    botMessage = await message.channel.send({ embeds: [embedData] });
+                    color: 0x0099ff,
-                })();
+                    title: 'Please wait.. I am thinking...',
                    fields: [
                        {
                            name: 'System Load',
                            value: `${systemLoad.toFixed(2)}%`,
                        },
                        {
                            name: 'Memory Usage',
                            value: `${usedMemory.toFixed(2)} GB / ${totalMemory.toFixed(2)} GB`,
                        },
                        {
                            name: 'Time',
                            value: `~${time} seconds.`,
                        },
                    ],
                };
                // if the message object doesn't exist, create it
                if (!botMessage) {
                    (async () => {
                        if (time == 0) return
                        botMessage = await message.channel.send({ embeds: [embedData] });
                    })();
                } else {
                    (async () => {
                        if (!isAnyConversationBusy()) {
                            botMessage.delete()
                        } else {
                            await botMessage.edit({ embeds: [embedData] }); // otherwise, update the message
                        }
                    })();
                }
            } else {
-                botMessage.edit({ embeds: [embedData] }); // otherwise, update the message
+                // If we do have GPU=1 lets send some card info too!
                smi(function (err, data) {
                    if (err) {
                        // Handle error if smi function fails
                        console.error(err);
                        return;
                    }
                    let utilization = data.nvidia_smi_log.gpu.utilization;
                    let gpuUtilization = utilization.gpu_util;
                    let memoryUtilization = utilization.memory_util;
                    let gpuTemp = data.nvidia_smi_log.gpu.temperature.gpu_temp;
                    // These are not used until nvidia-docker fixes their support
                    let gpuTarget = data.nvidia_smi_log.gpu.temperature.gpu_target_temperature;
                    let gpuFanSpeed = data.nvidia_smi_log.gpu.fan_speed;
                    embedData = {
                        color: 0x0099ff,
                        title: 'Please wait.. I am thinking...',
                        fields: [
                            {
                                name: 'System Load',
                                value: `${systemLoad.toFixed(2)}%`,
                            },
                            {
                                name: 'Memory Usage',
                                value: `${usedMemory.toFixed(2)} GB / ${totalMemory.toFixed(2)} GB`,
                            },
                            {
                                name: 'GPU Utilization',
                                value: `${gpuUtilization}`,
                            },
                            {
                                name: 'Memory Utilization',
                                value: `${memoryUtilization}`,
                            },
                            {
                                name: 'GPU Temperature',
                                value: `${gpuTemp}`,
                            },
                            {
                                name: 'Time',
                                value: `~${time} seconds.`,
                            },
                        ],
                    };
                    // if the message object doesn't exist, create it
                    if (!botMessage) {
                        (async () => {
                            if (time == 0) return
                            botMessage = await message.channel.send({ embeds: [embedData] });
                        })();
                    } else {
                        (async () => {
                            if (!isAnyConversationBusy()) {
                                botMessage.delete()
                            } else {
                                await botMessage.edit({ embeds: [embedData] }); // otherwise, update the message
                            }
                        })();
                    }
                })
            }
        });
    };
    // call the function initially
    await showSystemLoad();
    // Grab the REFRESH_INTERVAL from ENV if not exist, lets use 7 (seconds)
    const refreshInterval = setInterval(showSystemLoad, (process.env.REFRESH_INTERVAL || 7) * 1000);
    try {
        // call the function initially
        await showSystemLoad();
        // Grab the REFRESH_INTERVAL from ENV if not exist, lets use 7 (seconds)
        const refreshInterval = setInterval(showSystemLoad, (process.env.REFRESH_INTERVAL || 7) * 1000);
        // Sum the llamaTokenizer token counts of every 'user' and 'assistant'
        // message in `messages`; messages with any other role are ignored.
        function countLlamaTokens(messages) {
            const countedRoles = ['user', 'assistant'];
            let tally = 0;
            for (const entry of messages) {
                // Skip roles that do not count towards the total.
                if (!countedRoles.includes(entry.role)) continue;
                tally += llamaTokenizer.encode(entry.content).length;
            }
            return tally;
        }
        let totalTokens = countLlamaTokens(messagesCopy);
        console.log(`Total Llama tokens: ${totalTokens}`);
        let tokenLength = totalTokens
        // Remove older conversations if necessary
        const maxLength = 1800;
        const tolerance = 25; // allow for some flexibility in the token length
        if (tokenLength > maxLength + tolerance) {
            const diff = tokenLength - (maxLength + tolerance);
            let removedTokens = 0;
            // Iterate over the messages in reverse order
            for (let i = messagesCopy.length - 1; i >= 0; i--) {
                const message = messagesCopy[i];
                const messageTokens = countLlamaTokens([message]);
                // Check if the current message plus the tokens in the message is less than or equal to the diff
                if (removedTokens + messageTokens <= diff) {
                    messagesCopy.splice(i, 1);
                    removedTokens += messageTokens;
                    console.log(removedTokens + " removed \nAfter Resize: " + countLlamaTokens(messagesCopy));
                } else {
                    // Remove more than one message if necessary to bring the total length below the maximum allowed length
                    const messagesToRemove = Math.floor(diff / messageTokens);
                    for (let j = 0; j < messagesToRemove; j++) {
                        messagesCopy.splice(i, 1);
                        removedTokens += messageTokens;
                    }
                    break;
                }
            }
        }
        // Sending request to our API
        const response = await fetch(`http://${process.env.ROOT_IP}:${process.env.ROOT_PORT}/v1/chat/completions`, {
            method: 'POST',
            headers: {
@ -330,9 +503,18 @@ async function generateResponse(conversation, message) {
        const responseText = choice.message.content;
        // clear the interval, replace the "please wait" message with the response, and update the message
        clearInterval(refreshInterval);
        console.log(responseText);
-        botMessage.delete()
+        try {
        if (time > 2) {
        await botMessage.delete();
        clearInterval(refreshInterval);
        botMessage = null;
        console.log("Time limit reached. Message deleted.");
       }
      } catch (err) {
      console.log("Error deleting message: ", err);
     }
        return responseText;
@ -340,6 +522,7 @@ async function generateResponse(conversation, message) {
        throw err;
    } finally {
        clearTimeout(timeout);
        botMessage = null;
        time = 0
    }
 }
--- a/package-lock.json
+++ b/package-lock.json
@ -0,0 +1,757 @@
 {
  "name": "llama-cpp-python-djs-bot",
  "version": "1.0.0",
  "lockfileVersion": 3,
  "requires": true,
  "packages": {
    "": {
      "name": "llama-cpp-python-djs-bot",
      "version": "1.0.0",
      "license": "ISC",
      "dependencies": {
        "cheerio": "^1.0.0-rc.12",
        "cpu-stat": "^2.0.1",
        "discord.js": "^14.9.0",
        "dotenv": "^16.0.3",
        "gpt-tokenizer": "^2.1.1",
        "llama-tokenizer-js": "^1.0.0",
        "node-fetch": "^3.3.1",
        "node-nvidia-smi": "^1.0.0",
        "os": "^0.1.2",
        "tiktoken": "^1.0.10"
      }
    },
    "node_modules/@discordjs/builders": {
      "version": "1.6.3",
      "resolved": "https://registry.npmjs.org/@discordjs/builders/-/builders-1.6.3.tgz",
      "integrity": "sha512-CTCh8NqED3iecTNuiz49mwSsrc2iQb4d0MjMdmS/8pb69Y4IlzJ/DIy/p5GFlgOrFbNO2WzMHkWKQSiJ3VNXaw==",
      "dependencies": {
        "@discordjs/formatters": "^0.3.1",
        "@discordjs/util": "^0.3.1",
        "@sapphire/shapeshift": "^3.8.2",
        "discord-api-types": "^0.37.41",
        "fast-deep-equal": "^3.1.3",
        "ts-mixer": "^6.0.3",
        "tslib": "^2.5.0"
      },
      "engines": {
        "node": ">=16.9.0"
      }
    },
    "node_modules/@discordjs/collection": {
      "version": "1.5.1",
      "resolved": "https://registry.npmjs.org/@discordjs/collection/-/collection-1.5.1.tgz",
      "integrity": "sha512-aWEc9DCf3TMDe9iaJoOnO2+JVAjeRNuRxPZQA6GVvBf+Z3gqUuWYBy2NWh4+5CLYq5uoc3MOvUQ5H5m8CJBqOA==",
      "engines": {
        "node": ">=16.9.0"
      }
    },
    "node_modules/@discordjs/formatters": {
      "version": "0.3.1",
      "resolved": "https://registry.npmjs.org/@discordjs/formatters/-/formatters-0.3.1.tgz",
      "integrity": "sha512-M7X4IGiSeh4znwcRGcs+49B5tBkNDn4k5bmhxJDAUhRxRHTiFAOTVUNQ6yAKySu5jZTnCbSvTYHW3w0rAzV1MA==",
      "dependencies": {
        "discord-api-types": "^0.37.41"
      },
      "engines": {
        "node": ">=16.9.0"
      }
    },
    "node_modules/@discordjs/rest": {
      "version": "1.7.1",
      "resolved": "https://registry.npmjs.org/@discordjs/rest/-/rest-1.7.1.tgz",
      "integrity": "sha512-Ofa9UqT0U45G/eX86cURQnX7gzOJLG2oC28VhIk/G6IliYgQF7jFByBJEykPSHE4MxPhqCleYvmsrtfKh1nYmQ==",
      "dependencies": {
        "@discordjs/collection": "^1.5.1",
        "@discordjs/util": "^0.3.0",
        "@sapphire/async-queue": "^1.5.0",
        "@sapphire/snowflake": "^3.4.2",
        "discord-api-types": "^0.37.41",
        "file-type": "^18.3.0",
        "tslib": "^2.5.0",
        "undici": "^5.22.0"
      },
      "engines": {
        "node": ">=16.9.0"
      }
    },
    "node_modules/@discordjs/util": {
      "version": "0.3.1",
      "resolved": "https://registry.npmjs.org/@discordjs/util/-/util-0.3.1.tgz",
      "integrity": "sha512-HxXKYKg7vohx2/OupUN/4Sd02Ev3PBJ5q0gtjdcvXb0ErCva8jNHWfe/v5sU3UKjIB/uxOhc+TDOnhqffj9pRA==",
      "engines": {
        "node": ">=16.9.0"
      }
    },
    "node_modules/@discordjs/ws": {
      "version": "0.8.3",
      "resolved": "https://registry.npmjs.org/@discordjs/ws/-/ws-0.8.3.tgz",
      "integrity": "sha512-hcYtppanjHecbdNyCKQNH2I4RP9UrphDgmRgLYrATEQF1oo4sYSve7ZmGsBEXSzH72MO2tBPdWSThunbxUVk0g==",
      "dependencies": {
        "@discordjs/collection": "^1.5.1",
        "@discordjs/rest": "^1.7.1",
        "@discordjs/util": "^0.3.1",
        "@sapphire/async-queue": "^1.5.0",
        "@types/ws": "^8.5.4",
        "@vladfrangu/async_event_emitter": "^2.2.1",
        "discord-api-types": "^0.37.41",
        "tslib": "^2.5.0",
        "ws": "^8.13.0"
      },
      "engines": {
        "node": ">=16.9.0"
      }
    },
    "node_modules/@sapphire/async-queue": {
      "version": "1.5.0",
      "resolved": "https://registry.npmjs.org/@sapphire/async-queue/-/async-queue-1.5.0.tgz",
      "integrity": "sha512-JkLdIsP8fPAdh9ZZjrbHWR/+mZj0wvKS5ICibcLrRI1j84UmLMshx5n9QmL8b95d4onJ2xxiyugTgSAX7AalmA==",
      "engines": {
        "node": ">=v14.0.0",
        "npm": ">=7.0.0"
      }
    },
    "node_modules/@sapphire/shapeshift": {
      "version": "3.9.2",
      "resolved": "https://registry.npmjs.org/@sapphire/shapeshift/-/shapeshift-3.9.2.tgz",
      "integrity": "sha512-YRbCXWy969oGIdqR/wha62eX8GNHsvyYi0Rfd4rNW6tSVVa8p0ELiMEuOH/k8rgtvRoM+EMV7Csqz77YdwiDpA==",
      "dependencies": {
        "fast-deep-equal": "^3.1.3",
        "lodash": "^4.17.21"
      },
      "engines": {
        "node": ">=v14.0.0",
        "npm": ">=7.0.0"
      }
    },
    "node_modules/@sapphire/snowflake": {
      "version": "3.5.1",
      "resolved": "https://registry.npmjs.org/@sapphire/snowflake/-/snowflake-3.5.1.tgz",
      "integrity": "sha512-BxcYGzgEsdlG0dKAyOm0ehLGm2CafIrfQTZGWgkfKYbj+pNNsorZ7EotuZukc2MT70E0UbppVbtpBrqpzVzjNA==",
      "engines": {
        "node": ">=v14.0.0",
        "npm": ">=7.0.0"
      }
    },
    "node_modules/@tokenizer/token": {
      "version": "0.3.0",
      "resolved": "https://registry.npmjs.org/@tokenizer/token/-/token-0.3.0.tgz",
      "integrity": "sha512-OvjF+z51L3ov0OyAU0duzsYuvO01PH7x4t6DJx+guahgTnBHkhJdG7soQeTSFLWN3efnHyibZ4Z8l2EuWwJN3A=="
    },
    "node_modules/@types/node": {
      "version": "20.3.0",
      "resolved": "https://registry.npmjs.org/@types/node/-/node-20.3.0.tgz",
      "integrity": "sha512-cumHmIAf6On83X7yP+LrsEyUOf/YlociZelmpRYaGFydoaPdxdt80MAbu6vWerQT2COCp2nPvHdsbD7tHn/YlQ=="
    },
    "node_modules/@types/ws": {
      "version": "8.5.5",
      "resolved": "https://registry.npmjs.org/@types/ws/-/ws-8.5.5.tgz",
      "integrity": "sha512-lwhs8hktwxSjf9UaZ9tG5M03PGogvFaH8gUgLNbN9HKIg0dvv6q+gkSuJ8HN4/VbyxkuLzCjlN7GquQ0gUJfIg==",
      "dependencies": {
        "@types/node": "*"
      }
    },
    "node_modules/@vladfrangu/async_event_emitter": {
      "version": "2.2.2",
      "resolved": "https://registry.npmjs.org/@vladfrangu/async_event_emitter/-/async_event_emitter-2.2.2.tgz",
      "integrity": "sha512-HIzRG7sy88UZjBJamssEczH5q7t5+axva19UbZLO6u0ySbYPrwzWiXBcC0WuHyhKKoeCyneH+FvYzKQq/zTtkQ==",
      "engines": {
        "node": ">=v14.0.0",
        "npm": ">=7.0.0"
      }
    },
    "node_modules/boolbase": {
      "version": "1.0.0",
      "resolved": "https://registry.npmjs.org/boolbase/-/boolbase-1.0.0.tgz",
      "integrity": "sha512-JZOSA7Mo9sNGB8+UjSgzdLtokWAky1zbztM3WRLCbZ70/3cTANmQmOdR7y2g+J0e2WXywy1yS468tY+IruqEww=="
    },
    "node_modules/busboy": {
      "version": "1.6.0",
      "resolved": "https://registry.npmjs.org/busboy/-/busboy-1.6.0.tgz",
      "integrity": "sha512-8SFQbg/0hQ9xy3UNTB0YEnsNBbWfhf7RtnzpL7TkBiTBRfrQ9Fxcnz7VJsleJpyp6rVLvXiuORqjlHi5q+PYuA==",
      "dependencies": {
        "streamsearch": "^1.1.0"
      },
      "engines": {
        "node": ">=10.16.0"
      }
    },
    "node_modules/cheerio": {
      "version": "1.0.0-rc.12",
      "resolved": "https://registry.npmjs.org/cheerio/-/cheerio-1.0.0-rc.12.tgz",
      "integrity": "sha512-VqR8m68vM46BNnuZ5NtnGBKIE/DfN0cRIzg9n40EIq9NOv90ayxLBXA8fXC5gquFRGJSTRqBq25Jt2ECLR431Q==",
      "dependencies": {
        "cheerio-select": "^2.1.0",
        "dom-serializer": "^2.0.0",
        "domhandler": "^5.0.3",
        "domutils": "^3.0.1",
        "htmlparser2": "^8.0.1",
        "parse5": "^7.0.0",
        "parse5-htmlparser2-tree-adapter": "^7.0.0"
      },
      "engines": {
        "node": ">= 6"
      },
      "funding": {
        "url": "https://github.com/cheeriojs/cheerio?sponsor=1"
      }
    },
    "node_modules/cheerio-select": {
      "version": "2.1.0",
      "resolved": "https://registry.npmjs.org/cheerio-select/-/cheerio-select-2.1.0.tgz",
      "integrity": "sha512-9v9kG0LvzrlcungtnJtpGNxY+fzECQKhK4EGJX2vByejiMX84MFNQw4UxPJl3bFbTMw+Dfs37XaIkCwTZfLh4g==",
      "dependencies": {
        "boolbase": "^1.0.0",
        "css-select": "^5.1.0",
        "css-what": "^6.1.0",
        "domelementtype": "^2.3.0",
        "domhandler": "^5.0.3",
        "domutils": "^3.0.1"
      },
      "funding": {
        "url": "https://github.com/sponsors/fb55"
      }
    },
    "node_modules/cpu-stat": {
      "version": "2.0.1",
      "resolved": "https://registry.npmjs.org/cpu-stat/-/cpu-stat-2.0.1.tgz",
      "integrity": "sha512-bC4ts/0IjYfNV6Dc7F2NauWM0tip0fneZjRek8HqX2ZERC4oSt6dmV+GTN1mfE9OKbLAppv58M2PVzKLGB731w=="
    },
    "node_modules/css-select": {
      "version": "5.1.0",
      "resolved": "https://registry.npmjs.org/css-select/-/css-select-5.1.0.tgz",
      "integrity": "sha512-nwoRF1rvRRnnCqqY7updORDsuqKzqYJ28+oSMaJMMgOauh3fvwHqMS7EZpIPqK8GL+g9mKxF1vP/ZjSeNjEVHg==",
      "dependencies": {
        "boolbase": "^1.0.0",
        "css-what": "^6.1.0",
        "domhandler": "^5.0.2",
        "domutils": "^3.0.1",
        "nth-check": "^2.0.1"
      },
      "funding": {
        "url": "https://github.com/sponsors/fb55"
      }
    },
    "node_modules/css-what": {
      "version": "6.1.0",
      "resolved": "https://registry.npmjs.org/css-what/-/css-what-6.1.0.tgz",
      "integrity": "sha512-HTUrgRJ7r4dsZKU6GjmpfRK1O76h97Z8MfS1G0FozR+oF2kG6Vfe8JE6zwrkbxigziPHinCJ+gCPjA9EaBDtRw==",
      "engines": {
        "node": ">= 6"
      },
      "funding": {
        "url": "https://github.com/sponsors/fb55"
      }
    },
    "node_modules/data-uri-to-buffer": {
      "version": "4.0.1",
      "resolved": "https://registry.npmjs.org/data-uri-to-buffer/-/data-uri-to-buffer-4.0.1.tgz",
      "integrity": "sha512-0R9ikRb668HB7QDxT1vkpuUBtqc53YyAwMwGeUFKRojY/NWKvdZ+9UYtRfGmhqNbRkTSVpMbmyhXipFFv2cb/A==",
      "engines": {
        "node": ">= 12"
      }
    },
    "node_modules/discord-api-types": {
      "version": "0.37.43",
      "resolved": "https://registry.npmjs.org/discord-api-types/-/discord-api-types-0.37.43.tgz",
      "integrity": "sha512-bBhDWU3TF9KADxR/mHp1K4Bvu/LRtFQdGyBjADu4e66F3ZnD4kp12W/SJCttIaCcMXzPV3sfty6eDGRNRph51Q=="
    },
    "node_modules/discord.js": {
      "version": "14.11.0",
      "resolved": "https://registry.npmjs.org/discord.js/-/discord.js-14.11.0.tgz",
      "integrity": "sha512-CkueWYFQ28U38YPR8HgsBR/QT35oPpMbEsTNM30Fs8loBIhnA4s70AwQEoy6JvLcpWWJO7GY0y2BUzZmuBMepQ==",
      "dependencies": {
        "@discordjs/builders": "^1.6.3",
        "@discordjs/collection": "^1.5.1",
        "@discordjs/formatters": "^0.3.1",
        "@discordjs/rest": "^1.7.1",
        "@discordjs/util": "^0.3.1",
        "@discordjs/ws": "^0.8.3",
        "@sapphire/snowflake": "^3.4.2",
        "@types/ws": "^8.5.4",
        "discord-api-types": "^0.37.41",
        "fast-deep-equal": "^3.1.3",
        "lodash.snakecase": "^4.1.1",
        "tslib": "^2.5.0",
        "undici": "^5.22.0",
        "ws": "^8.13.0"
      },
      "engines": {
        "node": ">=16.9.0"
      }
    },
    "node_modules/dom-serializer": {
      "version": "2.0.0",
      "resolved": "https://registry.npmjs.org/dom-serializer/-/dom-serializer-2.0.0.tgz",
      "integrity": "sha512-wIkAryiqt/nV5EQKqQpo3SToSOV9J0DnbJqwK7Wv/Trc92zIAYZ4FlMu+JPFW1DfGFt81ZTCGgDEabffXeLyJg==",
      "dependencies": {
        "domelementtype": "^2.3.0",
        "domhandler": "^5.0.2",
        "entities": "^4.2.0"
      },
      "funding": {
        "url": "https://github.com/cheeriojs/dom-serializer?sponsor=1"
      }
    },
    "node_modules/domelementtype": {
      "version": "2.3.0",
      "resolved": "https://registry.npmjs.org/domelementtype/-/domelementtype-2.3.0.tgz",
      "integrity": "sha512-OLETBj6w0OsagBwdXnPdN0cnMfF9opN69co+7ZrbfPGrdpPVNBUj02spi6B1N7wChLQiPn4CSH/zJvXw56gmHw==",
      "funding": [
        {
          "type": "github",
          "url": "https://github.com/sponsors/fb55"
        }
      ]
    },
    "node_modules/domhandler": {
      "version": "5.0.3",
      "resolved": "https://registry.npmjs.org/domhandler/-/domhandler-5.0.3.tgz",
      "integrity": "sha512-cgwlv/1iFQiFnU96XXgROh8xTeetsnJiDsTc7TYCLFd9+/WNkIqPTxiM/8pSd8VIrhXGTf1Ny1q1hquVqDJB5w==",
      "dependencies": {
        "domelementtype": "^2.3.0"
      },
      "engines": {
        "node": ">= 4"
      },
      "funding": {
        "url": "https://github.com/fb55/domhandler?sponsor=1"
      }
    },
    "node_modules/domutils": {
      "version": "3.1.0",
      "resolved": "https://registry.npmjs.org/domutils/-/domutils-3.1.0.tgz",
      "integrity": "sha512-H78uMmQtI2AhgDJjWeQmHwJJ2bLPD3GMmO7Zja/ZZh84wkm+4ut+IUnUdRa8uCGX88DiVx1j6FRe1XfxEgjEZA==",
      "dependencies": {
        "dom-serializer": "^2.0.0",
        "domelementtype": "^2.3.0",
        "domhandler": "^5.0.3"
      },
      "funding": {
        "url": "https://github.com/fb55/domutils?sponsor=1"
      }
    },
    "node_modules/dotenv": {
      "version": "16.1.4",
      "resolved": "https://registry.npmjs.org/dotenv/-/dotenv-16.1.4.tgz",
      "integrity": "sha512-m55RtE8AsPeJBpOIFKihEmqUcoVncQIwo7x9U8ZwLEZw9ZpXboz2c+rvog+jUaJvVrZ5kBOeYQBX5+8Aa/OZQw==",
      "engines": {
        "node": ">=12"
      },
      "funding": {
        "url": "https://github.com/motdotla/dotenv?sponsor=1"
      }
    },
    "node_modules/entities": {
      "version": "4.5.0",
      "resolved": "https://registry.npmjs.org/entities/-/entities-4.5.0.tgz",
      "integrity": "sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw==",
      "engines": {
        "node": ">=0.12"
      },
      "funding": {
        "url": "https://github.com/fb55/entities?sponsor=1"
      }
    },
    "node_modules/fast-deep-equal": {
      "version": "3.1.3",
      "resolved": "https://registry.npmjs.org/fast-deep-equal/-/fast-deep-equal-3.1.3.tgz",
      "integrity": "sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q=="
    },
    "node_modules/fetch-blob": {
      "version": "3.2.0",
      "resolved": "https://registry.npmjs.org/fetch-blob/-/fetch-blob-3.2.0.tgz",
      "integrity": "sha512-7yAQpD2UMJzLi1Dqv7qFYnPbaPx7ZfFK6PiIxQ4PfkGPyNyl2Ugx+a/umUonmKqjhM4DnfbMvdX6otXq83soQQ==",
      "funding": [
        {
          "type": "github",
          "url": "https://github.com/sponsors/jimmywarting"
        },
        {
          "type": "paypal",
          "url": "https://paypal.me/jimmywarting"
        }
      ],
      "dependencies": {
        "node-domexception": "^1.0.0",
        "web-streams-polyfill": "^3.0.3"
      },
      "engines": {
        "node": "^12.20 || >= 14.13"
      }
    },
    "node_modules/file-type": {
      "version": "18.5.0",
      "resolved": "https://registry.npmjs.org/file-type/-/file-type-18.5.0.tgz",
      "integrity": "sha512-yvpl5U868+V6PqXHMmsESpg6unQ5GfnPssl4dxdJudBrr9qy7Fddt7EVX1VLlddFfe8Gj9N7goCZH22FXuSQXQ==",
      "dependencies": {
        "readable-web-to-node-stream": "^3.0.2",
        "strtok3": "^7.0.0",
        "token-types": "^5.0.1"
      },
      "engines": {
        "node": ">=14.16"
      },
      "funding": {
        "url": "https://github.com/sindresorhus/file-type?sponsor=1"
      }
    },
    "node_modules/formdata-polyfill": {
      "version": "4.0.10",
      "resolved": "https://registry.npmjs.org/formdata-polyfill/-/formdata-polyfill-4.0.10.tgz",
      "integrity": "sha512-buewHzMvYL29jdeQTVILecSaZKnt/RJWjoZCF5OW60Z67/GmSLBkOFM7qh1PI3zFNtJbaZL5eQu1vLfazOwj4g==",
      "dependencies": {
        "fetch-blob": "^3.1.2"
      },
      "engines": {
        "node": ">=12.20.0"
      }
    },
    "node_modules/gpt-tokenizer": {
      "version": "2.1.1",
      "resolved": "https://registry.npmjs.org/gpt-tokenizer/-/gpt-tokenizer-2.1.1.tgz",
      "integrity": "sha512-WlX+vj6aPaZ71U6Bf18fem+5k58zlgh2a4nbc7KHy6aGVIyq3nCh709b/8momu34sV/5t/SpzWi8LayWD9uyDw==",
      "dependencies": {
        "rfc4648": "^1.5.2"
      }
    },
    "node_modules/htmlparser2": {
      "version": "8.0.2",
      "resolved": "https://registry.npmjs.org/htmlparser2/-/htmlparser2-8.0.2.tgz",
      "integrity": "sha512-GYdjWKDkbRLkZ5geuHs5NY1puJ+PXwP7+fHPRz06Eirsb9ugf6d8kkXav6ADhcODhFFPMIXyxkxSuMf3D6NCFA==",
      "funding": [
        "https://github.com/fb55/htmlparser2?sponsor=1",
        {
          "type": "github",
          "url": "https://github.com/sponsors/fb55"
        }
      ],
      "dependencies": {
        "domelementtype": "^2.3.0",
        "domhandler": "^5.0.3",
        "domutils": "^3.0.1",
        "entities": "^4.4.0"
      }
    },
    "node_modules/ieee754": {
      "version": "1.2.1",
      "resolved": "https://registry.npmjs.org/ieee754/-/ieee754-1.2.1.tgz",
      "integrity": "sha512-dcyqhDvX1C46lXZcVqCpK+FtMRQVdIMN6/Df5js2zouUsqG7I6sFxitIC+7KYK29KdXOLHdu9zL4sFnoVQnqaA==",
      "funding": [
        {
          "type": "github",
          "url": "https://github.com/sponsors/feross"
        },
        {
          "type": "patreon",
          "url": "https://www.patreon.com/feross"
        },
        {
          "type": "consulting",
          "url": "https://feross.org/support"
        }
      ]
    },
    "node_modules/inherits": {
      "version": "2.0.4",
      "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz",
      "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ=="
    },
    "node_modules/llama-tokenizer-js": {
      "version": "1.0.0",
      "resolved": "https://registry.npmjs.org/llama-tokenizer-js/-/llama-tokenizer-js-1.0.0.tgz",
      "integrity": "sha512-O2FsnoXwOsbrM91bd5iX6cPDJvKwvIRghwhhdgGJr4rxy3Ap9QznORqIJHjTqwy9JF1jiqP6sARo0pB6ojW/Cg=="
    },
    "node_modules/lodash": {
      "version": "4.17.21",
      "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz",
      "integrity": "sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg=="
    },
    "node_modules/lodash.snakecase": {
      "version": "4.1.1",
      "resolved": "https://registry.npmjs.org/lodash.snakecase/-/lodash.snakecase-4.1.1.tgz",
      "integrity": "sha512-QZ1d4xoBHYUeuouhEq3lk3Uq7ldgyFXGBhg04+oRLnIz8o9T65Eh+8YdroUwn846zchkA9yDsDl5CVVaV2nqYw=="
    },
    "node_modules/node-domexception": {
      "version": "1.0.0",
      "resolved": "https://registry.npmjs.org/node-domexception/-/node-domexception-1.0.0.tgz",
      "integrity": "sha512-/jKZoMpw0F8GRwl4/eLROPA3cfcXtLApP0QzLmUT/HuPCZWyB7IY9ZrMeKw2O/nFIqPQB3PVM9aYm0F312AXDQ==",
      "funding": [
        {
          "type": "github",
          "url": "https://github.com/sponsors/jimmywarting"
        },
        {
          "type": "github",
          "url": "https://paypal.me/jimmywarting"
        }
      ],
      "engines": {
        "node": ">=10.5.0"
      }
    },
    "node_modules/node-fetch": {
      "version": "3.3.1",
      "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-3.3.1.tgz",
      "integrity": "sha512-cRVc/kyto/7E5shrWca1Wsea4y6tL9iYJE5FBCius3JQfb/4P4I295PfhgbJQBLTx6lATE4z+wK0rPM4VS2uow==",
      "dependencies": {
        "data-uri-to-buffer": "^4.0.0",
        "fetch-blob": "^3.1.4",
        "formdata-polyfill": "^4.0.10"
      },
      "engines": {
        "node": "^12.20.0 || ^14.13.1 || >=16.0.0"
      },
      "funding": {
        "type": "opencollective",
        "url": "https://opencollective.com/node-fetch"
      }
    },
    "node_modules/node-nvidia-smi": {
      "version": "1.0.0",
      "resolved": "https://registry.npmjs.org/node-nvidia-smi/-/node-nvidia-smi-1.0.0.tgz",
      "integrity": "sha512-vjgWll2M/FWihNPKKbhWKSfbk57WrpVv1XOR5iuIxIHNieTUuJ0HrDIZj5gsJ63dx2eTbm/dWm+fWIPfYm+D3Q==",
      "dependencies": {
        "xml2js": "^0.4.17"
      },
      "bin": {
        "node-nvidia-smi": "node-nvidia-smi.js"
      }
    },
    "node_modules/nth-check": {
      "version": "2.1.1",
      "resolved": "https://registry.npmjs.org/nth-check/-/nth-check-2.1.1.tgz",
      "integrity": "sha512-lqjrjmaOoAnWfMmBPL+XNnynZh2+swxiX3WUE0s4yEHI6m+AwrK2UZOimIRl3X/4QctVqS8AiZjFqyOGrMXb/w==",
      "dependencies": {
        "boolbase": "^1.0.0"
      },
      "funding": {
        "url": "https://github.com/fb55/nth-check?sponsor=1"
      }
    },
    "node_modules/os": {
      "version": "0.1.2",
      "resolved": "https://registry.npmjs.org/os/-/os-0.1.2.tgz",
      "integrity": "sha512-ZoXJkvAnljwvc56MbvhtKVWmSkzV712k42Is2mA0+0KTSRakq5XXuXpjZjgAt9ctzl51ojhQWakQQpmOvXWfjQ=="
    },
    "node_modules/parse5": {
      "version": "7.1.2",
      "resolved": "https://registry.npmjs.org/parse5/-/parse5-7.1.2.tgz",
      "integrity": "sha512-Czj1WaSVpaoj0wbhMzLmWD69anp2WH7FXMB9n1Sy8/ZFF9jolSQVMu1Ij5WIyGmcBmhk7EOndpO4mIpihVqAXw==",
      "dependencies": {
        "entities": "^4.4.0"
      },
      "funding": {
        "url": "https://github.com/inikulin/parse5?sponsor=1"
      }
    },
    "node_modules/parse5-htmlparser2-tree-adapter": {
      "version": "7.0.0",
      "resolved": "https://registry.npmjs.org/parse5-htmlparser2-tree-adapter/-/parse5-htmlparser2-tree-adapter-7.0.0.tgz",
      "integrity": "sha512-B77tOZrqqfUfnVcOrUvfdLbz4pu4RopLD/4vmu3HUPswwTA8OH0EMW9BlWR2B0RCoiZRAHEUu7IxeP1Pd1UU+g==",
      "dependencies": {
        "domhandler": "^5.0.2",
        "parse5": "^7.0.0"
      },
      "funding": {
        "url": "https://github.com/inikulin/parse5?sponsor=1"
      }
    },
    "node_modules/peek-readable": {
      "version": "5.0.0",
      "resolved": "https://registry.npmjs.org/peek-readable/-/peek-readable-5.0.0.tgz",
      "integrity": "sha512-YtCKvLUOvwtMGmrniQPdO7MwPjgkFBtFIrmfSbYmYuq3tKDV/mcfAhBth1+C3ru7uXIZasc/pHnb+YDYNkkj4A==",
      "engines": {
        "node": ">=14.16"
      },
      "funding": {
        "type": "github",
        "url": "https://github.com/sponsors/Borewit"
      }
    },
    "node_modules/readable-stream": {
      "version": "3.6.2",
      "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.6.2.tgz",
      "integrity": "sha512-9u/sniCrY3D5WdsERHzHE4G2YCXqoG5FTHUiCC4SIbr6XcLZBY05ya9EKjYek9O5xOAwjGq+1JdGBAS7Q9ScoA==",
      "dependencies": {
        "inherits": "^2.0.3",
        "string_decoder": "^1.1.1",
        "util-deprecate": "^1.0.1"
      },
      "engines": {
        "node": ">= 6"
      }
    },
    "node_modules/readable-web-to-node-stream": {
      "version": "3.0.2",
      "resolved": "https://registry.npmjs.org/readable-web-to-node-stream/-/readable-web-to-node-stream-3.0.2.tgz",
      "integrity": "sha512-ePeK6cc1EcKLEhJFt/AebMCLL+GgSKhuygrZ/GLaKZYEecIgIECf4UaUuaByiGtzckwR4ain9VzUh95T1exYGw==",
      "dependencies": {
        "readable-stream": "^3.6.0"
      },
      "engines": {
        "node": ">=8"
      },
      "funding": {
        "type": "github",
        "url": "https://github.com/sponsors/Borewit"
      }
    },
    "node_modules/rfc4648": {
      "version": "1.5.2",
      "resolved": "https://registry.npmjs.org/rfc4648/-/rfc4648-1.5.2.tgz",
      "integrity": "sha512-tLOizhR6YGovrEBLatX1sdcuhoSCXddw3mqNVAcKxGJ+J0hFeJ+SjeWCv5UPA/WU3YzWPPuCVYgXBKZUPGpKtg=="
    },
    "node_modules/safe-buffer": {
      "version": "5.2.1",
      "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz",
      "integrity": "sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==",
      "funding": [
        {
          "type": "github",
          "url": "https://github.com/sponsors/feross"
        },
        {
          "type": "patreon",
          "url": "https://www.patreon.com/feross"
        },
        {
          "type": "consulting",
          "url": "https://feross.org/support"
        }
      ]
    },
    "node_modules/sax": {
      "version": "1.2.4",
      "resolved": "https://registry.npmjs.org/sax/-/sax-1.2.4.tgz",
      "integrity": "sha512-NqVDv9TpANUjFm0N8uM5GxL36UgKi9/atZw+x7YFnQ8ckwFGKrl4xX4yWtrey3UJm5nP1kUbnYgLopqWNSRhWw=="
    },
    "node_modules/streamsearch": {
      "version": "1.1.0",
      "resolved": "https://registry.npmjs.org/streamsearch/-/streamsearch-1.1.0.tgz",
      "integrity": "sha512-Mcc5wHehp9aXz1ax6bZUyY5afg9u2rv5cqQI3mRrYkGC8rW2hM02jWuwjtL++LS5qinSyhj2QfLyNsuc+VsExg==",
      "engines": {
        "node": ">=10.0.0"
      }
    },
    "node_modules/string_decoder": {
      "version": "1.3.0",
      "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.3.0.tgz",
      "integrity": "sha512-hkRX8U1WjJFd8LsDJ2yQ/wWWxaopEsABU1XfkM8A+j0+85JAGppt16cr1Whg6KIbb4okU6Mql6BOj+uup/wKeA==",
      "dependencies": {
        "safe-buffer": "~5.2.0"
      }
    },
    "node_modules/strtok3": {
      "version": "7.0.0",
      "resolved": "https://registry.npmjs.org/strtok3/-/strtok3-7.0.0.tgz",
      "integrity": "sha512-pQ+V+nYQdC5H3Q7qBZAz/MO6lwGhoC2gOAjuouGf/VO0m7vQRh8QNMl2Uf6SwAtzZ9bOw3UIeBukEGNJl5dtXQ==",
      "dependencies": {
        "@tokenizer/token": "^0.3.0",
        "peek-readable": "^5.0.0"
      },
      "engines": {
        "node": ">=14.16"
      },
      "funding": {
        "type": "github",
        "url": "https://github.com/sponsors/Borewit"
      }
    },
    "node_modules/tiktoken": {
      "version": "1.0.10",
      "resolved": "https://registry.npmjs.org/tiktoken/-/tiktoken-1.0.10.tgz",
      "integrity": "sha512-gF8ndTCNu7WcRFbl1UUWaFIB4CTXmHzS3tRYdyUYF7x3C6YR6Evoao4zhKDmWIwv2PzNbzoQMV8Pxt+17lEDbA=="
    },
    "node_modules/token-types": {
      "version": "5.0.1",
      "resolved": "https://registry.npmjs.org/token-types/-/token-types-5.0.1.tgz",
      "integrity": "sha512-Y2fmSnZjQdDb9W4w4r1tswlMHylzWIeOKpx0aZH9BgGtACHhrk3OkT52AzwcuqTRBZtvvnTjDBh8eynMulu8Vg==",
      "dependencies": {
        "@tokenizer/token": "^0.3.0",
        "ieee754": "^1.2.1"
      },
      "engines": {
        "node": ">=14.16"
      },
      "funding": {
        "type": "github",
        "url": "https://github.com/sponsors/Borewit"
      }
    },
    "node_modules/ts-mixer": {
      "version": "6.0.3",
      "resolved": "https://registry.npmjs.org/ts-mixer/-/ts-mixer-6.0.3.tgz",
      "integrity": "sha512-k43M7uCG1AkTyxgnmI5MPwKoUvS/bRvLvUb7+Pgpdlmok8AoqmUaZxUUw8zKM5B1lqZrt41GjYgnvAi0fppqgQ=="
    },
    "node_modules/tslib": {
      "version": "2.5.3",
      "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.5.3.tgz",
      "integrity": "sha512-mSxlJJwl3BMEQCUNnxXBU9jP4JBktcEGhURcPR6VQVlnP0FdDEsIaz0C35dXNGLyRfrATNofF0F5p2KPxQgB+w=="
    },
    "node_modules/undici": {
      "version": "5.22.1",
      "resolved": "https://registry.npmjs.org/undici/-/undici-5.22.1.tgz",
      "integrity": "sha512-Ji2IJhFXZY0x/0tVBXeQwgPlLWw13GVzpsWPQ3rV50IFMMof2I55PZZxtm4P6iNq+L5znYN9nSTAq0ZyE6lSJw==",
      "dependencies": {
        "busboy": "^1.6.0"
      },
      "engines": {
        "node": ">=14.0"
      }
    },
    "node_modules/util-deprecate": {
      "version": "1.0.2",
      "resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz",
      "integrity": "sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw=="
    },
    "node_modules/web-streams-polyfill": {
      "version": "3.2.1",
      "resolved": "https://registry.npmjs.org/web-streams-polyfill/-/web-streams-polyfill-3.2.1.tgz",
      "integrity": "sha512-e0MO3wdXWKrLbL0DgGnUV7WHVuw9OUvL4hjgnPkIeEvESk74gAITi5G606JtZPp39cd8HA9VQzCIvA49LpPN5Q==",
      "engines": {
        "node": ">= 8"
      }
    },
    "node_modules/ws": {
      "version": "8.13.0",
      "resolved": "https://registry.npmjs.org/ws/-/ws-8.13.0.tgz",
      "integrity": "sha512-x9vcZYTrFPC7aSIbj7sRCYo7L/Xb8Iy+pW0ng0wt2vCJv7M9HOMy0UoN3rr+IFC7hb7vXoqS+P9ktyLLLhO+LA==",
      "engines": {
        "node": ">=10.0.0"
      },
      "peerDependencies": {
        "bufferutil": "^4.0.1",
        "utf-8-validate": ">=5.0.2"
      },
      "peerDependenciesMeta": {
        "bufferutil": {
          "optional": true
        },
        "utf-8-validate": {
          "optional": true
        }
      }
    },
    "node_modules/xml2js": {
      "version": "0.4.23",
      "resolved": "https://registry.npmjs.org/xml2js/-/xml2js-0.4.23.tgz",
      "integrity": "sha512-ySPiMjM0+pLDftHgXY4By0uswI3SPKLDw/i3UXbnO8M/p28zqexCUoPmQFrYD+/1BzhGJSs2i1ERWKJAtiLrug==",
      "dependencies": {
        "sax": ">=0.6.0",
        "xmlbuilder": "~11.0.0"
      },
      "engines": {
        "node": ">=4.0.0"
      }
    },
    "node_modules/xmlbuilder": {
      "version": "11.0.1",
      "resolved": "https://registry.npmjs.org/xmlbuilder/-/xmlbuilder-11.0.1.tgz",
      "integrity": "sha512-fDlsI/kFEx7gLvbecc0/ohLG50fugQp8ryHzMTuW9vSa1GJ0XYWKnhsUx7oie3G98+r56aTQIUB4kht42R3JvA==",
      "engines": {
        "node": ">=4.0"
      }
    }
  }
 }
--- a/package.json
+++ b/package.json
@ -10,11 +10,15 @@
  "author": "",
  "license": "ISC",
  "dependencies": {
    "discord.js": "^14.9.0",
    "cheerio": "^1.0.0-rc.12",
    "cpu-stat": "^2.0.1",
    "discord.js": "^14.9.0",
    "dotenv": "^16.0.3",
    "gpt-tokenizer": "^2.1.1",
    "llama-tokenizer-js": "^1.0.0",
    "node-fetch": "^3.3.1",
    "node-nvidia-smi": "^1.0.0",
    "os": "^0.1.2",
-    "cpu-stat": "^2.0.1"
+    "tiktoken": "^1.0.10"
  }
 }
--- a/server/Dockerfile
+++ b/server/Dockerfile
@ -1,11 +1,11 @@
-FROM python:bullseye
+# CPU-only image for the llama-cpp-python API server.
+# Pinned release instead of `latest`: keeps builds reproducible, and Ubuntu releases
+# newer than 22.04 mark the system Python as externally managed (PEP 668), which
+# makes a bare `pip install` fail.
+FROM ubuntu:22.04
-RUN apt-get update; \
-    apt-get install -y --no-install-recommends \
-    build-essential
+# `&&` (not `;`) so a failed index refresh aborts the layer instead of installing
+# from stale lists; the apt lists are removed in the same layer that created them.
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+    build-essential cmake python3 python3-pip && \
+    rm -rf /var/lib/apt/lists/*
 WORKDIR /usr/src/app
-RUN pip install --no-cache-dir llama-cpp-python[server]
+# No -DLLAMA_METAL=on: Metal is an Apple-only backend and cannot be built in a Linux image.
+# The requirement is quoted so the shell never treats `[server]` as a glob pattern.
+RUN FORCE_CMAKE=1 pip install --no-cache-dir "llama-cpp-python[server]"
-CMD python3 -m llama_cpp.server
+# Exec form: the server runs as PID 1 and receives SIGTERM from `docker stop`.
+CMD ["python3", "-m", "llama_cpp.server"]
Author	SHA1	Message	Date
Raven Scott	f53c0c7195	Fix up NodeJS install GPU side	2023-11-14 18:54:17 -05:00
Raven Scott	b723f44587	Fix up HF config - Needs testing	2023-11-14 18:21:35 -05:00
Raven Scott	7c7b5d11b7	fix nodejs install for HuggingFace	2023-11-14 18:19:39 -05:00
Raven Scott	a5d2530456	Adding error checking for stat message removal	2023-11-14 18:18:37 -05:00
Raven Scott	1f3ff44317	fix token reducer	2023-08-21 21:15:10 -04:00
Raven Scott	a980f3cd43	improving token reducer logic	2023-08-15 01:41:39 -04:00
Raven Scott	05e79cba3a	fixing up token reducing if sessions too large	2023-08-15 01:08:56 -04:00
Raven Scott	f91d66b2b3	Adding assistant to Counter	2023-08-14 23:44:06 -04:00
Raven Scott	6efd069b5d	counting tokens properly	2023-08-14 23:00:23 -04:00
Raven Scott	bd435ca311	default to disk cache	2023-06-12 18:45:39 +02:00
Raven Scott	61ee13bfbd	remove comments	2023-06-12 18:16:16 +02:00
Raven Scott	10473ef702	beginnings of using a tokenizer to check CTX length, remove context if needed	2023-06-12 18:15:11 +02:00
Raven Scott	20c83a656a	update dockerfile for GPU	2023-05-31 23:02:07 +02:00
Raven Scott	61c2fed773	update dockerfile for server-non-gpu	2023-05-27 07:35:41 +02:00
Raven Scott	6ed34de804	update readme	2023-05-27 02:02:38 +02:00
Raven Scott	6bf6c1ef28	Dont send anything if time==0	2023-05-23 15:53:53 +02:00
Raven Scott	7102bf32f0	Fix GPU Cache	2023-05-23 14:56:21 +02:00
Raven Scott	099dbf908b	update	2023-05-22 18:38:47 +02:00
Raven Scott	bc6157e4a1	bug fix	2023-05-22 18:23:17 +02:00
Raven Scott	da0650d3b6	resetting botMessage to stop crashing	2023-05-22 17:46:59 +02:00
Raven Scott	393999165b	adding GPU ENV Var to main compose	2023-05-22 17:35:20 +02:00
Raven Scott	c75926728c	comments	2023-05-21 00:20:53 +02:00
Raven Scott	927b5c834d	Add warning about caching with cuBLAS	2023-05-20 23:47:16 +02:00
Raven Scott	668b343cbb	changing the name scheme for docker-compose	2023-05-20 15:14:25 +02:00
Raven Scott	51a41292e6	bringing back the embed	2023-05-20 15:11:58 +02:00
Raven Scott	2ac55922d2	revert back to older style of workflow until new logic is written	2023-05-20 04:08:27 +02:00
Raven Scott	73636804a5	bug fix	2023-05-20 02:49:17 +02:00
Raven Scott	c7a3316d45	bug fix	2023-05-19 23:49:55 +02:00
Raven Scott	1fe0f20e6f	revert	2023-05-19 22:55:25 +02:00
Raven Scott	8ce9e18656	removing non needed code	2023-05-19 22:43:47 +02:00
Raven Scott	2924822a49	ensuring GPU env is set in default .env	2023-05-19 21:45:20 +02:00
Raven Scott	368004f10b	adding NVIDIA GPU Support with Stats	2023-05-19 21:32:21 +02:00
Raven Scott	4b090592ad	Fix hugging face	2023-05-08 22:15:51 +02:00
Raven Scott	64be911772	adding REPEAT_PENALTY	2023-05-08 22:00:24 +02:00
Raven Scott	83a7bb90ed	Fix up env	2023-05-08 21:12:57 +02:00
Raven Scott	14fa3b06ff	Remove the abuse of looping functions	2023-05-08 15:45:05 +02:00
Raven Scott	c7d8735c8a	Remove any @ symbol to remove spam pings	2023-05-08 15:42:15 +02:00
Raven Scott	012566e93c	update model var	2023-05-07 14:18:17 +02:00
Raven Scott	d18cb39ecd	update hf	2023-05-06 22:19:08 +02:00
Raven Scott	9bc44c56f8	update hf	2023-05-06 14:34:53 +02:00
Raven Scott	973952aee1	Revert "Urban" This reverts commit bd5ef4db9ad28df0b84374486f0c64e35affe1f4.	2023-05-06 03:41:36 +02:00
Raven Scott	3d2546ebcf	Revert "Adding urban dictionary lookups" This reverts commit f87542132ea4e4a304d089afadff5d781a44a2e9.	2023-05-06 03:41:16 +02:00
Raven Scott	f87542132e	Adding urban dictionary lookups	2023-05-06 02:32:10 +02:00
Raven Scott	bd5ef4db9a	Urban	2023-05-06 02:30:46 +02:00
Raven Scott	ee47531d2f	HuggingFace Deploy Instructions	2023-05-06 01:49:21 +02:00
Raven Scott	7ea6abb0f8	HuggingFace Deploy Instructions	2023-05-06 01:46:37 +02:00
Raven Scott	4ff16b4fc7	HuggingFace Deploy Instructions	2023-05-06 01:45:01 +02:00
Raven Scott	d1807d37ad	HuggingFace Deploy Instructions	2023-05-06 01:41:03 +02:00
Raven Scott	af13ca3717	Move overflow delay from static to .env	2023-05-05 21:07:39 +02:00
Raven Scott	cb880f9bc1	Fix Caching web reading the web: Append last message rather than add to the conversation.	2023-05-05 20:49:29 +02:00
Raven Scott	aec98b576b	update default.env	2023-05-05 19:31:17 +02:00
Raven Scott	3b1ec922c4	remove \t	2023-05-05 19:08:13 +02:00
Raven Scott	e54826085d	remove redunant code	2023-05-05 18:32:46 +02:00
Raven Scott	18923b7909	cleaner output from web scrapes	2023-05-05 18:31:24 +02:00
Raven Scott	57545a20ef	update	2023-05-05 18:12:43 +02:00
Raven Scott	49f2c08544	update	2023-05-05 18:12:10 +02:00
Raven Scott	45bc433060	return on error	2023-05-05 17:48:32 +02:00
Raven Scott	c83366d0a8	update package.json for cheerio	2023-05-05 17:00:40 +02:00
Raven Scott	7c5b036114	update	2023-05-05 16:56:30 +02:00
Raven Scott	a338fb725a	Adding: Web Access allowing the AI to browse URLs	2023-05-05 16:55:29 +02:00
Raven Scott	a099502f1a	Fix ShowSystemLoad	2023-05-02 14:38:57 +02:00
Raven Scott	c0ab1541e1	Adding refresh interval to system load status	2023-05-02 14:22:58 +02:00
Raven Scott	779ac1f462	Moving REFRESH_INTERVAL to docker-compose	2023-05-02 14:16:13 +02:00
Raven Scott	335b01287e	Moving REFRESH_INTERVAL to .env	2023-05-02 14:15:45 +02:00
Raven Scott	d4e8563817	lowercase Thinking it looks better :P	2023-05-02 14:07:36 +02:00