8 changed files with 61 additions and 242 deletions
--- a/default.env
+++ b/default.env
@@ -1,48 +1,13 @@
-# Discord Token
 THE_TOKEN = "DISCORD_TOKEN_HERE"
-
-# The Channel IDs the bot will operate in seperated by commas
 CHANNEL_IDS = 1094494101631680653,1094628334727614605
-
-# The INIT prompt for all conversations.
 INIT_PROMPT = "Assistant name: ChatAI. You code, write and provide any information without any mistakes."
-
-# Loading Emebed Refresh Timing
-REFRESH_INTERVAL=10
-
-# When a message is too large for discord we chunk the response into seperate messages.
-# To ensure we do not rate limit the bot we send these at a delay interval.
-# DEFAULT: 3 a good setting is between 3 and 7 seconds.
-OVERFLOW_DELAY=3
-
-# Max Content to fetch from given URLs
-MAX_CONTENT_LENGTH=2000
-
-# Max tokens for Generations
+ROOT_PORT = 8000
+DATA_DIR = /home/USERNAME/weights
+CACHE = 1
+N_THREADS = 4
 MAX_TOKENS = 1024

 # ROOT_IP is only used when running the bot without docker compose
 ROOT_IP = 192.168.0.15

-# PORT  is only used when running the bot without docker compose
-ROOT_PORT = 8000

-# Directory to your models (llama.cpp specfic settings)
-DATA_DIR = /home/USERNAME/weights
-
-# Enable Expirmental Message Caches (Limited to single session)
-# Cache will use ~1.4 GB or MORE of RAM. ONLY ENABLE IF YOUR SYSTEM CAN HANDLE THIS.
-CACHE = 1
-
-# Set number of threads to use, currently, a standard thread will utilize 1 whole core
-# I usually will set this between all cores I physcally have OR 2 cores less to allow for other processes.
-N_THREADS = 4
-
-# Always use MMAP unless you know what you are doing
-USE_MMAP=1
-
-# Only use MLOCK if you know what it does!
-USE_MLOCK=0
-
-# The higher the number the more hard core.
-REPEAT_PENALTY=1
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -11,7 +11,7 @@ services:
      - ${DATA_DIR}:/usr/src/app/models
    environment:
      - HOST=llama-python-server
-      - MODEL=./models/ggml-vic7b-q4_0.bin
+      - MODEL=./models/gpt4-x-alpaca-13b-native-4bit-128g.bin
  llama-python-djs-bot:
    container_name: llama-python-djs-bot
    restart: unless-stopped
@@ -21,7 +21,6 @@ services:
      - llama-python-server
    environment:
      - THE_TOKEN
-      - REFRESH_INTERVAL
      - CHANNEL_IDS
      - ROOT_IP=llama-python-server
      - ROOT_PORT=8000
--- a/huggingface-config/Dockerfile
+++ b/huggingface-config/Dockerfile
@@ -1,42 +0,0 @@
-FROM ubuntu:latest
-
-RUN apt update
-
-RUN DEBIAN_FRONTEND=noninteractive  apt install curl sudo -y
-
-RUN curl -fsSL https://deb.nodesource.com/setup_18.x | sudo -E bash - 
-
-RUN DEBIAN_FRONTEND=noninteractive  apt install nodejs -y
-
-RUN npm i pm2 -g
-
-RUN mkdir -p /code/.pm2
-
-RUN mkdir -p /.pm2
-
-RUN chmod 0777 /code/.pm2 
-
-RUN chmod 0777 /.pm2
-
-RUN DEBIAN_FRONTEND=noninteractive  apt install wget python3 python3-pip -y
-
-WORKDIR /code
-
-RUN pip install --no-cache-dir llama-cpp-python[server]
-
-COPY . .
-
-RUN npm i
-
-ENV HOST localhost
-ENV PORT 7860
-ENV MODEL=/code/ggml-vic7b-q4_0.bin
-ENV CACHE=1
-ENV USE_MLOCK=0
-ENV REPEAT_PENALTY=1
-ENV MODEL=/code/ggml-vic7b-q4_0.bin
-ENV PM2_HOME=/code/.pm2
-
-RUN wget -q https://huggingface.co/eachadea/ggml-vicuna-7b-1.1/resolve/main/ggml-vic7b-q4_0.bin
-
-CMD /bin/bash /code/start.sh
--- a/huggingface-config/README.md
+++ b/huggingface-config/README.md
@@ -1,17 +0,0 @@
-# How to Deploy on Hugging Face
-
-1) Move the "assets" directory and llamabot.js, package.json files into your HuggingFace repo directory.
-
-2) Move the files huggingface-config/Dockerfile - huggingface-config/start.sh - huggingface-config/startServices.json into the root of the hugging face repo.
-
-3) Edit Docker file as you need, the dockerfile is set to automatically download Vicuna 1.1 7B
-
-3) Move default.env into your repo as .env and edit for your needs
-
-4) Push the changes
-
-You should then see the bot being built and deployed on HuggingFace
-
-pm2 log will run automatically so you can see frontend and backend logs.
-
-PLEASE NOTE: Your hugging face repo should remain private!
--- a/huggingface-config/start.sh
+++ b/huggingface-config/start.sh
@@ -1,6 +0,0 @@
-#!/bin/bash
-# This is the main process of the container and will stay alive as long as pm2 log is running.
-
-pm2 start startServices.json
-
-pm2 log
--- a/huggingface-config/startServices.json
+++ b/huggingface-config/startServices.json
@@ -1,14 +0,0 @@
-{
-    "apps": [
-        {
-            "name": "Discord-Bot",
-            "script": "node /code/llamabot.js",
-            "args" : ""
-        },
-        {
-            "name": "AI-API",
-            "script": "python3 -m llama_cpp.server",
-            "args" : ""
-        }
-    ]
-}
--- a/llamabot.js
+++ b/llamabot.js
@@ -153,20 +153,15 @@ client.on('messageCreate', async (message) => {

            // if we are over the discord char limit we need chunks...
            if (response.length > limit) {
-
-              
-                const chunks = response.match(new RegExp(`.{1,${limit}}`, "g"));
-                if (chunks.length  >= 15) return await message.channel.send("Response chunks too large. Try again");
-
-
-                for (let i = 0; i < chunks.length; i++) {
-                    setTimeout(() => {
-                        message.channel.send(chunks[i]);
-                    }, i * (process.env.OVERFLOW_DELAY || 3) * 1000); // delay of 3 seconds between each chunk to save on API requests
-                }
+              const chunks = response.match(new RegExp(`.{1,${limit}}`, "g"));
+              for (let i = 0; i < chunks.length; i++) {
+                setTimeout(() => {
+                  message.channel.send(chunks[i]);
+                }, i * 3000); // delay of 3 seconds between each chunk to save on API requests
+              }
            } else {
-                // We are good to go, send the response
-                await message.channel.send(response.replace("@", ""));
+              // We are good to go, send the response
+              await message.channel.send(response);
            }
            
            setPresenceOnline()
@@ -180,75 +175,17 @@ client.on('messageCreate', async (message) => {
            conversation.busy = false;
        }
        conversations.set(userID, conversation); // Update user's conversation map in memory
-        console.log(conversation)
-
    } catch (err) {
        console.error(err);
-        return sendRand(errorMessages)
+        sendRand(errorMessages)
    } finally {
        setPresenceOnline()
        setBusy(message.author.id, false);
    }
 });

-import cheerio from 'cheerio';

 async function generateResponse(conversation, message) {
-
-    // Check if message contains a URL
-    const urlRegex = /(https?:\/\/[^\s]+)/g;
-    const urls = message.content.match(urlRegex);
-
-    if (urls) {
-        // If there are multiple URLs, process them one by one
-        for (const url of urls) {
-            try {
-                const res = await fetch(url);
-                const html = await res.text();
-                const $ = cheerio.load(html);
-
-                // Extract page title, meta description and content
-                const pageTitle = $('head title').text().trim();
-                const pageDescription = $('head meta[name="description"]').attr('content');
-                const pageContent = $('body').text().trim();
-
-                // Construct response message with page details
-                let response = `Title: ${pageTitle}\n`;
-                if (pageDescription) {
-                    response += `Description: ${pageDescription}\n`;
-                }
-                if (pageContent) {
-                    const MAX_CONTENT_LENGTH = process.env.MAX_CONTENT_LENGTH;
-                    let plainTextContent = $('<div>').html(pageContent).text().trim().replace(/[\r\n\t]+/g, ' ');
-                    const codePattern = /\/\/|\/\*|\*\/|\{|\}|\[|\]|\bfunction\b|\bclass\b|\b0x[0-9A-Fa-f]+\b|\b0b[01]+\b/;
-                    const isCode = codePattern.test(plainTextContent);
-
-                    if (isCode) {
-                        plainTextContent = plainTextContent.replace(codePattern, '');
-                    }
-                    // Remove anything enclosed in brackets
-                    plainTextContent = plainTextContent.replace(/ *\([^)]*\) */g, '');
-                    if (plainTextContent.length > MAX_CONTENT_LENGTH) {
-                        plainTextContent = plainTextContent.substring(0, MAX_CONTENT_LENGTH) + '...';
-                    }
-                    response += `Content: ${plainTextContent.trim()}`;
-                }
-                response += `URL: ${url}`;
-
-                // Get the index of the last message in the array
-                const lastMessageIndex = conversation.messages.length - 1;
-
-                // Append a new line and the new content to the existing content of the last message
-                conversation.messages[lastMessageIndex].content += "\n" + response;
-                
-                console.log("A URL was provided, response: " + response)
-
-            } catch (err) {
-                console.error(err);
-                return sendRand(errorMessages);
-            }
-        }
-    }
    const controller = new AbortController();
    const timeout = setTimeout(() => {
        controller.abort();
@@ -260,68 +197,66 @@ async function generateResponse(conversation, message) {
    let time = 0
    // define a function that shows the system load percentage and updates the message
    const showSystemLoad = async () => {
-        time = Number(time) + Number(process.env.REFRESH_INTERVAL);
-        cpuStat.usagePercent(function (err, percent, seconds) {
-            if (err) {
-                return console.log(err);
-            }
+        time = time + 7;
+        cpuStat.usagePercent(function(err, percent, seconds) {
+          if (err) {
+            return console.log(err);
+          }
      
-            const systemLoad = percent;
-            const freeMemory = os.freemem() / 1024 / 1024 / 1024;
-            const totalMemory = os.totalmem() / 1024 / 1024 / 1024;
-            const usedMemory = totalMemory - freeMemory;
+          const systemLoad = percent;
+          const freeMemory = os.freemem() / 1024 / 1024 / 1024;
+          const totalMemory = os.totalmem() / 1024 / 1024 / 1024;
+          const usedMemory = totalMemory - freeMemory;
      
-            const embedData = {
-                color: 0x0099ff,
-                title: 'Please wait.. I am thinking...',
-                fields: [
-                    {
-                        name: 'System Load',
-                        value: `${systemLoad.toFixed(2)}%`,
-                    },
-                    {
-                        name: 'Memory Usage',
-                        value: `${usedMemory.toFixed(2)} GB / ${totalMemory.toFixed(2)} GB`,
-                    },
-                    {
-                        name: 'Time',
-                        value: `~${time} seconds.`,
-                    },
-                ],
-            };
+          const embedData = {
+            color: 0x0099ff,
+            title: 'Please wait.. I am Thinking...',
+            fields: [
+              {
+                name: 'System Load',
+                value: `${systemLoad.toFixed(2)}%`,
+              },
+              {
+                name: 'Memory Usage',
+                value: `${usedMemory.toFixed(2)} GB / ${totalMemory.toFixed(2)} GB`,
+              },
+              {
+                name: 'Time',
+                value: `~${time} seconds.`,
+              },
+            ],
+          };
      
-            // if the message object doesn't exist, create it
-            if (!botMessage) {
-                (async () => {
-                    botMessage = await message.channel.send({ embeds: [embedData] });
-                })();
-            } else {
-                botMessage.edit({ embeds: [embedData] }); // otherwise, update the message
-            }
+          // if the message object doesn't exist, create it
+          if (!botMessage) {
+            (async () => {
+              botMessage = await message.channel.send({ embeds: [embedData] });
+            })();
+          } else {
+            botMessage.edit({ embeds: [embedData] }); // otherwise, update the message
+          }
        });
-    };
+      };

    // call the function initially
    await showSystemLoad();

-    // Grab the REFRESH_INTERVAL from ENV if not exist, lets use 7 (seconds)
-    const refreshInterval = setInterval(showSystemLoad, (process.env.REFRESH_INTERVAL || 7) * 1000);
+    // refresh the system load percentage and update the message every 7 seconds
+    const refreshInterval = setInterval(showSystemLoad, 7000);

    try {
        const response = await fetch(`http://${process.env.ROOT_IP}:${process.env.ROOT_PORT}/v1/chat/completions`, {
            method: 'POST',
            headers: {
-                'accept': 'application/json',
-                'Content-Type': 'application/json'
+              'accept': 'application/json',
+              'Content-Type': 'application/json'
            },
            body: JSON.stringify({
-                messages: messagesCopy,
-                max_tokens: Number(process.env.MAX_TOKENS), // add the max_tokens parameter here
-                repeat_penalty: Number(process.env.REPEAT_PENALTY)
-
+              messages: messagesCopy,
+              max_tokens: Number(process.env.MAX_TOKENS) // add the max_tokens parameter here
            }),
            signal: controller.signal
-        });
+          });

        const responseData = await response.json();
        console.log(JSON.stringify(responseData));
--- a/package.json
+++ b/package.json
@@ -11,7 +11,6 @@
  "license": "ISC",
  "dependencies": {
    "discord.js": "^14.9.0",
-    "cheerio": "^1.0.0-rc.12",
    "dotenv": "^16.0.3",
    "node-fetch": "^3.3.1",
    "os": "^0.1.2",