Compare commits

...

37 Commits
main ... main

Author SHA1 Message Date
Raven Scott e138c67fe2 Fix hugging face 2023-05-08 19:10:15 -07:00
Raven Scott 50d947b9c2 adding REPEAT_PENALTY 2023-05-08 19:10:15 -07:00
Raven Scott b009ace3e7 Fix up env 2023-05-08 19:10:15 -07:00
Raven Scott 385234e05a Remove the abuse of looping functions 2023-05-08 19:10:15 -07:00
Raven Scott 63a4486745 Remove any @ symbol to remove spam pings 2023-05-08 19:10:15 -07:00
Raven Scott f4e97d9710 update model var 2023-05-08 19:10:15 -07:00
Raven Scott 4a05334b15 update hf 2023-05-08 19:10:15 -07:00
Raven Scott cae0112077 update hf 2023-05-08 19:10:15 -07:00
Raven Scott 72059a430a Revert "Urban"
This reverts commit bd5ef4db9a.
2023-05-08 19:10:15 -07:00
Raven Scott 45ef05ac45 Revert "Adding urban dictionary lookups"
This reverts commit f87542132e.
2023-05-08 19:10:15 -07:00
Raven Scott ca758c5898 Adding urban dictionary lookups 2023-05-08 19:10:15 -07:00
Raven Scott 335f06ff69 Urban 2023-05-08 19:10:15 -07:00
Raven Scott 6b739d2b80 HuggingFace Deploy Instructions 2023-05-08 19:10:15 -07:00
Raven Scott 9b4981c539 HuggingFace Deploy Instructions 2023-05-08 19:10:15 -07:00
Raven Scott 9660806f94 HuggingFace Deploy Instructions 2023-05-08 19:10:15 -07:00
Raven Scott c28bf9c022 HuggingFace Deploy Instructions 2023-05-08 19:10:15 -07:00
Raven Scott 6531b633f7 Move overflow delay from static to .env 2023-05-08 19:10:15 -07:00
Raven Scott 99ea986bbd Fix Caching web reading the web: Append last message rather than add to the conversation. 2023-05-08 19:10:15 -07:00
Raven Scott a34a910200 update default.env 2023-05-08 19:10:15 -07:00
Raven Scott 637c5182a3 remove \t 2023-05-08 19:10:15 -07:00
Raven Scott 5516e4c20b remove redunant code 2023-05-08 19:10:15 -07:00
Raven Scott 9489068b8e cleaner output from web scrapes 2023-05-08 19:10:15 -07:00
Raven Scott f211fe2d67 update 2023-05-08 19:10:15 -07:00
Raven Scott 932474ec63 update 2023-05-08 19:10:15 -07:00
Raven Scott fb08f0b592 return on error 2023-05-08 19:10:15 -07:00
Raven Scott 9197996ac7 update package.json for cheerio 2023-05-08 19:10:15 -07:00
Raven Scott 016f553192 update 2023-05-08 19:10:15 -07:00
Raven Scott 691b3137c3 Adding: Web Access allowing the AI to browse URLs 2023-05-08 19:10:15 -07:00
Raven Scott f272839ab1 Fix ShowSystemLoad 2023-05-08 19:10:15 -07:00
Raven Scott b1c1f6995a Adding refresh interval to system load status 2023-05-08 19:10:15 -07:00
Raven Scott 9b00f2fcd8 Moving REFRESH_INTERVAL to docker-compose 2023-05-08 19:10:15 -07:00
Raven Scott 38f6c38f6d Moving REFRESH_INTERVAL to .env 2023-05-08 19:10:15 -07:00
Raven Scott 2771052f23 lowercase Thinking it looks better :P 2023-05-08 19:10:15 -07:00
MrTuxedo f87b61fb2b merge upstream -> main 2023-04-30 16:11:40 -07:00
Raven Scott 0caf82d7f6 MAX_TOKENS to int 2023-04-26 19:24:24 -07:00
Raven Scott d2aae48e33 adding N_THREADS and MAX_TOKENS vars 2023-04-26 19:24:24 -07:00
Raven Scott 3da598c218 Bug Fix: Chunk messages if response is too large 2023-04-26 19:24:24 -07:00
8 changed files with 242 additions and 61 deletions

View File

@ -1,13 +1,48 @@
# Discord Token
THE_TOKEN = "DISCORD_TOKEN_HERE"
# The Channel IDs the bot will operate in, separated by commas
CHANNEL_IDS = 1094494101631680653,1094628334727614605
# The INIT prompt for all conversations.
INIT_PROMPT = "Assistant name: ChatAI. You code, write and provide any information without any mistakes."
ROOT_PORT = 8000
DATA_DIR = /home/USERNAME/weights
CACHE = 1
N_THREADS = 4
# Loading Embed Refresh Timing
REFRESH_INTERVAL=10
# When a message is too large for Discord we chunk the response into separate messages.
# To ensure the bot does not get rate limited, we send these at a delay interval.
# DEFAULT: 3 a good setting is between 3 and 7 seconds.
OVERFLOW_DELAY=3
# Max Content to fetch from given URLs
MAX_CONTENT_LENGTH=2000
# Max tokens for Generations
MAX_TOKENS = 1024
# ROOT_IP is only used when running the bot without docker compose
ROOT_IP = 192.168.0.15
# PORT is only used when running the bot without docker compose
ROOT_PORT = 8000
# Directory to your models (llama.cpp specific settings)
DATA_DIR = /home/USERNAME/weights
# Enable Experimental Message Caches (Limited to single session)
# Cache will use ~1.4 GB or MORE of RAM. ONLY ENABLE IF YOUR SYSTEM CAN HANDLE THIS.
CACHE = 1
# Set the number of threads to use; currently a standard thread will utilize 1 whole core.
# I usually set this to all the cores I physically have, OR 2 cores less, to allow for other processes.
N_THREADS = 4
# Always use MMAP unless you know what you are doing
USE_MMAP=1
# Only use MLOCK if you know what it does!
USE_MLOCK=0
# The higher the number, the stronger the repetition penalty.
REPEAT_PENALTY=1

View File

@ -11,7 +11,7 @@ services:
- ${DATA_DIR}:/usr/src/app/models
environment:
- HOST=llama-python-server
- MODEL=./models/gpt4-x-alpaca-13b-native-4bit-128g.bin
- MODEL=./models/ggml-vic7b-q4_0.bin
llama-python-djs-bot:
container_name: llama-python-djs-bot
restart: unless-stopped
@ -21,6 +21,7 @@ services:
- llama-python-server
environment:
- THE_TOKEN
- REFRESH_INTERVAL
- CHANNEL_IDS
- ROOT_IP=llama-python-server
- ROOT_PORT=8000

View File

@ -0,0 +1,42 @@
FROM ubuntu:latest
RUN apt update
RUN DEBIAN_FRONTEND=noninteractive apt install curl sudo -y
RUN curl -fsSL https://deb.nodesource.com/setup_18.x | sudo -E bash -
RUN DEBIAN_FRONTEND=noninteractive apt install nodejs -y
RUN npm i pm2 -g
RUN mkdir -p /code/.pm2
RUN mkdir -p /.pm2
RUN chmod 0777 /code/.pm2
RUN chmod 0777 /.pm2
RUN DEBIAN_FRONTEND=noninteractive apt install wget python3 python3-pip -y
WORKDIR /code
RUN pip install --no-cache-dir llama-cpp-python[server]
COPY . .
RUN npm i
ENV HOST localhost
ENV PORT 7860
ENV MODEL=/code/ggml-vic7b-q4_0.bin
ENV CACHE=1
ENV USE_MLOCK=0
ENV REPEAT_PENALTY=1
ENV MODEL=/code/ggml-vic7b-q4_0.bin
ENV PM2_HOME=/code/.pm2
RUN wget -q https://huggingface.co/eachadea/ggml-vicuna-7b-1.1/resolve/main/ggml-vic7b-q4_0.bin
CMD /bin/bash /code/start.sh

View File

@ -0,0 +1,17 @@
# How to Deploy on Hugging Face
1) Move the "assets" directory and llamabot.js, package.json files into your HuggingFace repo directory.
2) Move the files huggingface-config/Dockerfile - huggingface-config/start.sh - huggingface-config/startServices.json into the root of the hugging face repo.
3) Edit Docker file as you need, the dockerfile is set to automatically download Vicuna 1.1 7B
4) Move default.env into your repo as .env and edit for your needs
5) Push the changes
You should then see the bot being built and deployed on HuggingFace
pm2 log will run automatically so you can see frontend and backend logs.
PLEASE NOTE: Your hugging face repo should remain private!

View File

@ -0,0 +1,6 @@
#!/bin/bash
# This is the main process of the container and will stay alive as long as pm2 log is running.
pm2 start startServices.json
pm2 log

View File

@ -0,0 +1,14 @@
{
"apps": [
{
"name": "Discord-Bot",
"script": "node /code/llamabot.js",
"args" : ""
},
{
"name": "AI-API",
"script": "python3 -m llama_cpp.server",
"args" : ""
}
]
}

View File

@ -153,17 +153,22 @@ client.on('messageCreate', async (message) => {
// if we are over the discord char limit we need chunks...
if (response.length > limit) {
const chunks = response.match(new RegExp(`.{1,${limit}}`, "g"));
for (let i = 0; i < chunks.length; i++) {
setTimeout(() => {
message.channel.send(chunks[i]);
}, i * 3000); // delay of 3 seconds between each chunk to save on API requests
}
const chunks = response.match(new RegExp(`.{1,${limit}}`, "g"));
if (chunks.length >= 15) return await message.channel.send("Response chunks too large. Try again");
for (let i = 0; i < chunks.length; i++) {
setTimeout(() => {
message.channel.send(chunks[i]);
}, i * (process.env.OVERFLOW_DELAY || 3) * 1000); // delay of 3 seconds between each chunk to save on API requests
}
} else {
// We are good to go, send the response
await message.channel.send(response);
// We are good to go, send the response
await message.channel.send(response.replace("@", ""));
}
setPresenceOnline()
setBusy(message.author.id, false);
} else {
@ -175,17 +180,75 @@ client.on('messageCreate', async (message) => {
conversation.busy = false;
}
conversations.set(userID, conversation); // Update user's conversation map in memory
console.log(conversation)
} catch (err) {
console.error(err);
sendRand(errorMessages)
return sendRand(errorMessages)
} finally {
setPresenceOnline()
setBusy(message.author.id, false);
}
});
import cheerio from 'cheerio';
async function generateResponse(conversation, message) {
// Check if message contains a URL
const urlRegex = /(https?:\/\/[^\s]+)/g;
const urls = message.content.match(urlRegex);
if (urls) {
// If there are multiple URLs, process them one by one
for (const url of urls) {
try {
const res = await fetch(url);
const html = await res.text();
const $ = cheerio.load(html);
// Extract page title, meta description and content
const pageTitle = $('head title').text().trim();
const pageDescription = $('head meta[name="description"]').attr('content');
const pageContent = $('body').text().trim();
// Construct response message with page details
let response = `Title: ${pageTitle}\n`;
if (pageDescription) {
response += `Description: ${pageDescription}\n`;
}
if (pageContent) {
const MAX_CONTENT_LENGTH = process.env.MAX_CONTENT_LENGTH;
let plainTextContent = $('<div>').html(pageContent).text().trim().replace(/[\r\n\t]+/g, ' ');
const codePattern = /\/\/|\/\*|\*\/|\{|\}|\[|\]|\bfunction\b|\bclass\b|\b0x[0-9A-Fa-f]+\b|\b0b[01]+\b/;
const isCode = codePattern.test(plainTextContent);
if (isCode) {
plainTextContent = plainTextContent.replace(codePattern, '');
}
// Remove anything enclosed in brackets
plainTextContent = plainTextContent.replace(/ *\([^)]*\) */g, '');
if (plainTextContent.length > MAX_CONTENT_LENGTH) {
plainTextContent = plainTextContent.substring(0, MAX_CONTENT_LENGTH) + '...';
}
response += `Content: ${plainTextContent.trim()}`;
}
response += `URL: ${url}`;
// Get the index of the last message in the array
const lastMessageIndex = conversation.messages.length - 1;
// Append a new line and the new content to the existing content of the last message
conversation.messages[lastMessageIndex].content += "\n" + response;
console.log("A URL was provided, response: " + response)
} catch (err) {
console.error(err);
return sendRand(errorMessages);
}
}
}
const controller = new AbortController();
const timeout = setTimeout(() => {
controller.abort();
@ -197,66 +260,68 @@ async function generateResponse(conversation, message) {
let time = 0
// define a function that shows the system load percentage and updates the message
const showSystemLoad = async () => {
time = time + 7;
cpuStat.usagePercent(function(err, percent, seconds) {
if (err) {
return console.log(err);
}
const systemLoad = percent;
const freeMemory = os.freemem() / 1024 / 1024 / 1024;
const totalMemory = os.totalmem() / 1024 / 1024 / 1024;
const usedMemory = totalMemory - freeMemory;
const embedData = {
color: 0x0099ff,
title: 'Please wait.. I am Thinking...',
fields: [
{
name: 'System Load',
value: `${systemLoad.toFixed(2)}%`,
},
{
name: 'Memory Usage',
value: `${usedMemory.toFixed(2)} GB / ${totalMemory.toFixed(2)} GB`,
},
{
name: 'Time',
value: `~${time} seconds.`,
},
],
};
// if the message object doesn't exist, create it
if (!botMessage) {
(async () => {
botMessage = await message.channel.send({ embeds: [embedData] });
})();
} else {
botMessage.edit({ embeds: [embedData] }); // otherwise, update the message
}
time = Number(time) + Number(process.env.REFRESH_INTERVAL);
cpuStat.usagePercent(function (err, percent, seconds) {
if (err) {
return console.log(err);
}
const systemLoad = percent;
const freeMemory = os.freemem() / 1024 / 1024 / 1024;
const totalMemory = os.totalmem() / 1024 / 1024 / 1024;
const usedMemory = totalMemory - freeMemory;
const embedData = {
color: 0x0099ff,
title: 'Please wait.. I am thinking...',
fields: [
{
name: 'System Load',
value: `${systemLoad.toFixed(2)}%`,
},
{
name: 'Memory Usage',
value: `${usedMemory.toFixed(2)} GB / ${totalMemory.toFixed(2)} GB`,
},
{
name: 'Time',
value: `~${time} seconds.`,
},
],
};
// if the message object doesn't exist, create it
if (!botMessage) {
(async () => {
botMessage = await message.channel.send({ embeds: [embedData] });
})();
} else {
botMessage.edit({ embeds: [embedData] }); // otherwise, update the message
}
});
};
};
// call the function initially
await showSystemLoad();
// refresh the system load percentage and update the message every 7 seconds
const refreshInterval = setInterval(showSystemLoad, 7000);
// Grab the REFRESH_INTERVAL from ENV if not exist, lets use 7 (seconds)
const refreshInterval = setInterval(showSystemLoad, (process.env.REFRESH_INTERVAL || 7) * 1000);
try {
const response = await fetch(`http://${process.env.ROOT_IP}:${process.env.ROOT_PORT}/v1/chat/completions`, {
method: 'POST',
headers: {
'accept': 'application/json',
'Content-Type': 'application/json'
'accept': 'application/json',
'Content-Type': 'application/json'
},
body: JSON.stringify({
messages: messagesCopy,
max_tokens: Number(process.env.MAX_TOKENS) // add the max_tokens parameter here
messages: messagesCopy,
max_tokens: Number(process.env.MAX_TOKENS), // add the max_tokens parameter here
repeat_penalty: Number(process.env.REPEAT_PENALTY)
}),
signal: controller.signal
});
});
const responseData = await response.json();
console.log(JSON.stringify(responseData));

View File

@ -11,6 +11,7 @@
"license": "ISC",
"dependencies": {
"discord.js": "^14.9.0",
"cheerio": "^1.0.0-rc.12",
"dotenv": "^16.0.3",
"node-fetch": "^3.3.1",
"os": "^0.1.2",