Fix hugging face

adding REPEAT_PENALTY
Fix up env
2023-05-08 19:10:15 -07:00 · 2023-05-08 19:10:15 -07:00 · 2023-05-08 19:10:15 -07:00 · 2023-05-08 19:10:15 -07:00 · 2023-05-08 19:10:15 -07:00 · 2023-05-08 19:10:15 -07:00
8 changed files with 242 additions and 61 deletions
--- a/default.env
+++ b/default.env
@ -1,13 +1,48 @@
 # Discord Token
 THE_TOKEN = "DISCORD_TOKEN_HERE"
 # The Channel IDs the bot will operate in, separated by commas
 CHANNEL_IDS = 1094494101631680653,1094628334727614605
 # The INIT prompt for all conversations.
 INIT_PROMPT = "Assistant name: ChatAI. You code, write and provide any information without any mistakes."
-ROOT_PORT = 8000
+
-DATA_DIR = /home/USERNAME/weights
+# Loading Embed Refresh Timing (in seconds)
-CACHE = 1
+REFRESH_INTERVAL=10
-N_THREADS = 4
+
 # When a message is too large for Discord we chunk the response into separate messages.
 # To ensure we do not rate limit the bot we send these at a delay interval.
 # DEFAULT: 3 a good setting is between 3 and 7 seconds.
 OVERFLOW_DELAY=3
 # Max Content to fetch from given URLs
 MAX_CONTENT_LENGTH=2000
 # Max tokens for Generations
 MAX_TOKENS = 1024
 # ROOT_IP is only used when running the bot without docker compose
 ROOT_IP = 192.168.0.15
 # ROOT_PORT is only used when running the bot without docker compose
 ROOT_PORT = 8000
 # Directory to your models (llama.cpp specific settings)
 DATA_DIR = /home/USERNAME/weights
 # Enable Experimental Message Caches (Limited to single session)
 # Cache will use ~1.4 GB or MORE of RAM. ONLY ENABLE IF YOUR SYSTEM CAN HANDLE THIS.
 CACHE = 1
 # Set the number of threads to use; currently, a standard thread will utilize 1 whole core.
 # I usually set this to the number of cores I physically have OR 2 cores less to allow for other processes.
 N_THREADS = 4
 # Always use MMAP unless you know what you are doing
 USE_MMAP=1
 # Only use MLOCK if you know what it does!
 USE_MLOCK=0
 # Penalty applied to repeated tokens during generation: the higher the number, the more strongly repetition is discouraged.
 REPEAT_PENALTY=1
--- a/docker-compose.yml
+++ b/docker-compose.yml
@ -11,7 +11,7 @@ services:
      - ${DATA_DIR}:/usr/src/app/models
    environment:
      - HOST=llama-python-server
-      - MODEL=./models/gpt4-x-alpaca-13b-native-4bit-128g.bin
+      - MODEL=./models/ggml-vic7b-q4_0.bin
  llama-python-djs-bot:
    container_name: llama-python-djs-bot
    restart: unless-stopped
@ -21,6 +21,7 @@ services:
      - llama-python-server
    environment:
      - THE_TOKEN
      - REFRESH_INTERVAL
      - CHANNEL_IDS
      - ROOT_IP=llama-python-server
      - ROOT_PORT=8000
--- a/huggingface-config/Dockerfile
+++ b/huggingface-config/Dockerfile
@ -0,0 +1,42 @@
 # Single image that runs both the Discord bot (Node.js) and the
 # llama-cpp-python API server, supervised by pm2 (see start.sh).
 FROM ubuntu:latest
 # Refresh the package index in the same layer as the first install so a
 # cached, stale index layer is never reused on its own.
 RUN apt update && DEBIAN_FRONTEND=noninteractive apt install curl sudo -y
 # Register the NodeSource repository for Node.js 18, then install Node.
 RUN curl -fsSL https://deb.nodesource.com/setup_18.x | sudo -E bash -
 RUN DEBIAN_FRONTEND=noninteractive apt install nodejs -y
 RUN npm i pm2 -g
 # pm2 state directories, made world-writable.
 # NOTE(review): presumably needed because the container may run as a
 # non-root user on the hosting platform - confirm.
 RUN mkdir -p /code/.pm2 /.pm2 && chmod 0777 /code/.pm2 /.pm2
 RUN DEBIAN_FRONTEND=noninteractive apt install wget python3 python3-pip -y
 WORKDIR /code
 RUN pip install --no-cache-dir llama-cpp-python[server]
 # Copy the bot sources (llamabot.js, package.json, start.sh, ...) and
 # install the Node dependencies.
 COPY . .
 RUN npm i
 # Runtime configuration. All variables use the key=value form; MODEL is
 # declared once (it was previously declared twice with the same value).
 ENV HOST=localhost
 ENV PORT=7860
 ENV MODEL=/code/ggml-vic7b-q4_0.bin
 ENV CACHE=1
 ENV USE_MLOCK=0
 ENV REPEAT_PENALTY=1
 ENV PM2_HOME=/code/.pm2
 # Download the model weights into /code so that MODEL resolves.
 RUN wget -q https://huggingface.co/eachadea/ggml-vicuna-7b-1.1/resolve/main/ggml-vic7b-q4_0.bin
 CMD /bin/bash /code/start.sh
--- a/huggingface-config/README.md
+++ b/huggingface-config/README.md
@ -0,0 +1,17 @@
 # How to Deploy on Hugging Face
 1) Move the "assets" directory and llamabot.js, package.json files into your HuggingFace repo directory.
 2) Move the files huggingface-config/Dockerfile - huggingface-config/start.sh - huggingface-config/startServices.json into the root of the hugging face repo.
 3) Edit the Dockerfile as needed; it is set to automatically download Vicuna 1.1 7B.
 4) Move default.env into your repo as .env and edit it for your needs.
 5) Push the changes.
 You should then see the bot being built and deployed on HuggingFace
 pm2 log will run automatically so you can see frontend and backend logs.
 PLEASE NOTE: Your hugging face repo should remain private!
--- a/huggingface-config/start.sh
+++ b/huggingface-config/start.sh
@ -0,0 +1,6 @@
 #!/bin/bash
 # Container entry script: starts the pm2-managed services, then follows their logs.
 # startServices.json sits next to this script, so switch to the script's own
 # directory first; the relative path below then works from any working directory.
 cd "$(dirname "$0")" || exit 1
 # Launch every app declared in startServices.json.
 pm2 start startServices.json
 # This is the main process of the container and will stay alive as long as pm2 log is running.
 pm2 log
--- a/huggingface-config/startServices.json
+++ b/huggingface-config/startServices.json
@ -0,0 +1,14 @@
 {
    "apps": [
        {
            "name": "Discord-Bot",
            "script": "node /code/llamabot.js",
            "args" : ""
        },
        {
            "name": "AI-API",
            "script": "python3 -m llama_cpp.server",
            "args" : ""
        }
    ]
 }
--- a/llamabot.js
+++ b/llamabot.js
@ -153,15 +153,20 @@ client.on('messageCreate', async (message) => {
            // if we are over the discord char limit we need chunks...
            if (response.length > limit) {
                const chunks = response.match(new RegExp(`.{1,${limit}}`, "g"));
                if (chunks.length  >= 15) return await message.channel.send("Response chunks too large. Try again");
                for (let i = 0; i < chunks.length; i++) {
                    setTimeout(() => {
                        message.channel.send(chunks[i]);
-                }, i * 3000); // delay of 3 seconds between each chunk to save on API requests
+                    }, i * (process.env.OVERFLOW_DELAY || 3) * 1000); // delay of 3 seconds between each chunk to save on API requests
                }
            } else {
                // We are good to go, send the response
-              await message.channel.send(response);
+                await message.channel.send(response.replace("@", ""));
            }
            setPresenceOnline()
@ -175,17 +180,75 @@ client.on('messageCreate', async (message) => {
            conversation.busy = false;
        }
        conversations.set(userID, conversation); // Update user's conversation map in memory
        console.log(conversation)
    } catch (err) {
        console.error(err);
-        sendRand(errorMessages)
+        return sendRand(errorMessages)
    } finally {
        setPresenceOnline()
        setBusy(message.author.id, false);
    }
 });
 import cheerio from 'cheerio';
 async function generateResponse(conversation, message) {
    // Check if message contains a URL
    const urlRegex = /(https?:\/\/[^\s]+)/g;
    const urls = message.content.match(urlRegex);
    if (urls) {
        // If there are multiple URLs, process them one by one
        for (const url of urls) {
            try {
                const res = await fetch(url);
                const html = await res.text();
                const $ = cheerio.load(html);
                // Extract page title, meta description and content
                const pageTitle = $('head title').text().trim();
                const pageDescription = $('head meta[name="description"]').attr('content');
                const pageContent = $('body').text().trim();
                // Construct response message with page details
                let response = `Title: ${pageTitle}\n`;
                if (pageDescription) {
                    response += `Description: ${pageDescription}\n`;
                }
                if (pageContent) {
                    const MAX_CONTENT_LENGTH = process.env.MAX_CONTENT_LENGTH;
                    let plainTextContent = $('<div>').html(pageContent).text().trim().replace(/[\r\n\t]+/g, ' ');
                    const codePattern = /\/\/|\/\*|\*\/|\{|\}|\[|\]|\bfunction\b|\bclass\b|\b0x[0-9A-Fa-f]+\b|\b0b[01]+\b/;
                    const isCode = codePattern.test(plainTextContent);
                    if (isCode) {
                        plainTextContent = plainTextContent.replace(codePattern, '');
                    }
                    // Remove anything enclosed in brackets
                    plainTextContent = plainTextContent.replace(/ *\([^)]*\) */g, '');
                    if (plainTextContent.length > MAX_CONTENT_LENGTH) {
                        plainTextContent = plainTextContent.substring(0, MAX_CONTENT_LENGTH) + '...';
                    }
                    response += `Content: ${plainTextContent.trim()}`;
                }
                response += `URL: ${url}`;
                // Get the index of the last message in the array
                const lastMessageIndex = conversation.messages.length - 1;
                // Append a new line and the new content to the existing content of the last message
                conversation.messages[lastMessageIndex].content += "\n" + response;
                console.log("A URL was provided, response: " + response)
            } catch (err) {
                console.error(err);
                return sendRand(errorMessages);
            }
        }
    }
    const controller = new AbortController();
    const timeout = setTimeout(() => {
        controller.abort();
@ -197,8 +260,8 @@ async function generateResponse(conversation, message) {
    let time = 0
    // define a function that shows the system load percentage and updates the message
    const showSystemLoad = async () => {
-        time = time + 7;
+        time = Number(time) + Number(process.env.REFRESH_INTERVAL);
-        cpuStat.usagePercent(function(err, percent, seconds) {
+        cpuStat.usagePercent(function (err, percent, seconds) {
            if (err) {
                return console.log(err);
            }
@ -210,7 +273,7 @@ async function generateResponse(conversation, message) {
            const embedData = {
                color: 0x0099ff,
-            title: 'Please wait.. I am Thinking...',
+                title: 'Please wait.. I am thinking...',
                fields: [
                    {
                        name: 'System Load',
@ -241,8 +304,8 @@ async function generateResponse(conversation, message) {
    // call the function initially
    await showSystemLoad();
-    // refresh the system load percentage and update the message every 7 seconds
+    // Grab the REFRESH_INTERVAL from ENV if not exist, lets use 7 (seconds)
-    const refreshInterval = setInterval(showSystemLoad, 7000);
+    const refreshInterval = setInterval(showSystemLoad, (process.env.REFRESH_INTERVAL || 7) * 1000);
    try {
        const response = await fetch(`http://${process.env.ROOT_IP}:${process.env.ROOT_PORT}/v1/chat/completions`, {
@ -253,7 +316,9 @@ async function generateResponse(conversation, message) {
            },
            body: JSON.stringify({
                messages: messagesCopy,
-              max_tokens: Number(process.env.MAX_TOKENS) // add the max_tokens parameter here
+                max_tokens: Number(process.env.MAX_TOKENS), // add the max_tokens parameter here
                repeat_penalty: Number(process.env.REPEAT_PENALTY)
            }),
            signal: controller.signal
        });
--- a/package.json
+++ b/package.json
@ -11,6 +11,7 @@
  "license": "ISC",
  "dependencies": {
    "discord.js": "^14.9.0",
    "cheerio": "^1.0.0-rc.12",
    "dotenv": "^16.0.3",
    "node-fetch": "^3.3.1",
    "os": "^0.1.2",
Author	SHA1	Message	Date
Raven Scott	e138c67fe2	Fix hugging face	2023-05-08 19:10:15 -07:00
Raven Scott	50d947b9c2	adding REPEAT_PENALTY	2023-05-08 19:10:15 -07:00
Raven Scott	b009ace3e7	Fix up env	2023-05-08 19:10:15 -07:00
Raven Scott	385234e05a	Remove the abuse of looping functions	2023-05-08 19:10:15 -07:00
Raven Scott	63a4486745	Remove any @ symbol to remove spam pings	2023-05-08 19:10:15 -07:00
Raven Scott	f4e97d9710	update model var	2023-05-08 19:10:15 -07:00
Raven Scott	4a05334b15	update hf	2023-05-08 19:10:15 -07:00
Raven Scott	cae0112077	update hf	2023-05-08 19:10:15 -07:00
Raven Scott	72059a430a	Revert "Urban" This reverts commit `bd5ef4db9a`.	2023-05-08 19:10:15 -07:00
Raven Scott	45ef05ac45	Revert "Adding urban dictionary lookups" This reverts commit `f87542132e`.	2023-05-08 19:10:15 -07:00
Raven Scott	ca758c5898	Adding urban dictionary lookups	2023-05-08 19:10:15 -07:00
Raven Scott	335f06ff69	Urban	2023-05-08 19:10:15 -07:00
Raven Scott	6b739d2b80	HuggingFace Deploy Instructions	2023-05-08 19:10:15 -07:00
Raven Scott	9b4981c539	HuggingFace Deploy Instructions	2023-05-08 19:10:15 -07:00
Raven Scott	9660806f94	HuggingFace Deploy Instructions	2023-05-08 19:10:15 -07:00
Raven Scott	c28bf9c022	HuggingFace Deploy Instructions	2023-05-08 19:10:15 -07:00
Raven Scott	6531b633f7	Move overflow delay from static to .env	2023-05-08 19:10:15 -07:00
Raven Scott	99ea986bbd	Fix Caching web reading the web: Append last message rather than add to the conversation.	2023-05-08 19:10:15 -07:00
Raven Scott	a34a910200	update default.env	2023-05-08 19:10:15 -07:00
Raven Scott	637c5182a3	remove \t	2023-05-08 19:10:15 -07:00
Raven Scott	5516e4c20b	remove redunant code	2023-05-08 19:10:15 -07:00
Raven Scott	9489068b8e	cleaner output from web scrapes	2023-05-08 19:10:15 -07:00
Raven Scott	f211fe2d67	update	2023-05-08 19:10:15 -07:00
Raven Scott	932474ec63	update	2023-05-08 19:10:15 -07:00
Raven Scott	fb08f0b592	return on error	2023-05-08 19:10:15 -07:00
Raven Scott	9197996ac7	update package.json for cheerio	2023-05-08 19:10:15 -07:00
Raven Scott	016f553192	update	2023-05-08 19:10:15 -07:00
Raven Scott	691b3137c3	Adding: Web Access allowing the AI to browse URLs	2023-05-08 19:10:15 -07:00
Raven Scott	f272839ab1	Fix ShowSystemLoad	2023-05-08 19:10:15 -07:00
Raven Scott	b1c1f6995a	Adding refresh interval to system load status	2023-05-08 19:10:15 -07:00
Raven Scott	9b00f2fcd8	Moving REFRESH_INTERVAL to docker-compose	2023-05-08 19:10:15 -07:00
Raven Scott	38f6c38f6d	Moving REFRESH_INTERVAL to .env	2023-05-08 19:10:15 -07:00
Raven Scott	2771052f23	lowercase Thinking it looks better :P	2023-05-08 19:10:15 -07:00
MrTuxedo	f87b61fb2b	merge upstream -> main	2023-04-30 16:11:40 -07:00
Raven Scott	0caf82d7f6	MAX_TOKENS to int	2023-04-26 19:24:24 -07:00
Raven Scott	d2aae48e33	adding N_THREADS and MAX_TOKENS vars	2023-04-26 19:24:24 -07:00
Raven Scott	3da598c218	Bug Fix: Chunk messages if response is too large	2023-04-26 19:24:24 -07:00