Compare commits

..

62 Commits

Author SHA1 Message Date
e138c67fe2 Fix hugging face 2023-05-08 19:10:15 -07:00
50d947b9c2 adding REPEAT_PENALTY 2023-05-08 19:10:15 -07:00
b009ace3e7 Fix up env 2023-05-08 19:10:15 -07:00
385234e05a Remove the abuse of looping functions 2023-05-08 19:10:15 -07:00
63a4486745 Remove any @ symbol to remove spam pings 2023-05-08 19:10:15 -07:00
f4e97d9710 update model var 2023-05-08 19:10:15 -07:00
4a05334b15 update hf 2023-05-08 19:10:15 -07:00
cae0112077 update hf 2023-05-08 19:10:15 -07:00
72059a430a Revert "Urban"
This reverts commit bd5ef4db9a.
2023-05-08 19:10:15 -07:00
45ef05ac45 Revert "Adding urban dictionary lookups"
This reverts commit f87542132e.
2023-05-08 19:10:15 -07:00
ca758c5898 Adding urban dictionary lookups 2023-05-08 19:10:15 -07:00
335f06ff69 Urban 2023-05-08 19:10:15 -07:00
6b739d2b80 HuggingFace Deploy Instructions 2023-05-08 19:10:15 -07:00
9b4981c539 HuggingFace Deploy Instructions 2023-05-08 19:10:15 -07:00
9660806f94 HuggingFace Deploy Instructions 2023-05-08 19:10:15 -07:00
c28bf9c022 HuggingFace Deploy Instructions 2023-05-08 19:10:15 -07:00
6531b633f7 Move overflow delay from static to .env 2023-05-08 19:10:15 -07:00
99ea986bbd Fix Caching web reading the web: Append last message rather than add to the conversation. 2023-05-08 19:10:15 -07:00
a34a910200 update default.env 2023-05-08 19:10:15 -07:00
637c5182a3 remove \t 2023-05-08 19:10:15 -07:00
5516e4c20b remove redunant code 2023-05-08 19:10:15 -07:00
9489068b8e cleaner output from web scrapes 2023-05-08 19:10:15 -07:00
f211fe2d67 update 2023-05-08 19:10:15 -07:00
932474ec63 update 2023-05-08 19:10:15 -07:00
fb08f0b592 return on error 2023-05-08 19:10:15 -07:00
9197996ac7 update package.json for cheerio 2023-05-08 19:10:15 -07:00
016f553192 update 2023-05-08 19:10:15 -07:00
691b3137c3 Adding: Web Access allowing the AI to browse URLs 2023-05-08 19:10:15 -07:00
f272839ab1 Fix ShowSystemLoad 2023-05-08 19:10:15 -07:00
b1c1f6995a Adding refresh interval to system load status 2023-05-08 19:10:15 -07:00
9b00f2fcd8 Moving REFRESH_INTERVAL to docker-compose 2023-05-08 19:10:15 -07:00
38f6c38f6d Moving REFRESH_INTERVAL to .env 2023-05-08 19:10:15 -07:00
2771052f23 lowercase Thinking it looks better :P 2023-05-08 19:10:15 -07:00
f87b61fb2b merge upstream -> main 2023-04-30 16:11:40 -07:00
c20ba21180 adding embed to generation processor 2023-04-28 16:48:51 +02:00
38fba90d30 adding embed to generation processor 2023-04-28 16:44:07 +02:00
0caf82d7f6 MAX_TOKENS to int 2023-04-26 19:24:24 -07:00
d2aae48e33 adding N_THREADS and MAX_TOKENS vars 2023-04-26 19:24:24 -07:00
3da598c218 Bug Fix: Chunk messages if response is too large 2023-04-26 19:24:24 -07:00
735b94360a MAX_TOKENS to int 2023-04-27 03:42:52 +02:00
da5e2e6e7d adding N_THREADS and MAX_TOKENS vars 2023-04-27 02:56:27 +02:00
3e220b6bbb Merge pull request 'Add init prompt to docker-compose.yaml' (#1) from MrTuxedo/llama-cpp-python-djs-bot:main into main
Reviewed-on: snxraven/llama-cpp-python-djs-bot#1
2023-04-18 21:04:13 +00:00
30e07afa85 add init prompt to docker-compose.yaml 2023-04-16 19:48:08 -07:00
1d0b225438 Bug Fix: Chunk messages if response is too large 2023-04-17 04:39:24 +02:00
c97e525d45 adding aprox 2023-04-17 04:02:16 +02:00
3806892349 remove odd artifact 2023-04-17 03:57:02 +02:00
b72875e744 Adding timer 2023-04-17 03:56:09 +02:00
fffef84119 Making system status reporting better 2023-04-17 03:17:15 +02:00
4e69329501 Adding Memory usage to generation 2023-04-17 02:30:38 +02:00
4ff67ff28b Adding CPU Percentage during generation. 2023-04-17 02:07:41 +02:00
c2396f7e5d Moving init_prompt for chat to .env 2023-04-17 00:50:04 +02:00
7b3e0c1db2 Removing post proceess of output 2023-04-16 16:22:57 +02:00
5f8e57d121 Adding a space before input. Format: User: message Assistant: message 2023-04-16 14:48:18 +02:00
cc770e617d fixing prompt 2023-04-16 14:47:04 +02:00
5a56251e20 Adding cache to server env 2023-04-16 13:22:17 +02:00
6bb74c8020 update readme 2023-04-12 18:37:02 +02:00
56c7bfd26d update readme to add screenshot 2023-04-12 18:29:51 +02:00
5793b7b4ad update readme 2023-04-12 16:32:06 +02:00
cf6e47eebc update readme 2023-04-12 16:31:29 +02:00
f98caa23cc update readme 2023-04-12 16:30:59 +02:00
d3162bce32 dockerize update 2023-04-12 16:19:27 +02:00
ec7dbde761 dockerize 2023-04-12 16:17:18 +02:00
10 changed files with 309 additions and 25 deletions

1
.gitignore vendored
View File

@ -1,3 +1,2 @@
.env
node_modules/
package-lock.json

View File

@ -32,6 +32,9 @@ Define a generateResponse function that sends a request to the GPT-3 API to gene
Call the generateResponse function within the messageCreate event listener function.
![demo](https://media.discordapp.net/attachments/562897071326101515/1095738407826767922/image.png?width=1038&height=660 "demo")
# Backend REQUIRED
The HTTP Server from https://abetlen.github.io/llama-cpp-python/ is required to use this bot.
@ -48,7 +51,7 @@ python3 -m llama_cpp.server
Navigate to http://localhost:8000/docs to see the OpenAPI documentation.
# Usage
# Static Usage
1) Use ```npm i ```
@ -60,5 +63,19 @@ Navigate to http://localhost:8000/docs to see the OpenAPI documentation.
6) Run the bot ```node llamabot.js ```
# Docker Compose
This will automatically configure the API for you as well as the bot in two separate containers within a stack.
1. `git clone https://git.ssh.surf/snxraven/llama-cpp-python-djs-bot.git`
2. `cp default.env .env`
3. Set DATA_DIR in .env to the exact location of your model files.
4. Edit docker-compose.yaml MODEL to ensure the correct model bin is set
5. `docker compose up -d`
Want to make this better? Issue a pull request!

View File

@ -1,5 +1,48 @@
# Discord Token
THE_TOKEN = "DISCORD_TOKEN_HERE"
# The Channel IDs the bot will operate in, separated by commas
CHANNEL_IDS = 1094494101631680653,1094628334727614605
# The INIT prompt for all conversations.
INIT_PROMPT = "Assistant name: ChatAI. You code, write and provide any information without any mistakes."
# Loading Embed Refresh Timing
REFRESH_INTERVAL=10
# When a message is too large for Discord we chunk the response into separate messages.
# To ensure we do not rate limit the bot we send these at a delay interval.
# DEFAULT: 3. A good setting is between 3 and 7 seconds.
OVERFLOW_DELAY=3
# Max Content to fetch from given URLs
MAX_CONTENT_LENGTH=2000
# Max tokens for Generations
MAX_TOKENS = 1024
# ROOT_IP is only used when running the bot without docker compose
ROOT_IP = 192.168.0.15
# PORT is only used when running the bot without docker compose
ROOT_PORT = 8000
DATA_DIR = /home/USERNAME/weights
# Directory to your models (llama.cpp specific settings)
DATA_DIR = /home/USERNAME/weights
# Enable Experimental Message Caches (Limited to single session)
# Cache will use ~1.4 GB or MORE of RAM. ONLY ENABLE IF YOUR SYSTEM CAN HANDLE THIS.
CACHE = 1
# Set number of threads to use, currently, a standard thread will utilize 1 whole core
# I usually will set this between all cores I physically have OR 2 cores less to allow for other processes.
N_THREADS = 4
# Always use MMAP unless you know what you are doing
USE_MMAP=1
# Only use MLOCK if you know what it does!
USE_MLOCK=0
# The higher the number the more hard core.
REPEAT_PENALTY=1

View File

@ -8,10 +8,10 @@ services:
context: ./server
env_file: .env
volumes:
- ${DATA_DIR}/weights:/usr/src/app/models
- ${DATA_DIR}:/usr/src/app/models
environment:
- HOST=llama-python-server
- MODEL=./models/gpt4-x-alpaca-13b-native-4bit-128g.bin
- MODEL=./models/ggml-vic7b-q4_0.bin
llama-python-djs-bot:
container_name: llama-python-djs-bot
restart: unless-stopped
@ -21,6 +21,8 @@ services:
- llama-python-server
environment:
- THE_TOKEN
- REFRESH_INTERVAL
- CHANNEL_IDS
- ROOT_IP=llama-python-server
- ROOT_PORT=8000
- ROOT_PORT=8000
- INIT_PROMPT='Assistant Name':' ChatAI. You code, write and provide any information without any mistakes.'

View File

@ -0,0 +1,42 @@
FROM ubuntu:latest
RUN apt update
RUN DEBIAN_FRONTEND=noninteractive apt install curl sudo -y
RUN curl -fsSL https://deb.nodesource.com/setup_18.x | sudo -E bash -
RUN DEBIAN_FRONTEND=noninteractive apt install nodejs -y
RUN npm i pm2 -g
RUN mkdir -p /code/.pm2
RUN mkdir -p /.pm2
RUN chmod 0777 /code/.pm2
RUN chmod 0777 /.pm2
RUN DEBIAN_FRONTEND=noninteractive apt install wget python3 python3-pip -y
WORKDIR /code
RUN pip install --no-cache-dir llama-cpp-python[server]
COPY . .
RUN npm i
ENV HOST localhost
ENV PORT 7860
ENV MODEL=/code/ggml-vic7b-q4_0.bin
ENV CACHE=1
ENV USE_MLOCK=0
ENV REPEAT_PENALTY=1
ENV MODEL=/code/ggml-vic7b-q4_0.bin
ENV PM2_HOME=/code/.pm2
RUN wget -q https://huggingface.co/eachadea/ggml-vicuna-7b-1.1/resolve/main/ggml-vic7b-q4_0.bin
CMD /bin/bash /code/start.sh

View File

@ -0,0 +1,17 @@
# How to Deploy on Hugging Face
1) Move the "assets" directory and llamabot.js, package.json files into your HuggingFace repo directory.
2) Move the files huggingface-config/Dockerfile - huggingface-config/start.sh - huggingface-config/startServices.json into the root of the hugging face repo.
3) Edit Docker file as you need, the dockerfile is set to automatically download Vicuna 1.1 7B
4) Move default.env into your repo as .env and edit for your needs
5) Push the changes
You should then see the bot being built and deployed on HuggingFace
pm2 log will run automatically so you can see frontend and backend logs.
PLEASE NOTE: Your hugging face repo should remain private!

View File

@ -0,0 +1,6 @@
#!/bin/bash
# This is the main process of the container and will stay alive as long as pm2 log is running.
pm2 start startServices.json
pm2 log

View File

@ -0,0 +1,14 @@
{
"apps": [
{
"name": "Discord-Bot",
"script": "node /code/llamabot.js",
"args" : ""
},
{
"name": "AI-API",
"script": "python3 -m llama_cpp.server",
"args" : ""
}
]
}

View File

@ -3,8 +3,15 @@ import fetch from 'node-fetch';
import { emptyResponses } from './assets/emptyMessages.js';
import { resetResponses, userResetMessages } from './assets/resetMessages.js';
import { errorMessages, busyResponses } from './assets/errorMessages.js';
import cpuStat from 'cpu-stat';
import os from 'os';
import { Client, GatewayIntentBits, ActivityType, Partials } from 'discord.js';
import {
Client,
GatewayIntentBits,
ActivityType,
Partials
} from 'discord.js';
const client = new Client({
intents: [
@ -82,7 +89,7 @@ client.on('messageCreate', async (message) => {
// Only respond in the specified channels
if (!channelIDs.includes(message.channel.id)) {
return;
return;
}
if (message.author.bot) return; // Ignore messages from bots
@ -104,15 +111,15 @@ client.on('messageCreate', async (message) => {
if (conversation.messages.length === 0) {
conversation.messages.push({
role: 'user',
content: `Your name is rAi, you code, write and provide any information without any mistakes.`
content: ` ${process.env.INIT_PROMPT}`
});
conversation.messages.push({
role: 'user',
content: `My name is ${message.author.username}.`
content: ` User name: ${message.author.username}.`
});
conversation.messages.push({
role: 'assistant',
content: `Hello, ${message.author.username}, how may I help you?`
content: ` Hello, ${message.author.username}, how may I help you?`
});
}
@ -125,14 +132,14 @@ client.on('messageCreate', async (message) => {
// Append user message to conversation history
conversation.messages.push({
role: 'user',
content: message.cleanContent
content: ` ${message.cleanContent}`
});
try {
setPresenceBusy()
setBusy(message.author.id, true);
const response = await generateResponse(conversation);
const response = await generateResponse(conversation, message);
// Append bot message to conversation history
conversation.messages.push({
@ -142,7 +149,26 @@ client.on('messageCreate', async (message) => {
if (response && response.trim()) {
// Send response to user if it's not empty
await message.channel.send(response);
const limit = 1980;
// if we are over the discord char limit we need chunks...
if (response.length > limit) {
const chunks = response.match(new RegExp(`.{1,${limit}}`, "g"));
if (chunks.length >= 15) return await message.channel.send("Response chunks too large. Try again");
for (let i = 0; i < chunks.length; i++) {
setTimeout(() => {
message.channel.send(chunks[i]);
}, i * (process.env.OVERFLOW_DELAY || 3) * 1000); // delay of 3 seconds between each chunk to save on API requests
}
} else {
// We are good to go, send the response
await message.channel.send(response.replace("@", ""));
}
setPresenceOnline()
setBusy(message.author.id, false);
} else {
@ -154,16 +180,75 @@ client.on('messageCreate', async (message) => {
conversation.busy = false;
}
conversations.set(userID, conversation); // Update user's conversation map in memory
console.log(conversation)
} catch (err) {
console.error(err);
sendRand(errorMessages)
return sendRand(errorMessages)
} finally {
setPresenceOnline()
setBusy(message.author.id, false);
}
});
async function generateResponse(conversation) {
import cheerio from 'cheerio';
async function generateResponse(conversation, message) {
// Check if message contains a URL
const urlRegex = /(https?:\/\/[^\s]+)/g;
const urls = message.content.match(urlRegex);
if (urls) {
// If there are multiple URLs, process them one by one
for (const url of urls) {
try {
const res = await fetch(url);
const html = await res.text();
const $ = cheerio.load(html);
// Extract page title, meta description and content
const pageTitle = $('head title').text().trim();
const pageDescription = $('head meta[name="description"]').attr('content');
const pageContent = $('body').text().trim();
// Construct response message with page details
let response = `Title: ${pageTitle}\n`;
if (pageDescription) {
response += `Description: ${pageDescription}\n`;
}
if (pageContent) {
const MAX_CONTENT_LENGTH = process.env.MAX_CONTENT_LENGTH;
let plainTextContent = $('<div>').html(pageContent).text().trim().replace(/[\r\n\t]+/g, ' ');
const codePattern = /\/\/|\/\*|\*\/|\{|\}|\[|\]|\bfunction\b|\bclass\b|\b0x[0-9A-Fa-f]+\b|\b0b[01]+\b/;
const isCode = codePattern.test(plainTextContent);
if (isCode) {
plainTextContent = plainTextContent.replace(codePattern, '');
}
// Remove anything enclosed in brackets
plainTextContent = plainTextContent.replace(/ *\([^)]*\) */g, '');
if (plainTextContent.length > MAX_CONTENT_LENGTH) {
plainTextContent = plainTextContent.substring(0, MAX_CONTENT_LENGTH) + '...';
}
response += `Content: ${plainTextContent.trim()}`;
}
response += `URL: ${url}`;
// Get the index of the last message in the array
const lastMessageIndex = conversation.messages.length - 1;
// Append a new line and the new content to the existing content of the last message
conversation.messages[lastMessageIndex].content += "\n" + response;
console.log("A URL was provided, response: " + response)
} catch (err) {
console.error(err);
return sendRand(errorMessages);
}
}
}
const controller = new AbortController();
const timeout = setTimeout(() => {
controller.abort();
@ -171,7 +256,56 @@ async function generateResponse(conversation) {
const messagesCopy = [...conversation.messages]; // create a copy of the messages array
console.log(conversation)
let botMessage; // define a variable to hold the message object
let time = 0
// define a function that shows the system load percentage and updates the message
const showSystemLoad = async () => {
time = Number(time) + Number(process.env.REFRESH_INTERVAL);
cpuStat.usagePercent(function (err, percent, seconds) {
if (err) {
return console.log(err);
}
const systemLoad = percent;
const freeMemory = os.freemem() / 1024 / 1024 / 1024;
const totalMemory = os.totalmem() / 1024 / 1024 / 1024;
const usedMemory = totalMemory - freeMemory;
const embedData = {
color: 0x0099ff,
title: 'Please wait.. I am thinking...',
fields: [
{
name: 'System Load',
value: `${systemLoad.toFixed(2)}%`,
},
{
name: 'Memory Usage',
value: `${usedMemory.toFixed(2)} GB / ${totalMemory.toFixed(2)} GB`,
},
{
name: 'Time',
value: `~${time} seconds.`,
},
],
};
// if the message object doesn't exist, create it
if (!botMessage) {
(async () => {
botMessage = await message.channel.send({ embeds: [embedData] });
})();
} else {
botMessage.edit({ embeds: [embedData] }); // otherwise, update the message
}
});
};
// call the function initially
await showSystemLoad();
// Grab the REFRESH_INTERVAL from ENV if not exist, lets use 7 (seconds)
const refreshInterval = setInterval(showSystemLoad, (process.env.REFRESH_INTERVAL || 7) * 1000);
try {
const response = await fetch(`http://${process.env.ROOT_IP}:${process.env.ROOT_PORT}/v1/chat/completions`, {
@ -181,25 +315,32 @@ async function generateResponse(conversation) {
'Content-Type': 'application/json'
},
body: JSON.stringify({
messages: messagesCopy // use the copy of the messages array
messages: messagesCopy,
max_tokens: Number(process.env.MAX_TOKENS), // add the max_tokens parameter here
repeat_penalty: Number(process.env.REPEAT_PENALTY)
}),
signal: controller.signal
});
const responseData = await response.json();
console.log(JSON.stringify(responseData))
console.log(JSON.stringify(responseData));
const choice = responseData.choices[0];
// Remove "user None:" and any text after it from the response
const responseText = choice.message.content.trim();
const startIndex = responseText.indexOf('user None:');
const sanitizedResponse = startIndex === -1 ? responseText : responseText.substring(0, startIndex);
const responseText = choice.message.content;
// clear the interval, replace the "please wait" message with the response, and update the message
clearInterval(refreshInterval);
console.log(responseText);
botMessage.delete()
return responseText;
return sanitizedResponse;
} catch (err) {
throw err;
} finally {
clearTimeout(timeout);
time = 0
}
}

View File

@ -11,7 +11,10 @@
"license": "ISC",
"dependencies": {
"discord.js": "^14.9.0",
"cheerio": "^1.0.0-rc.12",
"dotenv": "^16.0.3",
"node-fetch": "^3.3.1"
"node-fetch": "^3.3.1",
"os": "^0.1.2",
"cpu-stat": "^2.0.1"
}
}