Compare commits


62 Commits

SHA1 Message Date
e138c67fe2 Fix hugging face 2023-05-08 19:10:15 -07:00
50d947b9c2 adding REPEAT_PENALTY 2023-05-08 19:10:15 -07:00
b009ace3e7 Fix up env 2023-05-08 19:10:15 -07:00
385234e05a Remove the abuse of looping functions 2023-05-08 19:10:15 -07:00
63a4486745 Remove any @ symbol to remove spam pings 2023-05-08 19:10:15 -07:00
f4e97d9710 update model var 2023-05-08 19:10:15 -07:00
4a05334b15 update hf 2023-05-08 19:10:15 -07:00
cae0112077 update hf 2023-05-08 19:10:15 -07:00
72059a430a Revert "Urban"
This reverts commit bd5ef4db9a.
2023-05-08 19:10:15 -07:00
45ef05ac45 Revert "Adding urban dictionary lookups"
This reverts commit f87542132e.
2023-05-08 19:10:15 -07:00
ca758c5898 Adding urban dictionary lookups 2023-05-08 19:10:15 -07:00
335f06ff69 Urban 2023-05-08 19:10:15 -07:00
6b739d2b80 HuggingFace Deploy Instructions 2023-05-08 19:10:15 -07:00
9b4981c539 HuggingFace Deploy Instructions 2023-05-08 19:10:15 -07:00
9660806f94 HuggingFace Deploy Instructions 2023-05-08 19:10:15 -07:00
c28bf9c022 HuggingFace Deploy Instructions 2023-05-08 19:10:15 -07:00
6531b633f7 Move overflow delay from static to .env 2023-05-08 19:10:15 -07:00
99ea986bbd Fix Caching web reading the web: Append last message rather than add to the conversation. 2023-05-08 19:10:15 -07:00
a34a910200 update default.env 2023-05-08 19:10:15 -07:00
637c5182a3 remove \t 2023-05-08 19:10:15 -07:00
5516e4c20b remove redunant code 2023-05-08 19:10:15 -07:00
9489068b8e cleaner output from web scrapes 2023-05-08 19:10:15 -07:00
f211fe2d67 update 2023-05-08 19:10:15 -07:00
932474ec63 update 2023-05-08 19:10:15 -07:00
fb08f0b592 return on error 2023-05-08 19:10:15 -07:00
9197996ac7 update package.json for cheerio 2023-05-08 19:10:15 -07:00
016f553192 update 2023-05-08 19:10:15 -07:00
691b3137c3 Adding: Web Access allowing the AI to browse URLs 2023-05-08 19:10:15 -07:00
f272839ab1 Fix ShowSystemLoad 2023-05-08 19:10:15 -07:00
b1c1f6995a Adding refresh interval to system load status 2023-05-08 19:10:15 -07:00
9b00f2fcd8 Moving REFRESH_INTERVAL to docker-compose 2023-05-08 19:10:15 -07:00
38f6c38f6d Moving REFRESH_INTERVAL to .env 2023-05-08 19:10:15 -07:00
2771052f23 lowercase Thinking it looks better :P 2023-05-08 19:10:15 -07:00
f87b61fb2b merge upstream -> main 2023-04-30 16:11:40 -07:00
c20ba21180 adding embed to generation processor 2023-04-28 16:48:51 +02:00
38fba90d30 adding embed to generation processor 2023-04-28 16:44:07 +02:00
0caf82d7f6 MAX_TOKENS to int 2023-04-26 19:24:24 -07:00
d2aae48e33 adding N_THREADS and MAX_TOKENS vars 2023-04-26 19:24:24 -07:00
3da598c218 Bug Fix: Chunk messages if response is too large 2023-04-26 19:24:24 -07:00
735b94360a MAX_TOKENS to int 2023-04-27 03:42:52 +02:00
da5e2e6e7d adding N_THREADS and MAX_TOKENS vars 2023-04-27 02:56:27 +02:00
3e220b6bbb Merge pull request 'Add init prompt to docker-compose.yaml' (#1) from MrTuxedo/llama-cpp-python-djs-bot:main into main
Reviewed-on: snxraven/llama-cpp-python-djs-bot#1
2023-04-18 21:04:13 +00:00
30e07afa85 add init prompt to docker-compose.yaml 2023-04-16 19:48:08 -07:00
1d0b225438 Bug Fix: Chunk messages if response is too large 2023-04-17 04:39:24 +02:00
c97e525d45 adding aprox 2023-04-17 04:02:16 +02:00
3806892349 remove odd artifact 2023-04-17 03:57:02 +02:00
b72875e744 Adding timer 2023-04-17 03:56:09 +02:00
fffef84119 Making system status reporting better 2023-04-17 03:17:15 +02:00
4e69329501 Adding Memory usage to generation 2023-04-17 02:30:38 +02:00
4ff67ff28b Adding CPU Percentage during generation. 2023-04-17 02:07:41 +02:00
c2396f7e5d Moving init_prompt for chat to .env 2023-04-17 00:50:04 +02:00
7b3e0c1db2 Removing post proceess of output 2023-04-16 16:22:57 +02:00
5f8e57d121 Adding a space before input. Format: User: message Assistant: message 2023-04-16 14:48:18 +02:00
cc770e617d fixing prompt 2023-04-16 14:47:04 +02:00
5a56251e20 Adding cache to server env 2023-04-16 13:22:17 +02:00
6bb74c8020 update readme 2023-04-12 18:37:02 +02:00
56c7bfd26d update readme to add screenshot 2023-04-12 18:29:51 +02:00
5793b7b4ad update readme 2023-04-12 16:32:06 +02:00
cf6e47eebc update readme 2023-04-12 16:31:29 +02:00
f98caa23cc update readme 2023-04-12 16:30:59 +02:00
d3162bce32 dockerize update 2023-04-12 16:19:27 +02:00
ec7dbde761 dockerize 2023-04-12 16:17:18 +02:00
15 changed files with 381 additions and 44 deletions

.gitignore (2 changes)
@@ -1,2 +1,2 @@
.env
node_modules/

Dockerfile (new file, 10 lines)
@@ -0,0 +1,10 @@
FROM node:slim
WORKDIR /app
COPY package*.json ./
RUN npm install --omit=dev
COPY . .
CMD node llamabot.js

README.md
@@ -32,6 +32,9 @@ Define a generateResponse function that sends a request to the GPT-3 API to gene
Call the generateResponse function within the messageCreate event listener function.
![demo](https://media.discordapp.net/attachments/562897071326101515/1095738407826767922/image.png?width=1038&height=660 "demo")
# Backend REQUIRED
The HTTP Server from https://abetlen.github.io/llama-cpp-python/ is required to use this bot.
@@ -48,7 +51,7 @@ python3 -m llama_cpp.server
Navigate to http://localhost:8000/docs to see the OpenAPI documentation.
# Usage
# Static Usage
1) Use ```npm i ```
@@ -60,5 +63,19 @@ Navigate to http://localhost:8000/docs to see the OpenAPI documentation.
6) Run the bot ```node llamabot.js ```
# Docker Compose
This will automatically configure the API for you as well as the bot in two separate containers within a stack.
1. `git clone https://git.ssh.surf/snxraven/llama-cpp-python-djs-bot.git`
2. `cp default.env .env`
3. Set DATA_DIR in .env to the exact location of your model files.
4. Edit MODEL in docker-compose.yaml to ensure the correct model bin is set
5. `docker compose up -d`
Want to make this better? Issue a pull request!
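As a quick sanity check that the backend is reachable before launching the bot, a minimal sketch along these lines can help (hypothetical file name and prompt; adjust localhost:8000 to your ROOT_IP and ROOT_PORT):

```js
// check-backend.js: hypothetical helper, not part of this repo.
// Sends one chat completion to the llama-cpp-python server described above.
import fetch from 'node-fetch';

const res = await fetch('http://localhost:8000/v1/chat/completions', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify({
    messages: [{ role: 'user', content: 'Say hello.' }],
    max_tokens: 32 // keep the test generation short
  })
});
const data = await res.json();
// The server mirrors the OpenAI chat schema: choices[0].message.content
console.log(data.choices[0].message.content);
```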

assets/emptyMessages.js
@@ -1,4 +1,4 @@
const emptyResponses = [
export const emptyResponses = [
"Oh boy, this is a tough one! Unfortunately, I don't have much insight to share on this topic.",
"Hmm, I'm scratching my head on this one. Sorry to say that I don't have much to offer.",
"Wish I had a witty remark, but alas, I don't have much to contribute to this discussion.",
@@ -28,5 +28,3 @@ const emptyResponses = [
"I'm afraid I don't have much experience with this, so I can't be of much help.",
"I wish I had some grand insight to share, but alas, I don't have much to offer in that regard."
];
module.exports = emptyResponses;
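The change in this file (and in the two message files below) is part of the repo's CommonJS-to-ES-modules migration, enabled by `"type": "module"` in the new package.json: the `module.exports` assignment goes away and the declaration gains an `export` keyword. A minimal sketch of the pattern:

```js
// Before (CommonJS):
//   const emptyResponses = [ /* ... */ ];
//   module.exports = emptyResponses;

// After (ESM):
export const emptyResponses = [ /* ... */ ];
// Consumers switch from require('./assets/emptyMessages.js')
// to: import { emptyResponses } from './assets/emptyMessages.js';
```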

assets/errorMessages.js
@@ -1,4 +1,4 @@
const errorMessages = [
export const errorMessages = [
"Uh oh, looks like something went awry! Try !reset to start fresh.",
"Oops, we hit a bump in the road! Give !reset a try to start anew.",
"We've encountered an error, but !reset can help us out! Give it a go.",
@@ -21,7 +21,7 @@ const errorMessages = [
"Oopsie daisy! Give !reset a try and we'll start over."
];
const busyResponses = [
export const busyResponses = [
"Sorry about that! Looks like I'm tied up at the moment. Please try again later.",
"Oops, I'm currently busy with something else. Please try again later.",
"Looks like I'm already working on something. Can you try again later?",
@@ -43,5 +43,3 @@ const errorMessages = [
"Looks like I'm currently engaged with something else. Please try again later.",
"I'm currently unavailable. Can you try again later?"
];
module.exports = { errorMessages, busyResponses};

assets/resetMessages.js
@@ -1,4 +1,4 @@
const resetResponses = [
export const resetResponses = [
"Whoops, let's start fresh! What can I assist you with now?",
"Looks like we need a fresh start! What do you need help with?",
"To avoid any gremlins in the system, let's reset! How can I assist you now?",
@@ -21,7 +21,7 @@ const resetResponses = [
"Let's hit the restart button to make sure we're on the right track. What can I help you with now?"
];
const userResetMessages = [
export const userResetMessages = [
"All good, we're starting fresh! How can I assist you?",
"Got it, let's start over! How can I help you today?",
"Alright, starting anew! What can I help you with?",
@@ -43,5 +43,3 @@ const resetResponses = [
"Sure thing, we'll start over! What can I help you with today?",
"Conversation reset, confirmed! What do you need help with?"
];
module.exports = {resetResponses, userResetMessages};

default.env
@@ -1,4 +1,48 @@
# Discord Token
THE_TOKEN = "DISCORD_TOKEN_HERE"
# The channel IDs the bot will operate in, separated by commas
CHANNEL_IDS = 1094494101631680653,1094628334727614605
# The INIT prompt for all conversations.
INIT_PROMPT = "Assistant name: ChatAI. You code, write and provide any information without any mistakes."
# Loading embed refresh timing
REFRESH_INTERVAL=10
# When a message is too large for Discord, we chunk the response into separate messages.
# To ensure we do not rate-limit the bot, we send these at a delay interval.
# DEFAULT: 3. A good setting is between 3 and 7 seconds.
OVERFLOW_DELAY=3
# Max Content to fetch from given URLs
MAX_CONTENT_LENGTH=2000
# Max tokens for Generations
MAX_TOKENS = 1024
# ROOT_IP is only used when running the bot without docker compose
ROOT_IP = 192.168.0.15
# ROOT_PORT is only used when running the bot without docker compose
ROOT_PORT = 8000
# Directory to your models (llama.cpp-specific settings)
DATA_DIR = /home/USERNAME/weights
# Enable Experimental Message Caches (Limited to single session)
# Cache will use ~1.4 GB or MORE of RAM. ONLY ENABLE IF YOUR SYSTEM CAN HANDLE THIS.
CACHE = 1
# Set the number of threads to use; currently, a standard thread will utilize one whole core.
# I usually set this to all the cores I physically have, OR two fewer, to allow for other processes.
N_THREADS = 4
# Always use MMAP unless you know what you are doing
USE_MMAP=1
# Only use MLOCK if you know what it does!
USE_MLOCK=0
# Repetition penalty: the higher the number, the more strongly repetition is penalized.
REPEAT_PENALTY=1
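The OVERFLOW_DELAY comment above describes how oversized replies are split and throttled. Below is a hedged sketch of that logic, mirroring the chunking code in the llamabot.js diff further down; the helper name sendChunked is hypothetical:

```js
// sendChunked: hypothetical helper showing OVERFLOW_DELAY in use.
import 'dotenv/config.js';

const limit = 1980; // just under Discord's 2000-character message cap

function sendChunked(channel, response) {
  if (response.length <= limit) return channel.send(response);
  const chunks = response.match(new RegExp(`.{1,${limit}}`, 'g'));
  chunks.forEach((chunk, i) => {
    // one chunk every OVERFLOW_DELAY seconds (default 3) to avoid rate limits
    setTimeout(() => channel.send(chunk), i * (process.env.OVERFLOW_DELAY || 3) * 1000);
  });
}
```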

docker-compose.yml (new file, 28 lines)
@@ -0,0 +1,28 @@
version: '3.9'
services:
llama-python-server:
container_name: llama-python-server
restart: unless-stopped
build:
context: ./server
env_file: .env
volumes:
- ${DATA_DIR}:/usr/src/app/models
environment:
- HOST=llama-python-server
- MODEL=./models/ggml-vic7b-q4_0.bin
llama-python-djs-bot:
container_name: llama-python-djs-bot
restart: unless-stopped
build:
context: .
depends_on:
- llama-python-server
environment:
- THE_TOKEN
- REFRESH_INTERVAL
- CHANNEL_IDS
- ROOT_IP=llama-python-server
- ROOT_PORT=8000
- INIT_PROMPT='Assistant Name':' ChatAI. You code, write and provide any information without any mistakes.'
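Note that ROOT_IP is set to the server's container name (llama-python-server), which Docker's internal DNS resolves inside the stack. The bot assembles its backend URL from these two variables, exactly as in the llamabot.js diff below:

```js
// How the compose environment above becomes the backend URL in llamabot.js:
const apiURL = `http://${process.env.ROOT_IP}:${process.env.ROOT_PORT}/v1/chat/completions`;
```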

huggingface-config/Dockerfile
@@ -0,0 +1,42 @@
FROM ubuntu:latest
RUN apt update
RUN DEBIAN_FRONTEND=noninteractive apt install curl sudo -y
RUN curl -fsSL https://deb.nodesource.com/setup_18.x | sudo -E bash -
RUN DEBIAN_FRONTEND=noninteractive apt install nodejs -y
RUN npm i pm2 -g
RUN mkdir -p /code/.pm2
RUN mkdir -p /.pm2
RUN chmod 0777 /code/.pm2
RUN chmod 0777 /.pm2
RUN DEBIAN_FRONTEND=noninteractive apt install wget python3 python3-pip -y
WORKDIR /code
RUN pip install --no-cache-dir llama-cpp-python[server]
COPY . .
RUN npm i
ENV HOST localhost
ENV PORT 7860
ENV MODEL=/code/ggml-vic7b-q4_0.bin
ENV CACHE=1
ENV USE_MLOCK=0
ENV REPEAT_PENALTY=1
ENV MODEL=/code/ggml-vic7b-q4_0.bin
ENV PM2_HOME=/code/.pm2
RUN wget -q https://huggingface.co/eachadea/ggml-vicuna-7b-1.1/resolve/main/ggml-vic7b-q4_0.bin
CMD /bin/bash /code/start.sh

@@ -0,0 +1,17 @@
# How to Deploy on Hugging Face
1) Move the "assets" directory and the llamabot.js and package.json files into your Hugging Face repo directory.
2) Move huggingface-config/Dockerfile, huggingface-config/start.sh, and huggingface-config/startServices.json into the root of the Hugging Face repo.
3) Edit the Dockerfile as needed; it is set to automatically download Vicuna 1.1 7B.
4) Move default.env into your repo as .env and edit it for your needs.
5) Push the changes.
You should then see the bot being built and deployed on Hugging Face.
pm2 log runs automatically so you can see frontend and backend logs.
PLEASE NOTE: Your Hugging Face repo should remain private!

huggingface-config/start.sh
@@ -0,0 +1,6 @@
#!/bin/bash
# This is the main process of the container and will stay alive as long as pm2 log is running.
pm2 start startServices.json
pm2 log

huggingface-config/startServices.json
@@ -0,0 +1,14 @@
{
"apps": [
{
"name": "Discord-Bot",
"script": "node /code/llamabot.js",
"args" : ""
},
{
"name": "AI-API",
"script": "python3 -m llama_cpp.server",
"args" : ""
}
]
}

llamabot.js
@@ -1,23 +1,23 @@
const Discord = require('discord.js');
const fetch = require('node-fetch');
const emptyResponses = require('./assets/emptyMessages.js');
const { resetResponses, userResetMessages } = require('./assets/resetMessages.js');
const { errorMessages, busyResponses } = require('./assets/errorMessages.js');
import "dotenv/config.js";
import fetch from 'node-fetch';
import { emptyResponses } from './assets/emptyMessages.js';
import { resetResponses, userResetMessages } from './assets/resetMessages.js';
import { errorMessages, busyResponses } from './assets/errorMessages.js';
import cpuStat from 'cpu-stat';
import os from 'os';
require('dotenv').config()
const {
import {
Client,
GatewayIntentBits,
ActivityType,
Partials
} = require('discord.js');
} from 'discord.js';
const client = new Client({
intents: [
GatewayIntentBits.DirectMessages,
GatewayIntentBits.Guilds,
GatewayIntentBits.GuildBans,
GatewayIntentBits.GuildModeration,
GatewayIntentBits.GuildMessages,
GatewayIntentBits.MessageContent,
],
@@ -111,15 +111,15 @@ client.on('messageCreate', async (message) => {
if (conversation.messages.length === 0) {
conversation.messages.push({
role: 'user',
content: `Your name is rAi, you code, write and provide any information without any mistakes.`
content: ` ${process.env.INIT_PROMPT}`
});
conversation.messages.push({
role: 'user',
content: `My name is ${message.author.username}.`
content: ` User name: ${message.author.username}.`
});
conversation.messages.push({
role: 'assistant',
content: `Hello, ${message.author.username}, how may I help you?`
content: ` Hello, ${message.author.username}, how may I help you?`
});
}
@@ -132,14 +132,14 @@ client.on('messageCreate', async (message) => {
// Append user message to conversation history
conversation.messages.push({
role: 'user',
content: message.cleanContent
content: ` ${message.cleanContent}`
});
try {
setPresenceBusy()
setBusy(message.author.id, true);
const response = await generateResponse(conversation);
const response = await generateResponse(conversation, message);
// Append bot message to conversation history
conversation.messages.push({
@@ -149,7 +149,26 @@ client.on('messageCreate', async (message) => {
if (response && response.trim()) {
// Send response to user if it's not empty
await message.channel.send(response);
const limit = 1980;
// if we are over the discord char limit we need chunks...
if (response.length > limit) {
const chunks = response.match(new RegExp(`.{1,${limit}}`, "g"));
if (chunks.length >= 15) return await message.channel.send("Response chunks too large. Try again");
for (let i = 0; i < chunks.length; i++) {
setTimeout(() => {
message.channel.send(chunks[i]);
}, i * (process.env.OVERFLOW_DELAY || 3) * 1000); // delay of OVERFLOW_DELAY seconds (default 3) between chunks to avoid rate limits
}
} else {
// We are good to go, send the response
await message.channel.send(response.replace("@", ""));
}
setPresenceOnline()
setBusy(message.author.id, false);
} else {
@@ -161,16 +180,75 @@ client.on('messageCreate', async (message) => {
conversation.busy = false;
}
conversations.set(userID, conversation); // Update user's conversation map in memory
console.log(conversation)
} catch (err) {
console.error(err);
sendRand(errorMessages)
return sendRand(errorMessages)
} finally {
setPresenceOnline()
setBusy(message.author.id, false);
}
});
async function generateResponse(conversation) {
import cheerio from 'cheerio';
async function generateResponse(conversation, message) {
// Check if message contains a URL
const urlRegex = /(https?:\/\/[^\s]+)/g;
const urls = message.content.match(urlRegex);
if (urls) {
// If there are multiple URLs, process them one by one
for (const url of urls) {
try {
const res = await fetch(url);
const html = await res.text();
const $ = cheerio.load(html);
// Extract page title, meta description and content
const pageTitle = $('head title').text().trim();
const pageDescription = $('head meta[name="description"]').attr('content');
const pageContent = $('body').text().trim();
// Construct response message with page details
let response = `Title: ${pageTitle}\n`;
if (pageDescription) {
response += `Description: ${pageDescription}\n`;
}
if (pageContent) {
const MAX_CONTENT_LENGTH = process.env.MAX_CONTENT_LENGTH;
let plainTextContent = $('<div>').html(pageContent).text().trim().replace(/[\r\n\t]+/g, ' ');
const codePattern = /\/\/|\/\*|\*\/|\{|\}|\[|\]|\bfunction\b|\bclass\b|\b0x[0-9A-Fa-f]+\b|\b0b[01]+\b/;
const isCode = codePattern.test(plainTextContent);
if (isCode) {
plainTextContent = plainTextContent.replace(codePattern, '');
}
// Remove anything enclosed in brackets
plainTextContent = plainTextContent.replace(/ *\([^)]*\) */g, '');
if (plainTextContent.length > MAX_CONTENT_LENGTH) {
plainTextContent = plainTextContent.substring(0, MAX_CONTENT_LENGTH) + '...';
}
response += `Content: ${plainTextContent.trim()}`;
}
response += `URL: ${url}`;
// Get the index of the last message in the array
const lastMessageIndex = conversation.messages.length - 1;
// Append a new line and the new content to the existing content of the last message
conversation.messages[lastMessageIndex].content += "\n" + response;
console.log("A URL was provided, response: " + response)
} catch (err) {
console.error(err);
return sendRand(errorMessages);
}
}
}
const controller = new AbortController();
const timeout = setTimeout(() => {
controller.abort();
@@ -178,7 +256,56 @@ async function generateResponse(conversation) {
const messagesCopy = [...conversation.messages]; // create a copy of the messages array
console.log(conversation)
let botMessage; // define a variable to hold the message object
let time = 0
// define a function that shows the system load percentage and updates the message
const showSystemLoad = async () => {
time = Number(time) + Number(process.env.REFRESH_INTERVAL);
cpuStat.usagePercent(function (err, percent, seconds) {
if (err) {
return console.log(err);
}
const systemLoad = percent;
const freeMemory = os.freemem() / 1024 / 1024 / 1024;
const totalMemory = os.totalmem() / 1024 / 1024 / 1024;
const usedMemory = totalMemory - freeMemory;
const embedData = {
color: 0x0099ff,
title: 'Please wait.. I am thinking...',
fields: [
{
name: 'System Load',
value: `${systemLoad.toFixed(2)}%`,
},
{
name: 'Memory Usage',
value: `${usedMemory.toFixed(2)} GB / ${totalMemory.toFixed(2)} GB`,
},
{
name: 'Time',
value: `~${time} seconds.`,
},
],
};
// if the message object doesn't exist, create it
if (!botMessage) {
(async () => {
botMessage = await message.channel.send({ embeds: [embedData] });
})();
} else {
botMessage.edit({ embeds: [embedData] }); // otherwise, update the message
}
});
};
// call the function initially
await showSystemLoad();
// Grab REFRESH_INTERVAL from the env; if it is not set, use 7 (seconds)
const refreshInterval = setInterval(showSystemLoad, (process.env.REFRESH_INTERVAL || 7) * 1000);
try {
const response = await fetch(`http://${process.env.ROOT_IP}:${process.env.ROOT_PORT}/v1/chat/completions`, {
@@ -188,25 +315,32 @@ async function generateResponse(conversation) {
'Content-Type': 'application/json'
},
body: JSON.stringify({
messages: messagesCopy // use the copy of the messages array
messages: messagesCopy,
max_tokens: Number(process.env.MAX_TOKENS), // add the max_tokens parameter here
repeat_penalty: Number(process.env.REPEAT_PENALTY)
}),
signal: controller.signal
});
const responseData = await response.json();
console.log(JSON.stringify(responseData))
console.log(JSON.stringify(responseData));
const choice = responseData.choices[0];
// Remove "user None:" and any text after it from the response
const responseText = choice.message.content.trim();
const startIndex = responseText.indexOf('user None:');
const sanitizedResponse = startIndex === -1 ? responseText : responseText.substring(0, startIndex);
const responseText = choice.message.content;
// clear the interval, replace the "please wait" message with the response, and update the message
clearInterval(refreshInterval);
console.log(responseText);
botMessage.delete()
return responseText;
return sanitizedResponse;
} catch (err) {
throw err;
} finally {
clearTimeout(timeout);
time = 0
}
}
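The URL-handling branch of generateResponse is easiest to follow in isolation. Here is a hedged sketch of just the cheerio scraping step; fetchPageSummary is a hypothetical name, and MAX_CONTENT_LENGTH falls back to the 2000 set in default.env:

```js
// scrape-sketch.js: hypothetical, for exercising the scraping logic alone.
import fetch from 'node-fetch';
import cheerio from 'cheerio';

async function fetchPageSummary(url) {
  const html = await (await fetch(url)).text();
  const $ = cheerio.load(html);
  const title = $('head title').text().trim();
  const description = $('head meta[name="description"]').attr('content');
  // Flatten whitespace and cap the length, as the bot does before appending
  // the summary to the last message in the conversation.
  const max = Number(process.env.MAX_CONTENT_LENGTH || 2000);
  let content = $('body').text().trim().replace(/[\r\n\t]+/g, ' ');
  if (content.length > max) content = content.substring(0, max) + '...';
  return { title, description, content, url };
}

console.log(await fetchPageSummary('https://example.com'));
```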

package.json (new file, 20 lines)
@@ -0,0 +1,20 @@
{
"name": "llama-cpp-python-djs-bot",
"version": "1.0.0",
"description": "",
"main": "llamabot.js",
"type": "module",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
},
"author": "",
"license": "ISC",
"dependencies": {
"discord.js": "^14.9.0",
"cheerio": "^1.0.0-rc.12",
"dotenv": "^16.0.3",
"node-fetch": "^3.3.1",
"os": "^0.1.2",
"cpu-stat": "^2.0.1"
}
}

server/Dockerfile (new file, 11 lines)
@@ -0,0 +1,11 @@
FROM python:bullseye
RUN apt-get update; \
apt-get install -y --no-install-recommends \
build-essential
WORKDIR /usr/src/app
RUN pip install --no-cache-dir llama-cpp-python[server]
CMD python3 -m llama_cpp.server