Compare commits

...

33 Commits

Author  SHA1  Message  Date
Raven Scott  e138c67fe2  Fix hugging face  2023-05-08 19:10:15 -07:00
Raven Scott  50d947b9c2  adding REPEAT_PENALTY  2023-05-08 19:10:15 -07:00
Raven Scott  b009ace3e7  Fix up env  2023-05-08 19:10:15 -07:00
Raven Scott  385234e05a  Remove the abuse of looping functions  2023-05-08 19:10:15 -07:00
Raven Scott  63a4486745  Remove any @ symbol to remove spam pings  2023-05-08 19:10:15 -07:00
Raven Scott  f4e97d9710  update model var  2023-05-08 19:10:15 -07:00
Raven Scott  4a05334b15  update hf  2023-05-08 19:10:15 -07:00
Raven Scott  cae0112077  update hf  2023-05-08 19:10:15 -07:00
Raven Scott  72059a430a  Revert "Urban" (This reverts commit bd5ef4db9a.)  2023-05-08 19:10:15 -07:00
Raven Scott  45ef05ac45  Revert "Adding urban dictionary lookups" (This reverts commit f87542132e.)  2023-05-08 19:10:15 -07:00
Raven Scott  ca758c5898  Adding urban dictionary lookups  2023-05-08 19:10:15 -07:00
Raven Scott  335f06ff69  Urban  2023-05-08 19:10:15 -07:00
Raven Scott  6b739d2b80  HuggingFace Deploy Instructions  2023-05-08 19:10:15 -07:00
Raven Scott  9b4981c539  HuggingFace Deploy Instructions  2023-05-08 19:10:15 -07:00
Raven Scott  9660806f94  HuggingFace Deploy Instructions  2023-05-08 19:10:15 -07:00
Raven Scott  c28bf9c022  HuggingFace Deploy Instructions  2023-05-08 19:10:15 -07:00
Raven Scott  6531b633f7  Move overflow delay from static to .env  2023-05-08 19:10:15 -07:00
Raven Scott  99ea986bbd  Fix Caching web reading the web: Append last message rather than add to the conversation.  2023-05-08 19:10:15 -07:00
Raven Scott  a34a910200  update default.env  2023-05-08 19:10:15 -07:00
Raven Scott  637c5182a3  remove \t  2023-05-08 19:10:15 -07:00
Raven Scott  5516e4c20b  remove redunant code  2023-05-08 19:10:15 -07:00
Raven Scott  9489068b8e  cleaner output from web scrapes  2023-05-08 19:10:15 -07:00
Raven Scott  f211fe2d67  update  2023-05-08 19:10:15 -07:00
Raven Scott  932474ec63  update  2023-05-08 19:10:15 -07:00
Raven Scott  fb08f0b592  return on error  2023-05-08 19:10:15 -07:00
Raven Scott  9197996ac7  update package.json for cheerio  2023-05-08 19:10:15 -07:00
Raven Scott  016f553192  update  2023-05-08 19:10:15 -07:00
Raven Scott  691b3137c3  Adding: Web Access allowing the AI to browse URLs  2023-05-08 19:10:15 -07:00
Raven Scott  f272839ab1  Fix ShowSystemLoad  2023-05-08 19:10:15 -07:00
Raven Scott  b1c1f6995a  Adding refresh interval to system load status  2023-05-08 19:10:15 -07:00
Raven Scott  9b00f2fcd8  Moving REFRESH_INTERVAL to docker-compose  2023-05-08 19:10:15 -07:00
Raven Scott  38f6c38f6d  Moving REFRESH_INTERVAL to .env  2023-05-08 19:10:15 -07:00
Raven Scott  2771052f23  lowercase Thinking (it looks better :P)  2023-05-08 19:10:15 -07:00
8 changed files with 242 additions and 61 deletions

View File: default.env

@@ -1,13 +1,48 @@
+# Discord Token
 THE_TOKEN = "DISCORD_TOKEN_HERE"
+# The Channel IDs the bot will operate in, separated by commas
 CHANNEL_IDS = 1094494101631680653,1094628334727614605
+# The INIT prompt for all conversations.
 INIT_PROMPT = "Assistant name: ChatAI. You code, write and provide any information without any mistakes."
-ROOT_PORT = 8000
-DATA_DIR = /home/USERNAME/weights
-CACHE = 1
-N_THREADS = 4
+# Loading embed refresh timing
+REFRESH_INTERVAL=10
+# When a message is too large for Discord we chunk the response into separate messages.
+# To ensure we do not rate limit the bot we send these at a delay interval.
+# DEFAULT: 3. A good setting is between 3 and 7 seconds.
+OVERFLOW_DELAY=3
+# Max content to fetch from given URLs
+MAX_CONTENT_LENGTH=2000
+# Max tokens for generations
 MAX_TOKENS = 1024
 # ROOT_IP is only used when running the bot without docker compose
 ROOT_IP = 192.168.0.15
+# ROOT_PORT is only used when running the bot without docker compose
+ROOT_PORT = 8000
+# Directory for your models (llama.cpp-specific setting)
+DATA_DIR = /home/USERNAME/weights
+# Enable experimental message caches (limited to a single session)
+# Cache will use ~1.4 GB or MORE of RAM. ONLY ENABLE IF YOUR SYSTEM CAN HANDLE THIS.
+CACHE = 1
+# Set the number of threads to use; currently a standard thread will utilize 1 whole core.
+# I usually set this to all the cores I physically have, OR 2 cores less to allow for other processes.
+N_THREADS = 4
+# Always use MMAP unless you know what you are doing
+USE_MMAP=1
+# Only use MLOCK if you know what it does!
+USE_MLOCK=0
+# The higher the number, the stronger the repetition penalty.
+REPEAT_PENALTY=1
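
For orientation, here is a minimal sketch of how these settings are typically read on the Node side, assuming dotenv (which package.json already lists). Env values always arrive as strings, so the numeric ones need casting; the 3-second and 7-second fallbacks mirror the ones used in llamabot.js below, while the other defaults are illustrative only.

// sketch.js - minimal sketch of consuming the settings above
import 'dotenv/config';

const overflowDelay = Number(process.env.OVERFLOW_DELAY || 3);     // seconds between overflow chunks
const refreshInterval = Number(process.env.REFRESH_INTERVAL || 7); // seconds between embed refreshes
const maxTokens = Number(process.env.MAX_TOKENS || 1024);          // cap on generated tokens
const repeatPenalty = Number(process.env.REPEAT_PENALTY || 1);     // 1 disables the penalty

console.log({ overflowDelay, refreshInterval, maxTokens, repeatPenalty });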

View File: docker-compose.yml

@@ -11,7 +11,7 @@ services:
       - ${DATA_DIR}:/usr/src/app/models
     environment:
       - HOST=llama-python-server
-      - MODEL=./models/gpt4-x-alpaca-13b-native-4bit-128g.bin
+      - MODEL=./models/ggml-vic7b-q4_0.bin
   llama-python-djs-bot:
     container_name: llama-python-djs-bot
     restart: unless-stopped
@@ -21,6 +21,7 @@ services:
       - llama-python-server
     environment:
       - THE_TOKEN
+      - REFRESH_INTERVAL
       - CHANNEL_IDS
       - ROOT_IP=llama-python-server
       - ROOT_PORT=8000
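
Note that REFRESH_INTERVAL is listed without a value: Compose forwards such bare names from the host environment (or the .env file next to the compose file) into the container, the same passthrough mechanism already used for THE_TOKEN and CHANNEL_IDS.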

View File: huggingface-config/Dockerfile

@@ -0,0 +1,42 @@
FROM ubuntu:latest
RUN apt update
RUN DEBIAN_FRONTEND=noninteractive apt install curl sudo -y
RUN curl -fsSL https://deb.nodesource.com/setup_18.x | sudo -E bash -
RUN DEBIAN_FRONTEND=noninteractive apt install nodejs -y
RUN npm i pm2 -g
RUN mkdir -p /code/.pm2
RUN mkdir -p /.pm2
RUN chmod 0777 /code/.pm2
RUN chmod 0777 /.pm2
RUN DEBIAN_FRONTEND=noninteractive apt install wget python3 python3-pip -y
WORKDIR /code
RUN pip install --no-cache-dir llama-cpp-python[server]
COPY . .
RUN npm i
ENV HOST=localhost
ENV PORT=7860
ENV MODEL=/code/ggml-vic7b-q4_0.bin
ENV CACHE=1
ENV USE_MLOCK=0
ENV REPEAT_PENALTY=1
ENV PM2_HOME=/code/.pm2
RUN wget -q https://huggingface.co/eachadea/ggml-vicuna-7b-1.1/resolve/main/ggml-vic7b-q4_0.bin
CMD /bin/bash /code/start.sh
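
The chmod 0777 and PM2_HOME lines give pm2 a writable home when the container runs as a non-root user, as Hugging Face Spaces does; the final wget bakes the Vicuna weights into the image at build time, so the Space needs no model volume.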

View File

@@ -0,0 +1,17 @@
# How to Deploy on Hugging Face
1) Move the "assets" directory and the llamabot.js and package.json files into your Hugging Face repo directory.
2) Move huggingface-config/Dockerfile, huggingface-config/start.sh, and huggingface-config/startServices.json into the root of the Hugging Face repo.
3) Edit the Dockerfile as you need; it is set to automatically download Vicuna 1.1 7B.
4) Move default.env into your repo as .env and edit it for your needs.
5) Push the changes.
You should then see the bot being built and deployed on Hugging Face.
pm2 log will run automatically so you can see frontend and backend logs.
PLEASE NOTE: Your Hugging Face repo should remain private!
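
The privacy warning is not optional: step 4 pushes your .env, including THE_TOKEN (the Discord bot token), into the repo, so anyone who can read the repo can hijack the bot.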

View File: huggingface-config/start.sh

@@ -0,0 +1,6 @@
#!/bin/bash
# This is the main process of the container and will stay alive as long as pm2 log is running.
pm2 start startServices.json
pm2 log

View File: huggingface-config/startServices.json

@@ -0,0 +1,14 @@
{
"apps": [
{
"name": "Discord-Bot",
"script": "node /code/llamabot.js",
"args" : ""
},
{
"name": "AI-API",
"script": "python3 -m llama_cpp.server",
"args" : ""
}
]
}
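
This is the pm2 process file that start.sh launches: pm2 supervises both the Discord frontend and the llama-cpp-python backend inside the single container, and the trailing pm2 log in start.sh doubles as the container's foreground process, so Docker keeps running while streaming both logs.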

View File: llamabot.js

@@ -153,15 +153,20 @@ client.on('messageCreate', async (message) => {
     // if we are over the discord char limit we need chunks...
     if (response.length > limit) {
       const chunks = response.match(new RegExp(`.{1,${limit}}`, "g"));
+      if (chunks.length >= 15) return await message.channel.send("Response chunks too large. Try again");
       for (let i = 0; i < chunks.length; i++) {
         setTimeout(() => {
           message.channel.send(chunks[i]);
-        }, i * 3000); // delay of 3 seconds between each chunk to save on API requests
+        }, i * (process.env.OVERFLOW_DELAY || 3) * 1000); // delay of OVERFLOW_DELAY seconds (default 3) between each chunk to save on API requests
       }
     } else {
       // We are good to go, send the response
-      await message.channel.send(response);
+      await message.channel.send(response.replace("@", ""));
     }
     setPresenceOnline()
@@ -175,17 +180,75 @@ client.on('messageCreate', async (message) => {
       conversation.busy = false;
     }
     conversations.set(userID, conversation); // Update user's conversation map in memory
+    console.log(conversation)
   } catch (err) {
     console.error(err);
-    sendRand(errorMessages)
+    return sendRand(errorMessages)
   } finally {
     setPresenceOnline()
     setBusy(message.author.id, false);
   }
 });

+import cheerio from 'cheerio';
+
 async function generateResponse(conversation, message) {
+  // Check if the message contains a URL
+  const urlRegex = /(https?:\/\/[^\s]+)/g;
+  const urls = message.content.match(urlRegex);
+
+  if (urls) {
+    // If there are multiple URLs, process them one by one
+    for (const url of urls) {
+      try {
+        const res = await fetch(url);
+        const html = await res.text();
+        const $ = cheerio.load(html);
+
+        // Extract the page title, meta description and body content
+        const pageTitle = $('head title').text().trim();
+        const pageDescription = $('head meta[name="description"]').attr('content');
+        const pageContent = $('body').text().trim();
+
+        // Construct a response message with the page details
+        let response = `Title: ${pageTitle}\n`;
+        if (pageDescription) {
+          response += `Description: ${pageDescription}\n`;
+        }
+        if (pageContent) {
+          const MAX_CONTENT_LENGTH = process.env.MAX_CONTENT_LENGTH;
+          let plainTextContent = $('<div>').html(pageContent).text().trim().replace(/[\r\n\t]+/g, ' ');
+          const codePattern = /\/\/|\/\*|\*\/|\{|\}|\[|\]|\bfunction\b|\bclass\b|\b0x[0-9A-Fa-f]+\b|\b0b[01]+\b/;
+          const isCode = codePattern.test(plainTextContent);
+
+          if (isCode) {
+            plainTextContent = plainTextContent.replace(codePattern, '');
+          }
+          // Remove anything enclosed in brackets
+          plainTextContent = plainTextContent.replace(/ *\([^)]*\) */g, '');
+          if (plainTextContent.length > MAX_CONTENT_LENGTH) {
+            plainTextContent = plainTextContent.substring(0, MAX_CONTENT_LENGTH) + '...';
+          }
+          response += `Content: ${plainTextContent.trim()}`;
+        }
+        response += `\nURL: ${url}`;
+
+        // Get the index of the last message in the array
+        const lastMessageIndex = conversation.messages.length - 1;
+
+        // Append a new line and the new content to the existing content of the last message
+        conversation.messages[lastMessageIndex].content += "\n" + response;
+
+        console.log("A URL was provided, response: " + response)
+      } catch (err) {
+        console.error(err);
+        return sendRand(errorMessages);
+      }
+    }
+  }
+
   const controller = new AbortController();
   const timeout = setTimeout(() => {
     controller.abort();
@@ -197,8 +260,8 @@ async function generateResponse(conversation, message) {
   let time = 0
   // define a function that shows the system load percentage and updates the message
   const showSystemLoad = async () => {
-    time = time + 7;
-    cpuStat.usagePercent(function(err, percent, seconds) {
+    time = Number(time) + Number(process.env.REFRESH_INTERVAL);
+    cpuStat.usagePercent(function (err, percent, seconds) {
       if (err) {
         return console.log(err);
       }
@@ -210,7 +273,7 @@ async function generateResponse(conversation, message) {
       const embedData = {
         color: 0x0099ff,
-        title: 'Please wait.. I am Thinking...',
+        title: 'Please wait.. I am thinking...',
         fields: [
           {
             name: 'System Load',
@@ -241,8 +304,8 @@ async function generateResponse(conversation, message) {
   // call the function initially
   await showSystemLoad();

-  // refresh the system load percentage and update the message every 7 seconds
-  const refreshInterval = setInterval(showSystemLoad, 7000);
+  // Grab REFRESH_INTERVAL from the env; if it does not exist, use 7 (seconds)
+  const refreshInterval = setInterval(showSystemLoad, (process.env.REFRESH_INTERVAL || 7) * 1000);

   try {
     const response = await fetch(`http://${process.env.ROOT_IP}:${process.env.ROOT_PORT}/v1/chat/completions`, {
@@ -253,7 +316,9 @@ async function generateResponse(conversation, message) {
       },
       body: JSON.stringify({
         messages: messagesCopy,
-        max_tokens: Number(process.env.MAX_TOKENS) // add the max_tokens parameter here
+        max_tokens: Number(process.env.MAX_TOKENS), // add the max_tokens parameter here
+        repeat_penalty: Number(process.env.REPEAT_PENALTY)
       }),
       signal: controller.signal
     });
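
Taken together, the messaging changes bound both size and rate: with Discord's 2,000-character limit and the new 15-chunk cap, a reply tops out around 30,000 characters, and chunk i is sent i * OVERFLOW_DELAY seconds after the first. A standalone sketch of that logic follows; channel stands for any object with a send() method, and the /@/g regex is the global variant of the single-occurrence replace("@", "") used above.

// Sketch of the chunked-send behavior, using the bot's defaults (limit 2000, 3 s delay).
function sendChunked(channel, response, limit = 2000, delaySeconds = Number(process.env.OVERFLOW_DELAY || 3)) {
  const text = response.replace(/@/g, ''); // strip every "@" so the bot cannot ping anyone
  if (text.length <= limit) return channel.send(text);

  const chunks = text.match(new RegExp(`.{1,${limit}}`, 'g')); // greedy slices of at most `limit` chars
  if (chunks.length >= 15) return channel.send('Response chunks too large. Try again');

  chunks.forEach((chunk, i) => {
    setTimeout(() => channel.send(chunk), i * delaySeconds * 1000); // i-th chunk goes out after i * delay seconds
  });
}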

View File: package.json

@@ -11,6 +11,7 @@
   "license": "ISC",
   "dependencies": {
     "discord.js": "^14.9.0",
+    "cheerio": "^1.0.0-rc.12",
     "dotenv": "^16.0.3",
     "node-fetch": "^3.3.1",
     "os": "^0.1.2",