Compare commits
37 Commits
Author | SHA1 | Date | |
---|---|---|---|
|
e138c67fe2 | ||
|
50d947b9c2 | ||
|
b009ace3e7 | ||
|
385234e05a | ||
|
63a4486745 | ||
|
f4e97d9710 | ||
|
4a05334b15 | ||
|
cae0112077 | ||
|
72059a430a | ||
|
45ef05ac45 | ||
|
ca758c5898 | ||
|
335f06ff69 | ||
|
6b739d2b80 | ||
|
9b4981c539 | ||
|
9660806f94 | ||
|
c28bf9c022 | ||
|
6531b633f7 | ||
|
99ea986bbd | ||
|
a34a910200 | ||
|
637c5182a3 | ||
|
5516e4c20b | ||
|
9489068b8e | ||
|
f211fe2d67 | ||
|
932474ec63 | ||
|
fb08f0b592 | ||
|
9197996ac7 | ||
|
016f553192 | ||
|
691b3137c3 | ||
|
f272839ab1 | ||
|
b1c1f6995a | ||
|
9b00f2fcd8 | ||
|
38f6c38f6d | ||
|
2771052f23 | ||
f87b61fb2b | |||
|
0caf82d7f6 | ||
|
d2aae48e33 | ||
|
3da598c218 |
43
default.env
43
default.env
@ -1,13 +1,48 @@
|
|||||||
|
# Discord Token
|
||||||
THE_TOKEN = "DISCORD_TOKEN_HERE"
|
THE_TOKEN = "DISCORD_TOKEN_HERE"
|
||||||
|
|
||||||
|
# The Channel IDs the bot will operate in, separated by commas
|
||||||
CHANNEL_IDS = 1094494101631680653,1094628334727614605
|
CHANNEL_IDS = 1094494101631680653,1094628334727614605
|
||||||
|
|
||||||
|
# The INIT prompt for all conversations.
|
||||||
INIT_PROMPT = "Assistant name: ChatAI. You code, write and provide any information without any mistakes."
|
INIT_PROMPT = "Assistant name: ChatAI. You code, write and provide any information without any mistakes."
|
||||||
ROOT_PORT = 8000
|
|
||||||
DATA_DIR = /home/USERNAME/weights
|
# Loading Embed Refresh Timing (in seconds)
|
||||||
CACHE = 1
|
REFRESH_INTERVAL=10
|
||||||
N_THREADS = 4
|
|
||||||
|
# When a message is too large for Discord we chunk the response into separate messages.
|
||||||
|
# To ensure we do not rate limit the bot we send these at a delay interval.
|
||||||
|
# DEFAULT: 3 a good setting is between 3 and 7 seconds.
|
||||||
|
OVERFLOW_DELAY=3
|
||||||
|
|
||||||
|
# Max Content to fetch from given URLs
|
||||||
|
MAX_CONTENT_LENGTH=2000
|
||||||
|
|
||||||
|
# Max tokens for Generations
|
||||||
MAX_TOKENS = 1024
|
MAX_TOKENS = 1024
|
||||||
|
|
||||||
# ROOT_IP is only used when running the bot without docker compose
|
# ROOT_IP is only used when running the bot without docker compose
|
||||||
ROOT_IP = 192.168.0.15
|
ROOT_IP = 192.168.0.15
|
||||||
|
|
||||||
|
# PORT is only used when running the bot without docker compose
|
||||||
|
ROOT_PORT = 8000
|
||||||
|
|
||||||
|
# Directory to your models (llama.cpp specific settings)
|
||||||
|
DATA_DIR = /home/USERNAME/weights
|
||||||
|
|
||||||
|
# Enable Experimental Message Caches (Limited to single session)
|
||||||
|
# Cache will use ~1.4 GB or MORE of RAM. ONLY ENABLE IF YOUR SYSTEM CAN HANDLE THIS.
|
||||||
|
CACHE = 1
|
||||||
|
|
||||||
|
# Set number of threads to use, currently, a standard thread will utilize 1 whole core
|
||||||
|
# I usually set this to either all the cores I physically have OR 2 cores fewer, to leave room for other processes.
|
||||||
|
N_THREADS = 4
|
||||||
|
|
||||||
|
# Always use MMAP unless you know what you are doing
|
||||||
|
USE_MMAP=1
|
||||||
|
|
||||||
|
# Only use MLOCK if you know what it does!
|
||||||
|
USE_MLOCK=0
|
||||||
|
|
||||||
|
# The higher the number the more hard core.
|
||||||
|
REPEAT_PENALTY=1
|
@ -11,7 +11,7 @@ services:
|
|||||||
- ${DATA_DIR}:/usr/src/app/models
|
- ${DATA_DIR}:/usr/src/app/models
|
||||||
environment:
|
environment:
|
||||||
- HOST=llama-python-server
|
- HOST=llama-python-server
|
||||||
- MODEL=./models/gpt4-x-alpaca-13b-native-4bit-128g.bin
|
- MODEL=./models/ggml-vic7b-q4_0.bin
|
||||||
llama-python-djs-bot:
|
llama-python-djs-bot:
|
||||||
container_name: llama-python-djs-bot
|
container_name: llama-python-djs-bot
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
@ -21,6 +21,7 @@ services:
|
|||||||
- llama-python-server
|
- llama-python-server
|
||||||
environment:
|
environment:
|
||||||
- THE_TOKEN
|
- THE_TOKEN
|
||||||
|
- REFRESH_INTERVAL
|
||||||
- CHANNEL_IDS
|
- CHANNEL_IDS
|
||||||
- ROOT_IP=llama-python-server
|
- ROOT_IP=llama-python-server
|
||||||
- ROOT_PORT=8000
|
- ROOT_PORT=8000
|
||||||
|
42
huggingface-config/Dockerfile
Normal file
42
huggingface-config/Dockerfile
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
FROM ubuntu:latest
|
||||||
|
|
||||||
|
RUN apt update
|
||||||
|
|
||||||
|
RUN DEBIAN_FRONTEND=noninteractive apt install curl sudo -y
|
||||||
|
|
||||||
|
RUN curl -fsSL https://deb.nodesource.com/setup_18.x | sudo -E bash -
|
||||||
|
|
||||||
|
RUN DEBIAN_FRONTEND=noninteractive apt install nodejs -y
|
||||||
|
|
||||||
|
RUN npm i pm2 -g
|
||||||
|
|
||||||
|
RUN mkdir -p /code/.pm2
|
||||||
|
|
||||||
|
RUN mkdir -p /.pm2
|
||||||
|
|
||||||
|
RUN chmod 0777 /code/.pm2
|
||||||
|
|
||||||
|
RUN chmod 0777 /.pm2
|
||||||
|
|
||||||
|
RUN DEBIAN_FRONTEND=noninteractive apt install wget python3 python3-pip -y
|
||||||
|
|
||||||
|
WORKDIR /code
|
||||||
|
|
||||||
|
RUN pip install --no-cache-dir llama-cpp-python[server]
|
||||||
|
|
||||||
|
COPY . .
|
||||||
|
|
||||||
|
RUN npm i
|
||||||
|
|
||||||
|
ENV HOST localhost
|
||||||
|
ENV PORT 7860
|
||||||
|
ENV MODEL=/code/ggml-vic7b-q4_0.bin
|
||||||
|
ENV CACHE=1
|
||||||
|
ENV USE_MLOCK=0
|
||||||
|
ENV REPEAT_PENALTY=1
|
||||||
|
ENV MODEL=/code/ggml-vic7b-q4_0.bin
|
||||||
|
ENV PM2_HOME=/code/.pm2
|
||||||
|
|
||||||
|
RUN wget -q https://huggingface.co/eachadea/ggml-vicuna-7b-1.1/resolve/main/ggml-vic7b-q4_0.bin
|
||||||
|
|
||||||
|
CMD /bin/bash /code/start.sh
|
17
huggingface-config/README.md
Normal file
17
huggingface-config/README.md
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
# How to Deploy on Hugging Face
|
||||||
|
|
||||||
|
1) Move the "assets" directory and llamabot.js, package.json files into your HuggingFace repo directory.
|
||||||
|
|
||||||
|
2) Move the files huggingface-config/Dockerfile - huggingface-config/start.sh - huggingface-config/startServices.json into the root of the hugging face repo.
|
||||||
|
|
||||||
|
3) Edit the Dockerfile as needed; it is set to automatically download Vicuna 1.1 7B
|
||||||
|
|
||||||
|
4) Move default.env into your repo as .env and edit for your needs
|
||||||
|
|
||||||
|
5) Push the changes
|
||||||
|
|
||||||
|
You should then see the bot being built and deployed on HuggingFace
|
||||||
|
|
||||||
|
pm2 log will run automatically so you can see frontend and backend logs.
|
||||||
|
|
||||||
|
PLEASE NOTE: Your hugging face repo should remain private!
|
6
huggingface-config/start.sh
Normal file
6
huggingface-config/start.sh
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
# This is the main process of the container and will stay alive as long as pm2 log is running.
|
||||||
|
|
||||||
|
pm2 start startServices.json
|
||||||
|
|
||||||
|
pm2 log
|
14
huggingface-config/startServices.json
Normal file
14
huggingface-config/startServices.json
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
{
|
||||||
|
"apps": [
|
||||||
|
{
|
||||||
|
"name": "Discord-Bot",
|
||||||
|
"script": "node /code/llamabot.js",
|
||||||
|
"args" : ""
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "AI-API",
|
||||||
|
"script": "python3 -m llama_cpp.server",
|
||||||
|
"args" : ""
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
169
llamabot.js
169
llamabot.js
@ -153,15 +153,20 @@ client.on('messageCreate', async (message) => {
|
|||||||
|
|
||||||
// if we are over the discord char limit we need chunks...
|
// if we are over the discord char limit we need chunks...
|
||||||
if (response.length > limit) {
|
if (response.length > limit) {
|
||||||
const chunks = response.match(new RegExp(`.{1,${limit}}`, "g"));
|
|
||||||
for (let i = 0; i < chunks.length; i++) {
|
|
||||||
setTimeout(() => {
|
const chunks = response.match(new RegExp(`.{1,${limit}}`, "g"));
|
||||||
message.channel.send(chunks[i]);
|
if (chunks.length >= 15) return await message.channel.send("Response chunks too large. Try again");
|
||||||
}, i * 3000); // delay of 3 seconds between each chunk to save on API requests
|
|
||||||
}
|
|
||||||
|
for (let i = 0; i < chunks.length; i++) {
|
||||||
|
setTimeout(() => {
|
||||||
|
message.channel.send(chunks[i]);
|
||||||
|
}, i * (process.env.OVERFLOW_DELAY || 3) * 1000); // delay of 3 seconds between each chunk to save on API requests
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
// We are good to go, send the response
|
// We are good to go, send the response
|
||||||
await message.channel.send(response);
|
await message.channel.send(response.replace("@", ""));
|
||||||
}
|
}
|
||||||
|
|
||||||
setPresenceOnline()
|
setPresenceOnline()
|
||||||
@ -175,17 +180,75 @@ client.on('messageCreate', async (message) => {
|
|||||||
conversation.busy = false;
|
conversation.busy = false;
|
||||||
}
|
}
|
||||||
conversations.set(userID, conversation); // Update user's conversation map in memory
|
conversations.set(userID, conversation); // Update user's conversation map in memory
|
||||||
|
console.log(conversation)
|
||||||
|
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
console.error(err);
|
console.error(err);
|
||||||
sendRand(errorMessages)
|
return sendRand(errorMessages)
|
||||||
} finally {
|
} finally {
|
||||||
setPresenceOnline()
|
setPresenceOnline()
|
||||||
setBusy(message.author.id, false);
|
setBusy(message.author.id, false);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
import cheerio from 'cheerio';
|
||||||
|
|
||||||
async function generateResponse(conversation, message) {
|
async function generateResponse(conversation, message) {
|
||||||
|
|
||||||
|
// Check if message contains a URL
|
||||||
|
const urlRegex = /(https?:\/\/[^\s]+)/g;
|
||||||
|
const urls = message.content.match(urlRegex);
|
||||||
|
|
||||||
|
if (urls) {
|
||||||
|
// If there are multiple URLs, process them one by one
|
||||||
|
for (const url of urls) {
|
||||||
|
try {
|
||||||
|
const res = await fetch(url);
|
||||||
|
const html = await res.text();
|
||||||
|
const $ = cheerio.load(html);
|
||||||
|
|
||||||
|
// Extract page title, meta description and content
|
||||||
|
const pageTitle = $('head title').text().trim();
|
||||||
|
const pageDescription = $('head meta[name="description"]').attr('content');
|
||||||
|
const pageContent = $('body').text().trim();
|
||||||
|
|
||||||
|
// Construct response message with page details
|
||||||
|
let response = `Title: ${pageTitle}\n`;
|
||||||
|
if (pageDescription) {
|
||||||
|
response += `Description: ${pageDescription}\n`;
|
||||||
|
}
|
||||||
|
if (pageContent) {
|
||||||
|
const MAX_CONTENT_LENGTH = process.env.MAX_CONTENT_LENGTH;
|
||||||
|
let plainTextContent = $('<div>').html(pageContent).text().trim().replace(/[\r\n\t]+/g, ' ');
|
||||||
|
const codePattern = /\/\/|\/\*|\*\/|\{|\}|\[|\]|\bfunction\b|\bclass\b|\b0x[0-9A-Fa-f]+\b|\b0b[01]+\b/;
|
||||||
|
const isCode = codePattern.test(plainTextContent);
|
||||||
|
|
||||||
|
if (isCode) {
|
||||||
|
plainTextContent = plainTextContent.replace(codePattern, '');
|
||||||
|
}
|
||||||
|
// Remove anything enclosed in brackets
|
||||||
|
plainTextContent = plainTextContent.replace(/ *\([^)]*\) */g, '');
|
||||||
|
if (plainTextContent.length > MAX_CONTENT_LENGTH) {
|
||||||
|
plainTextContent = plainTextContent.substring(0, MAX_CONTENT_LENGTH) + '...';
|
||||||
|
}
|
||||||
|
response += `Content: ${plainTextContent.trim()}`;
|
||||||
|
}
|
||||||
|
response += `URL: ${url}`;
|
||||||
|
|
||||||
|
// Get the index of the last message in the array
|
||||||
|
const lastMessageIndex = conversation.messages.length - 1;
|
||||||
|
|
||||||
|
// Append a new line and the new content to the existing content of the last message
|
||||||
|
conversation.messages[lastMessageIndex].content += "\n" + response;
|
||||||
|
|
||||||
|
console.log("A URL was provided, response: " + response)
|
||||||
|
|
||||||
|
} catch (err) {
|
||||||
|
console.error(err);
|
||||||
|
return sendRand(errorMessages);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
const controller = new AbortController();
|
const controller = new AbortController();
|
||||||
const timeout = setTimeout(() => {
|
const timeout = setTimeout(() => {
|
||||||
controller.abort();
|
controller.abort();
|
||||||
@ -197,66 +260,68 @@ async function generateResponse(conversation, message) {
|
|||||||
let time = 0
|
let time = 0
|
||||||
// define a function that shows the system load percentage and updates the message
|
// define a function that shows the system load percentage and updates the message
|
||||||
const showSystemLoad = async () => {
|
const showSystemLoad = async () => {
|
||||||
time = time + 7;
|
time = Number(time) + Number(process.env.REFRESH_INTERVAL);
|
||||||
cpuStat.usagePercent(function(err, percent, seconds) {
|
cpuStat.usagePercent(function (err, percent, seconds) {
|
||||||
if (err) {
|
if (err) {
|
||||||
return console.log(err);
|
return console.log(err);
|
||||||
}
|
}
|
||||||
|
|
||||||
const systemLoad = percent;
|
const systemLoad = percent;
|
||||||
const freeMemory = os.freemem() / 1024 / 1024 / 1024;
|
const freeMemory = os.freemem() / 1024 / 1024 / 1024;
|
||||||
const totalMemory = os.totalmem() / 1024 / 1024 / 1024;
|
const totalMemory = os.totalmem() / 1024 / 1024 / 1024;
|
||||||
const usedMemory = totalMemory - freeMemory;
|
const usedMemory = totalMemory - freeMemory;
|
||||||
|
|
||||||
const embedData = {
|
const embedData = {
|
||||||
color: 0x0099ff,
|
color: 0x0099ff,
|
||||||
title: 'Please wait.. I am Thinking...',
|
title: 'Please wait.. I am thinking...',
|
||||||
fields: [
|
fields: [
|
||||||
{
|
{
|
||||||
name: 'System Load',
|
name: 'System Load',
|
||||||
value: `${systemLoad.toFixed(2)}%`,
|
value: `${systemLoad.toFixed(2)}%`,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: 'Memory Usage',
|
name: 'Memory Usage',
|
||||||
value: `${usedMemory.toFixed(2)} GB / ${totalMemory.toFixed(2)} GB`,
|
value: `${usedMemory.toFixed(2)} GB / ${totalMemory.toFixed(2)} GB`,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: 'Time',
|
name: 'Time',
|
||||||
value: `~${time} seconds.`,
|
value: `~${time} seconds.`,
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
};
|
};
|
||||||
|
|
||||||
// if the message object doesn't exist, create it
|
// if the message object doesn't exist, create it
|
||||||
if (!botMessage) {
|
if (!botMessage) {
|
||||||
(async () => {
|
(async () => {
|
||||||
botMessage = await message.channel.send({ embeds: [embedData] });
|
botMessage = await message.channel.send({ embeds: [embedData] });
|
||||||
})();
|
})();
|
||||||
} else {
|
} else {
|
||||||
botMessage.edit({ embeds: [embedData] }); // otherwise, update the message
|
botMessage.edit({ embeds: [embedData] }); // otherwise, update the message
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
};
|
};
|
||||||
|
|
||||||
// call the function initially
|
// call the function initially
|
||||||
await showSystemLoad();
|
await showSystemLoad();
|
||||||
|
|
||||||
// refresh the system load percentage and update the message every 7 seconds
|
// Grab the REFRESH_INTERVAL from ENV if not exist, lets use 7 (seconds)
|
||||||
const refreshInterval = setInterval(showSystemLoad, 7000);
|
const refreshInterval = setInterval(showSystemLoad, (process.env.REFRESH_INTERVAL || 7) * 1000);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const response = await fetch(`http://${process.env.ROOT_IP}:${process.env.ROOT_PORT}/v1/chat/completions`, {
|
const response = await fetch(`http://${process.env.ROOT_IP}:${process.env.ROOT_PORT}/v1/chat/completions`, {
|
||||||
method: 'POST',
|
method: 'POST',
|
||||||
headers: {
|
headers: {
|
||||||
'accept': 'application/json',
|
'accept': 'application/json',
|
||||||
'Content-Type': 'application/json'
|
'Content-Type': 'application/json'
|
||||||
},
|
},
|
||||||
body: JSON.stringify({
|
body: JSON.stringify({
|
||||||
messages: messagesCopy,
|
messages: messagesCopy,
|
||||||
max_tokens: Number(process.env.MAX_TOKENS) // add the max_tokens parameter here
|
max_tokens: Number(process.env.MAX_TOKENS), // add the max_tokens parameter here
|
||||||
|
repeat_penalty: Number(process.env.REPEAT_PENALTY)
|
||||||
|
|
||||||
}),
|
}),
|
||||||
signal: controller.signal
|
signal: controller.signal
|
||||||
});
|
});
|
||||||
|
|
||||||
const responseData = await response.json();
|
const responseData = await response.json();
|
||||||
console.log(JSON.stringify(responseData));
|
console.log(JSON.stringify(responseData));
|
||||||
|
@ -11,6 +11,7 @@
|
|||||||
"license": "ISC",
|
"license": "ISC",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"discord.js": "^14.9.0",
|
"discord.js": "^14.9.0",
|
||||||
|
"cheerio": "^1.0.0-rc.12",
|
||||||
"dotenv": "^16.0.3",
|
"dotenv": "^16.0.3",
|
||||||
"node-fetch": "^3.3.1",
|
"node-fetch": "^3.3.1",
|
||||||
"os": "^0.1.2",
|
"os": "^0.1.2",
|
||||||
|
Loading…
Reference in New Issue
Block a user