Compare commits
37 Commits
Author | SHA1 | Date | |
---|---|---|---|
|
e138c67fe2 | ||
|
50d947b9c2 | ||
|
b009ace3e7 | ||
|
385234e05a | ||
|
63a4486745 | ||
|
f4e97d9710 | ||
|
4a05334b15 | ||
|
cae0112077 | ||
|
72059a430a | ||
|
45ef05ac45 | ||
|
ca758c5898 | ||
|
335f06ff69 | ||
|
6b739d2b80 | ||
|
9b4981c539 | ||
|
9660806f94 | ||
|
c28bf9c022 | ||
|
6531b633f7 | ||
|
99ea986bbd | ||
|
a34a910200 | ||
|
637c5182a3 | ||
|
5516e4c20b | ||
|
9489068b8e | ||
|
f211fe2d67 | ||
|
932474ec63 | ||
|
fb08f0b592 | ||
|
9197996ac7 | ||
|
016f553192 | ||
|
691b3137c3 | ||
|
f272839ab1 | ||
|
b1c1f6995a | ||
|
9b00f2fcd8 | ||
|
38f6c38f6d | ||
|
2771052f23 | ||
f87b61fb2b | |||
|
0caf82d7f6 | ||
|
d2aae48e33 | ||
|
3da598c218 |
43
default.env
43
default.env
@ -1,13 +1,48 @@
|
||||
# Discord Token
|
||||
THE_TOKEN = "DISCORD_TOKEN_HERE"
|
||||
|
||||
# The Channel IDs the bot will operate in, separated by commas
|
||||
CHANNEL_IDS = 1094494101631680653,1094628334727614605
|
||||
|
||||
# The INIT prompt for all conversations.
|
||||
INIT_PROMPT = "Assistant name: ChatAI. You code, write and provide any information without any mistakes."
|
||||
ROOT_PORT = 8000
|
||||
DATA_DIR = /home/USERNAME/weights
|
||||
CACHE = 1
|
||||
N_THREADS = 4
|
||||
|
||||
# Loading Embed Refresh Timing
|
||||
REFRESH_INTERVAL=10
|
||||
|
||||
# When a message is too large for discord we chunk the response into separate messages.
|
||||
# To ensure we do not rate limit the bot we send these at a delay interval.
|
||||
# DEFAULT: 3 a good setting is between 3 and 7 seconds.
|
||||
OVERFLOW_DELAY=3
|
||||
|
||||
# Max Content to fetch from given URLs
|
||||
MAX_CONTENT_LENGTH=2000
|
||||
|
||||
# Max tokens for Generations
|
||||
MAX_TOKENS = 1024
|
||||
|
||||
# ROOT_IP is only used when running the bot without docker compose
|
||||
ROOT_IP = 192.168.0.15
|
||||
|
||||
# ROOT_PORT is only used when running the bot without docker compose
|
||||
ROOT_PORT = 8000
|
||||
|
||||
# Directory to your models (llama.cpp specific settings)
|
||||
DATA_DIR = /home/USERNAME/weights
|
||||
|
||||
# Enable Experimental Message Caches (Limited to single session)
|
||||
# Cache will use ~1.4 GB or MORE of RAM. ONLY ENABLE IF YOUR SYSTEM CAN HANDLE THIS.
|
||||
CACHE = 1
|
||||
|
||||
# Set number of threads to use, currently, a standard thread will utilize 1 whole core
|
||||
# I usually set this to either all the cores I physically have OR 2 cores less to allow for other processes.
|
||||
N_THREADS = 4
|
||||
|
||||
# Always use MMAP unless you know what you are doing
|
||||
USE_MMAP=1
|
||||
|
||||
# Only use MLOCK if you know what it does!
|
||||
USE_MLOCK=0
|
||||
|
||||
# The higher the number the more hard core.
|
||||
REPEAT_PENALTY=1
|
@ -11,7 +11,7 @@ services:
|
||||
- ${DATA_DIR}:/usr/src/app/models
|
||||
environment:
|
||||
- HOST=llama-python-server
|
||||
- MODEL=./models/gpt4-x-alpaca-13b-native-4bit-128g.bin
|
||||
- MODEL=./models/ggml-vic7b-q4_0.bin
|
||||
llama-python-djs-bot:
|
||||
container_name: llama-python-djs-bot
|
||||
restart: unless-stopped
|
||||
@ -21,6 +21,7 @@ services:
|
||||
- llama-python-server
|
||||
environment:
|
||||
- THE_TOKEN
|
||||
- REFRESH_INTERVAL
|
||||
- CHANNEL_IDS
|
||||
- ROOT_IP=llama-python-server
|
||||
- ROOT_PORT=8000
|
||||
|
42
huggingface-config/Dockerfile
Normal file
42
huggingface-config/Dockerfile
Normal file
@ -0,0 +1,42 @@
|
||||
FROM ubuntu:latest
|
||||
|
||||
RUN apt update
|
||||
|
||||
RUN DEBIAN_FRONTEND=noninteractive apt install curl sudo -y
|
||||
|
||||
RUN curl -fsSL https://deb.nodesource.com/setup_18.x | sudo -E bash -
|
||||
|
||||
RUN DEBIAN_FRONTEND=noninteractive apt install nodejs -y
|
||||
|
||||
RUN npm i pm2 -g
|
||||
|
||||
RUN mkdir -p /code/.pm2
|
||||
|
||||
RUN mkdir -p /.pm2
|
||||
|
||||
RUN chmod 0777 /code/.pm2
|
||||
|
||||
RUN chmod 0777 /.pm2
|
||||
|
||||
RUN DEBIAN_FRONTEND=noninteractive apt install wget python3 python3-pip -y
|
||||
|
||||
WORKDIR /code
|
||||
|
||||
RUN pip install --no-cache-dir llama-cpp-python[server]
|
||||
|
||||
COPY . .
|
||||
|
||||
RUN npm i
|
||||
|
||||
ENV HOST localhost
|
||||
ENV PORT 7860
|
||||
ENV MODEL=/code/ggml-vic7b-q4_0.bin
|
||||
ENV CACHE=1
|
||||
ENV USE_MLOCK=0
|
||||
ENV REPEAT_PENALTY=1
|
||||
ENV MODEL=/code/ggml-vic7b-q4_0.bin
|
||||
ENV PM2_HOME=/code/.pm2
|
||||
|
||||
RUN wget -q https://huggingface.co/eachadea/ggml-vicuna-7b-1.1/resolve/main/ggml-vic7b-q4_0.bin
|
||||
|
||||
CMD /bin/bash /code/start.sh
|
17
huggingface-config/README.md
Normal file
17
huggingface-config/README.md
Normal file
@ -0,0 +1,17 @@
|
||||
# How to Deploy on Hugging Face
|
||||
|
||||
1) Move the "assets" directory and llamabot.js, package.json files into your HuggingFace repo directory.
|
||||
|
||||
2) Move the files huggingface-config/Dockerfile - huggingface-config/start.sh - huggingface-config/startServices.json into the root of the hugging face repo.
|
||||
|
||||
3) Edit the Dockerfile as needed; it is set to automatically download Vicuna 1.1 7B
|
||||
|
||||
4) Move default.env into your repo as .env and edit for your needs
|
||||
|
||||
5) Push the changes
|
||||
|
||||
You should then see the bot being built and deployed on HuggingFace
|
||||
|
||||
pm2 log will run automatically so you can see frontend and backend logs.
|
||||
|
||||
PLEASE NOTE: Your hugging face repo should remain private!
|
6
huggingface-config/start.sh
Normal file
6
huggingface-config/start.sh
Normal file
@ -0,0 +1,6 @@
|
||||
#!/bin/bash
|
||||
# This is the main process of the container and will stay alive as long as pm2 log is running.
|
||||
|
||||
pm2 start startServices.json
|
||||
|
||||
pm2 log
|
14
huggingface-config/startServices.json
Normal file
14
huggingface-config/startServices.json
Normal file
@ -0,0 +1,14 @@
|
||||
{
|
||||
"apps": [
|
||||
{
|
||||
"name": "Discord-Bot",
|
||||
"script": "node /code/llamabot.js",
|
||||
"args" : ""
|
||||
},
|
||||
{
|
||||
"name": "AI-API",
|
||||
"script": "python3 -m llama_cpp.server",
|
||||
"args" : ""
|
||||
}
|
||||
]
|
||||
}
|
83
llamabot.js
83
llamabot.js
@ -153,15 +153,20 @@ client.on('messageCreate', async (message) => {
|
||||
|
||||
// if we are over the discord char limit we need chunks...
|
||||
if (response.length > limit) {
|
||||
|
||||
|
||||
const chunks = response.match(new RegExp(`.{1,${limit}}`, "g"));
|
||||
if (chunks.length >= 15) return await message.channel.send("Response chunks too large. Try again");
|
||||
|
||||
|
||||
for (let i = 0; i < chunks.length; i++) {
|
||||
setTimeout(() => {
|
||||
message.channel.send(chunks[i]);
|
||||
}, i * 3000); // delay of 3 seconds between each chunk to save on API requests
|
||||
}, i * (process.env.OVERFLOW_DELAY || 3) * 1000); // delay of 3 seconds between each chunk to save on API requests
|
||||
}
|
||||
} else {
|
||||
// We are good to go, send the response
|
||||
await message.channel.send(response);
|
||||
await message.channel.send(response.replace("@", ""));
|
||||
}
|
||||
|
||||
setPresenceOnline()
|
||||
@ -175,17 +180,75 @@ client.on('messageCreate', async (message) => {
|
||||
conversation.busy = false;
|
||||
}
|
||||
conversations.set(userID, conversation); // Update user's conversation map in memory
|
||||
console.log(conversation)
|
||||
|
||||
} catch (err) {
|
||||
console.error(err);
|
||||
sendRand(errorMessages)
|
||||
return sendRand(errorMessages)
|
||||
} finally {
|
||||
setPresenceOnline()
|
||||
setBusy(message.author.id, false);
|
||||
}
|
||||
});
|
||||
|
||||
import cheerio from 'cheerio';
|
||||
|
||||
async function generateResponse(conversation, message) {
|
||||
|
||||
// Check if message contains a URL
|
||||
const urlRegex = /(https?:\/\/[^\s]+)/g;
|
||||
const urls = message.content.match(urlRegex);
|
||||
|
||||
if (urls) {
|
||||
// If there are multiple URLs, process them one by one
|
||||
for (const url of urls) {
|
||||
try {
|
||||
const res = await fetch(url);
|
||||
const html = await res.text();
|
||||
const $ = cheerio.load(html);
|
||||
|
||||
// Extract page title, meta description and content
|
||||
const pageTitle = $('head title').text().trim();
|
||||
const pageDescription = $('head meta[name="description"]').attr('content');
|
||||
const pageContent = $('body').text().trim();
|
||||
|
||||
// Construct response message with page details
|
||||
let response = `Title: ${pageTitle}\n`;
|
||||
if (pageDescription) {
|
||||
response += `Description: ${pageDescription}\n`;
|
||||
}
|
||||
if (pageContent) {
|
||||
const MAX_CONTENT_LENGTH = process.env.MAX_CONTENT_LENGTH;
|
||||
let plainTextContent = $('<div>').html(pageContent).text().trim().replace(/[\r\n\t]+/g, ' ');
|
||||
const codePattern = /\/\/|\/\*|\*\/|\{|\}|\[|\]|\bfunction\b|\bclass\b|\b0x[0-9A-Fa-f]+\b|\b0b[01]+\b/;
|
||||
const isCode = codePattern.test(plainTextContent);
|
||||
|
||||
if (isCode) {
|
||||
plainTextContent = plainTextContent.replace(codePattern, '');
|
||||
}
|
||||
// Remove anything enclosed in brackets
|
||||
plainTextContent = plainTextContent.replace(/ *\([^)]*\) */g, '');
|
||||
if (plainTextContent.length > MAX_CONTENT_LENGTH) {
|
||||
plainTextContent = plainTextContent.substring(0, MAX_CONTENT_LENGTH) + '...';
|
||||
}
|
||||
response += `Content: ${plainTextContent.trim()}`;
|
||||
}
|
||||
response += `URL: ${url}`;
|
||||
|
||||
// Get the index of the last message in the array
|
||||
const lastMessageIndex = conversation.messages.length - 1;
|
||||
|
||||
// Append a new line and the new content to the existing content of the last message
|
||||
conversation.messages[lastMessageIndex].content += "\n" + response;
|
||||
|
||||
console.log("A URL was provided, response: " + response)
|
||||
|
||||
} catch (err) {
|
||||
console.error(err);
|
||||
return sendRand(errorMessages);
|
||||
}
|
||||
}
|
||||
}
|
||||
const controller = new AbortController();
|
||||
const timeout = setTimeout(() => {
|
||||
controller.abort();
|
||||
@ -197,8 +260,8 @@ async function generateResponse(conversation, message) {
|
||||
let time = 0
|
||||
// define a function that shows the system load percentage and updates the message
|
||||
const showSystemLoad = async () => {
|
||||
time = time + 7;
|
||||
cpuStat.usagePercent(function(err, percent, seconds) {
|
||||
time = Number(time) + Number(process.env.REFRESH_INTERVAL);
|
||||
cpuStat.usagePercent(function (err, percent, seconds) {
|
||||
if (err) {
|
||||
return console.log(err);
|
||||
}
|
||||
@ -210,7 +273,7 @@ async function generateResponse(conversation, message) {
|
||||
|
||||
const embedData = {
|
||||
color: 0x0099ff,
|
||||
title: 'Please wait.. I am Thinking...',
|
||||
title: 'Please wait.. I am thinking...',
|
||||
fields: [
|
||||
{
|
||||
name: 'System Load',
|
||||
@ -241,8 +304,8 @@ async function generateResponse(conversation, message) {
|
||||
// call the function initially
|
||||
await showSystemLoad();
|
||||
|
||||
// refresh the system load percentage and update the message every 7 seconds
|
||||
const refreshInterval = setInterval(showSystemLoad, 7000);
|
||||
// Grab REFRESH_INTERVAL from ENV; if it does not exist, use 7 (seconds)
|
||||
const refreshInterval = setInterval(showSystemLoad, (process.env.REFRESH_INTERVAL || 7) * 1000);
|
||||
|
||||
try {
|
||||
const response = await fetch(`http://${process.env.ROOT_IP}:${process.env.ROOT_PORT}/v1/chat/completions`, {
|
||||
@ -253,7 +316,9 @@ async function generateResponse(conversation, message) {
|
||||
},
|
||||
body: JSON.stringify({
|
||||
messages: messagesCopy,
|
||||
max_tokens: Number(process.env.MAX_TOKENS) // add the max_tokens parameter here
|
||||
max_tokens: Number(process.env.MAX_TOKENS), // add the max_tokens parameter here
|
||||
repeat_penalty: Number(process.env.REPEAT_PENALTY)
|
||||
|
||||
}),
|
||||
signal: controller.signal
|
||||
});
|
||||
|
@ -11,6 +11,7 @@
|
||||
"license": "ISC",
|
||||
"dependencies": {
|
||||
"discord.js": "^14.9.0",
|
||||
"cheerio": "^1.0.0-rc.12",
|
||||
"dotenv": "^16.0.3",
|
||||
"node-fetch": "^3.3.1",
|
||||
"os": "^0.1.2",
|
||||
|
Loading…
Reference in New Issue
Block a user