Compare commits
No commits in common. "e138c67fe2431a16502adb70472f63e8b5f9ee16" and "f87b61fb2b70af9340aa6cc36a5ef822508c4e42" have entirely different histories.
e138c67fe2
...
f87b61fb2b
43
default.env
43
default.env
@ -1,48 +1,13 @@
|
|||||||
# Discord Token
|
|
||||||
THE_TOKEN = "DISCORD_TOKEN_HERE"
|
THE_TOKEN = "DISCORD_TOKEN_HERE"
|
||||||
|
|
||||||
# The Channel IDs the bot will operate in, separated by commas
|
|
||||||
CHANNEL_IDS = 1094494101631680653,1094628334727614605
|
CHANNEL_IDS = 1094494101631680653,1094628334727614605
|
||||||
|
|
||||||
# The INIT prompt for all conversations.
|
|
||||||
INIT_PROMPT = "Assistant name: ChatAI. You code, write and provide any information without any mistakes."
|
INIT_PROMPT = "Assistant name: ChatAI. You code, write and provide any information without any mistakes."
|
||||||
|
ROOT_PORT = 8000
|
||||||
# Loading Embed Refresh Timing
|
DATA_DIR = /home/USERNAME/weights
|
||||||
REFRESH_INTERVAL=10
|
CACHE = 1
|
||||||
|
N_THREADS = 4
|
||||||
# When a message is too large for Discord, we chunk the response into separate messages.
|
|
||||||
# To ensure we do not rate limit the bot we send these at a delay interval.
|
|
||||||
# DEFAULT: 3 a good setting is between 3 and 7 seconds.
|
|
||||||
OVERFLOW_DELAY=3
|
|
||||||
|
|
||||||
# Max Content to fetch from given URLs
|
|
||||||
MAX_CONTENT_LENGTH=2000
|
|
||||||
|
|
||||||
# Max tokens for Generations
|
|
||||||
MAX_TOKENS = 1024
|
MAX_TOKENS = 1024
|
||||||
|
|
||||||
# ROOT_IP is only used when running the bot without docker compose
|
# ROOT_IP is only used when running the bot without docker compose
|
||||||
ROOT_IP = 192.168.0.15
|
ROOT_IP = 192.168.0.15
|
||||||
|
|
||||||
# PORT is only used when running the bot without docker compose
|
|
||||||
ROOT_PORT = 8000
|
|
||||||
|
|
||||||
# Directory to your models (llama.cpp specific settings)
|
|
||||||
DATA_DIR = /home/USERNAME/weights
|
|
||||||
|
|
||||||
# Enable Experimental Message Caches (Limited to single session)
|
|
||||||
# Cache will use ~1.4 GB or MORE of RAM. ONLY ENABLE IF YOUR SYSTEM CAN HANDLE THIS.
|
|
||||||
CACHE = 1
|
|
||||||
|
|
||||||
# Set number of threads to use, currently, a standard thread will utilize 1 whole core
|
|
||||||
# I usually set this to the number of physical cores I have, OR 2 cores fewer to leave room for other processes.
|
|
||||||
N_THREADS = 4
|
|
||||||
|
|
||||||
# Always use MMAP unless you know what you are doing
|
|
||||||
USE_MMAP=1
|
|
||||||
|
|
||||||
# Only use MLOCK if you know what it does!
|
|
||||||
USE_MLOCK=0
|
|
||||||
|
|
||||||
# Repeat penalty: the higher the number, the more strongly repeated text is penalized.
|
|
||||||
REPEAT_PENALTY=1
|
|
@ -11,7 +11,7 @@ services:
|
|||||||
- ${DATA_DIR}:/usr/src/app/models
|
- ${DATA_DIR}:/usr/src/app/models
|
||||||
environment:
|
environment:
|
||||||
- HOST=llama-python-server
|
- HOST=llama-python-server
|
||||||
- MODEL=./models/ggml-vic7b-q4_0.bin
|
- MODEL=./models/gpt4-x-alpaca-13b-native-4bit-128g.bin
|
||||||
llama-python-djs-bot:
|
llama-python-djs-bot:
|
||||||
container_name: llama-python-djs-bot
|
container_name: llama-python-djs-bot
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
@ -21,7 +21,6 @@ services:
|
|||||||
- llama-python-server
|
- llama-python-server
|
||||||
environment:
|
environment:
|
||||||
- THE_TOKEN
|
- THE_TOKEN
|
||||||
- REFRESH_INTERVAL
|
|
||||||
- CHANNEL_IDS
|
- CHANNEL_IDS
|
||||||
- ROOT_IP=llama-python-server
|
- ROOT_IP=llama-python-server
|
||||||
- ROOT_PORT=8000
|
- ROOT_PORT=8000
|
||||||
|
@ -1,42 +0,0 @@
|
|||||||
FROM ubuntu:latest
|
|
||||||
|
|
||||||
RUN apt update
|
|
||||||
|
|
||||||
RUN DEBIAN_FRONTEND=noninteractive apt install curl sudo -y
|
|
||||||
|
|
||||||
RUN curl -fsSL https://deb.nodesource.com/setup_18.x | sudo -E bash -
|
|
||||||
|
|
||||||
RUN DEBIAN_FRONTEND=noninteractive apt install nodejs -y
|
|
||||||
|
|
||||||
RUN npm i pm2 -g
|
|
||||||
|
|
||||||
RUN mkdir -p /code/.pm2
|
|
||||||
|
|
||||||
RUN mkdir -p /.pm2
|
|
||||||
|
|
||||||
RUN chmod 0777 /code/.pm2
|
|
||||||
|
|
||||||
RUN chmod 0777 /.pm2
|
|
||||||
|
|
||||||
RUN DEBIAN_FRONTEND=noninteractive apt install wget python3 python3-pip -y
|
|
||||||
|
|
||||||
WORKDIR /code
|
|
||||||
|
|
||||||
RUN pip install --no-cache-dir llama-cpp-python[server]
|
|
||||||
|
|
||||||
COPY . .
|
|
||||||
|
|
||||||
RUN npm i
|
|
||||||
|
|
||||||
ENV HOST localhost
|
|
||||||
ENV PORT 7860
|
|
||||||
ENV MODEL=/code/ggml-vic7b-q4_0.bin
|
|
||||||
ENV CACHE=1
|
|
||||||
ENV USE_MLOCK=0
|
|
||||||
ENV REPEAT_PENALTY=1
|
|
||||||
ENV MODEL=/code/ggml-vic7b-q4_0.bin
|
|
||||||
ENV PM2_HOME=/code/.pm2
|
|
||||||
|
|
||||||
RUN wget -q https://huggingface.co/eachadea/ggml-vicuna-7b-1.1/resolve/main/ggml-vic7b-q4_0.bin
|
|
||||||
|
|
||||||
CMD /bin/bash /code/start.sh
|
|
@ -1,17 +0,0 @@
|
|||||||
# How to Deploy on Hugging Face
|
|
||||||
|
|
||||||
1) Move the "assets" directory and llamabot.js, package.json files into your HuggingFace repo directory.
|
|
||||||
|
|
||||||
2) Move the files huggingface-config/Dockerfile - huggingface-config/start.sh - huggingface-config/startServices.json into the root of the hugging face repo.
|
|
||||||
|
|
||||||
3) Edit the Dockerfile as needed; it is set to automatically download Vicuna 1.1 7B
|
|
||||||
|
|
||||||
4) Move default.env into your repo as .env and edit for your needs
|
|
||||||
|
|
||||||
5) Push the changes
|
|
||||||
|
|
||||||
You should then see the bot being built and deployed on HuggingFace
|
|
||||||
|
|
||||||
pm2 log will run automatically so you can see frontend and backend logs.
|
|
||||||
|
|
||||||
PLEASE NOTE: Your hugging face repo should remain private!
|
|
@ -1,6 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
# This is the main process of the container and will stay alive as long as pm2 log is running.
|
|
||||||
|
|
||||||
pm2 start startServices.json
|
|
||||||
|
|
||||||
pm2 log
|
|
@ -1,14 +0,0 @@
|
|||||||
{
|
|
||||||
"apps": [
|
|
||||||
{
|
|
||||||
"name": "Discord-Bot",
|
|
||||||
"script": "node /code/llamabot.js",
|
|
||||||
"args" : ""
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "AI-API",
|
|
||||||
"script": "python3 -m llama_cpp.server",
|
|
||||||
"args" : ""
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}
|
|
81
llamabot.js
81
llamabot.js
@ -153,20 +153,15 @@ client.on('messageCreate', async (message) => {
|
|||||||
|
|
||||||
// if we are over the discord char limit we need chunks...
|
// if we are over the discord char limit we need chunks...
|
||||||
if (response.length > limit) {
|
if (response.length > limit) {
|
||||||
|
|
||||||
|
|
||||||
const chunks = response.match(new RegExp(`.{1,${limit}}`, "g"));
|
const chunks = response.match(new RegExp(`.{1,${limit}}`, "g"));
|
||||||
if (chunks.length >= 15) return await message.channel.send("Response chunks too large. Try again");
|
|
||||||
|
|
||||||
|
|
||||||
for (let i = 0; i < chunks.length; i++) {
|
for (let i = 0; i < chunks.length; i++) {
|
||||||
setTimeout(() => {
|
setTimeout(() => {
|
||||||
message.channel.send(chunks[i]);
|
message.channel.send(chunks[i]);
|
||||||
}, i * (process.env.OVERFLOW_DELAY || 3) * 1000); // delay of 3 seconds between each chunk to save on API requests
|
}, i * 3000); // delay of 3 seconds between each chunk to save on API requests
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// We are good to go, send the response
|
// We are good to go, send the response
|
||||||
await message.channel.send(response.replace("@", ""));
|
await message.channel.send(response);
|
||||||
}
|
}
|
||||||
|
|
||||||
setPresenceOnline()
|
setPresenceOnline()
|
||||||
@ -180,75 +175,17 @@ client.on('messageCreate', async (message) => {
|
|||||||
conversation.busy = false;
|
conversation.busy = false;
|
||||||
}
|
}
|
||||||
conversations.set(userID, conversation); // Update user's conversation map in memory
|
conversations.set(userID, conversation); // Update user's conversation map in memory
|
||||||
console.log(conversation)
|
|
||||||
|
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
console.error(err);
|
console.error(err);
|
||||||
return sendRand(errorMessages)
|
sendRand(errorMessages)
|
||||||
} finally {
|
} finally {
|
||||||
setPresenceOnline()
|
setPresenceOnline()
|
||||||
setBusy(message.author.id, false);
|
setBusy(message.author.id, false);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
import cheerio from 'cheerio';
|
|
||||||
|
|
||||||
async function generateResponse(conversation, message) {
|
async function generateResponse(conversation, message) {
|
||||||
|
|
||||||
// Check if message contains a URL
|
|
||||||
const urlRegex = /(https?:\/\/[^\s]+)/g;
|
|
||||||
const urls = message.content.match(urlRegex);
|
|
||||||
|
|
||||||
if (urls) {
|
|
||||||
// If there are multiple URLs, process them one by one
|
|
||||||
for (const url of urls) {
|
|
||||||
try {
|
|
||||||
const res = await fetch(url);
|
|
||||||
const html = await res.text();
|
|
||||||
const $ = cheerio.load(html);
|
|
||||||
|
|
||||||
// Extract page title, meta description and content
|
|
||||||
const pageTitle = $('head title').text().trim();
|
|
||||||
const pageDescription = $('head meta[name="description"]').attr('content');
|
|
||||||
const pageContent = $('body').text().trim();
|
|
||||||
|
|
||||||
// Construct response message with page details
|
|
||||||
let response = `Title: ${pageTitle}\n`;
|
|
||||||
if (pageDescription) {
|
|
||||||
response += `Description: ${pageDescription}\n`;
|
|
||||||
}
|
|
||||||
if (pageContent) {
|
|
||||||
const MAX_CONTENT_LENGTH = process.env.MAX_CONTENT_LENGTH;
|
|
||||||
let plainTextContent = $('<div>').html(pageContent).text().trim().replace(/[\r\n\t]+/g, ' ');
|
|
||||||
const codePattern = /\/\/|\/\*|\*\/|\{|\}|\[|\]|\bfunction\b|\bclass\b|\b0x[0-9A-Fa-f]+\b|\b0b[01]+\b/;
|
|
||||||
const isCode = codePattern.test(plainTextContent);
|
|
||||||
|
|
||||||
if (isCode) {
|
|
||||||
plainTextContent = plainTextContent.replace(codePattern, '');
|
|
||||||
}
|
|
||||||
// Remove anything enclosed in brackets
|
|
||||||
plainTextContent = plainTextContent.replace(/ *\([^)]*\) */g, '');
|
|
||||||
if (plainTextContent.length > MAX_CONTENT_LENGTH) {
|
|
||||||
plainTextContent = plainTextContent.substring(0, MAX_CONTENT_LENGTH) + '...';
|
|
||||||
}
|
|
||||||
response += `Content: ${plainTextContent.trim()}`;
|
|
||||||
}
|
|
||||||
response += `URL: ${url}`;
|
|
||||||
|
|
||||||
// Get the index of the last message in the array
|
|
||||||
const lastMessageIndex = conversation.messages.length - 1;
|
|
||||||
|
|
||||||
// Append a new line and the new content to the existing content of the last message
|
|
||||||
conversation.messages[lastMessageIndex].content += "\n" + response;
|
|
||||||
|
|
||||||
console.log("A URL was provided, response: " + response)
|
|
||||||
|
|
||||||
} catch (err) {
|
|
||||||
console.error(err);
|
|
||||||
return sendRand(errorMessages);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
const controller = new AbortController();
|
const controller = new AbortController();
|
||||||
const timeout = setTimeout(() => {
|
const timeout = setTimeout(() => {
|
||||||
controller.abort();
|
controller.abort();
|
||||||
@ -260,7 +197,7 @@ async function generateResponse(conversation, message) {
|
|||||||
let time = 0
|
let time = 0
|
||||||
// define a function that shows the system load percentage and updates the message
|
// define a function that shows the system load percentage and updates the message
|
||||||
const showSystemLoad = async () => {
|
const showSystemLoad = async () => {
|
||||||
time = Number(time) + Number(process.env.REFRESH_INTERVAL);
|
time = time + 7;
|
||||||
cpuStat.usagePercent(function(err, percent, seconds) {
|
cpuStat.usagePercent(function(err, percent, seconds) {
|
||||||
if (err) {
|
if (err) {
|
||||||
return console.log(err);
|
return console.log(err);
|
||||||
@ -273,7 +210,7 @@ async function generateResponse(conversation, message) {
|
|||||||
|
|
||||||
const embedData = {
|
const embedData = {
|
||||||
color: 0x0099ff,
|
color: 0x0099ff,
|
||||||
title: 'Please wait.. I am thinking...',
|
title: 'Please wait.. I am Thinking...',
|
||||||
fields: [
|
fields: [
|
||||||
{
|
{
|
||||||
name: 'System Load',
|
name: 'System Load',
|
||||||
@ -304,8 +241,8 @@ async function generateResponse(conversation, message) {
|
|||||||
// call the function initially
|
// call the function initially
|
||||||
await showSystemLoad();
|
await showSystemLoad();
|
||||||
|
|
||||||
// Grab the REFRESH_INTERVAL from ENV if not exist, lets use 7 (seconds)
|
// refresh the system load percentage and update the message every 7 seconds
|
||||||
const refreshInterval = setInterval(showSystemLoad, (process.env.REFRESH_INTERVAL || 7) * 1000);
|
const refreshInterval = setInterval(showSystemLoad, 7000);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const response = await fetch(`http://${process.env.ROOT_IP}:${process.env.ROOT_PORT}/v1/chat/completions`, {
|
const response = await fetch(`http://${process.env.ROOT_IP}:${process.env.ROOT_PORT}/v1/chat/completions`, {
|
||||||
@ -316,9 +253,7 @@ async function generateResponse(conversation, message) {
|
|||||||
},
|
},
|
||||||
body: JSON.stringify({
|
body: JSON.stringify({
|
||||||
messages: messagesCopy,
|
messages: messagesCopy,
|
||||||
max_tokens: Number(process.env.MAX_TOKENS), // add the max_tokens parameter here
|
max_tokens: Number(process.env.MAX_TOKENS) // add the max_tokens parameter here
|
||||||
repeat_penalty: Number(process.env.REPEAT_PENALTY)
|
|
||||||
|
|
||||||
}),
|
}),
|
||||||
signal: controller.signal
|
signal: controller.signal
|
||||||
});
|
});
|
||||||
|
@ -11,7 +11,6 @@
|
|||||||
"license": "ISC",
|
"license": "ISC",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"discord.js": "^14.9.0",
|
"discord.js": "^14.9.0",
|
||||||
"cheerio": "^1.0.0-rc.12",
|
|
||||||
"dotenv": "^16.0.3",
|
"dotenv": "^16.0.3",
|
||||||
"node-fetch": "^3.3.1",
|
"node-fetch": "^3.3.1",
|
||||||
"os": "^0.1.2",
|
"os": "^0.1.2",
|
||||||
|
Loading…
Reference in New Issue
Block a user