Compare commits

..

65 Commits
main ... main

Author SHA1 Message Date
Raven Scott
f53c0c7195 Fix up NodeJS install GPU side 2023-11-14 18:54:17 -05:00
Raven Scott
b723f44587 Fix up HF config - Needs testing 2023-11-14 18:21:35 -05:00
Raven Scott
7c7b5d11b7 fix nodejs install for HuggingFace 2023-11-14 18:19:39 -05:00
Raven Scott
a5d2530456 Adding error checking for stat message removal 2023-11-14 18:18:37 -05:00
Raven Scott
1f3ff44317 fix token reducer 2023-08-21 21:15:10 -04:00
Raven Scott
a980f3cd43 improving token reducer logic 2023-08-15 01:41:39 -04:00
Raven Scott
05e79cba3a fixing up token reducing if sessions too large 2023-08-15 01:08:56 -04:00
Raven Scott
f91d66b2b3 Adding assistant to Counter 2023-08-14 23:44:06 -04:00
Raven Scott
6efd069b5d counting tokens properly 2023-08-14 23:00:23 -04:00
Raven Scott
bd435ca311 default to disk cache 2023-06-12 18:45:39 +02:00
Raven Scott
61ee13bfbd remove comments 2023-06-12 18:16:16 +02:00
Raven Scott
10473ef702 beginnings of using a tokenizer to check CTX length, remove context if needed 2023-06-12 18:15:11 +02:00
Raven Scott
20c83a656a update dockerfile for GPU 2023-05-31 23:02:07 +02:00
Raven Scott
61c2fed773 update dockerfile for server-non-gpu 2023-05-27 07:35:41 +02:00
Raven Scott
6ed34de804 update readme 2023-05-27 02:02:38 +02:00
Raven Scott
6bf6c1ef28 Dont send anything if time==0 2023-05-23 15:53:53 +02:00
Raven Scott
7102bf32f0 Fix GPU Cache 2023-05-23 14:56:21 +02:00
Raven Scott
099dbf908b update 2023-05-22 18:38:47 +02:00
Raven Scott
bc6157e4a1 bug fix 2023-05-22 18:23:17 +02:00
Raven Scott
da0650d3b6 resetting botMessage to stop crashing 2023-05-22 17:46:59 +02:00
Raven Scott
393999165b adding GPU ENV Var to main compose 2023-05-22 17:35:20 +02:00
Raven Scott
c75926728c comments 2023-05-21 00:20:53 +02:00
Raven Scott
927b5c834d Add warning about caching with cuBLAS 2023-05-20 23:47:16 +02:00
Raven Scott
668b343cbb changing the name scheme for docker-compose 2023-05-20 15:14:25 +02:00
Raven Scott
51a41292e6 bringing back the embed 2023-05-20 15:11:58 +02:00
Raven Scott
2ac55922d2 revert back to older style of workflow until new logic is written 2023-05-20 04:08:27 +02:00
Raven Scott
73636804a5 bug fix 2023-05-20 02:49:17 +02:00
Raven Scott
c7a3316d45 bug fix 2023-05-19 23:49:55 +02:00
Raven Scott
1fe0f20e6f revert 2023-05-19 22:55:25 +02:00
Raven Scott
8ce9e18656 removing non needed code 2023-05-19 22:43:47 +02:00
Raven Scott
2924822a49 ensuring GPU env is set in default .env 2023-05-19 21:45:20 +02:00
Raven Scott
368004f10b adding NVIDIA GPU Support with Stats 2023-05-19 21:32:21 +02:00
Raven Scott
4b090592ad Fix hugging face 2023-05-08 22:15:51 +02:00
Raven Scott
64be911772 adding REPEAT_PENALTY 2023-05-08 22:00:24 +02:00
Raven Scott
83a7bb90ed Fix up env 2023-05-08 21:12:57 +02:00
Raven Scott
14fa3b06ff Remove the abuse of looping functions 2023-05-08 15:45:05 +02:00
Raven Scott
c7d8735c8a Remove any @ symbol to remove spam pings 2023-05-08 15:42:15 +02:00
Raven Scott
012566e93c update model var 2023-05-07 14:18:17 +02:00
Raven Scott
d18cb39ecd update hf 2023-05-06 22:19:08 +02:00
Raven Scott
9bc44c56f8 update hf 2023-05-06 14:34:53 +02:00
Raven Scott
973952aee1 Revert "Urban"
This reverts commit bd5ef4db9ad28df0b84374486f0c64e35affe1f4.
2023-05-06 03:41:36 +02:00
Raven Scott
3d2546ebcf Revert "Adding urban dictionary lookups"
This reverts commit f87542132ea4e4a304d089afadff5d781a44a2e9.
2023-05-06 03:41:16 +02:00
Raven Scott
f87542132e Adding urban dictionary lookups 2023-05-06 02:32:10 +02:00
Raven Scott
bd5ef4db9a Urban 2023-05-06 02:30:46 +02:00
Raven Scott
ee47531d2f HuggingFace Deploy Instructions 2023-05-06 01:49:21 +02:00
Raven Scott
7ea6abb0f8 HuggingFace Deploy Instructions 2023-05-06 01:46:37 +02:00
Raven Scott
4ff16b4fc7 HuggingFace Deploy Instructions 2023-05-06 01:45:01 +02:00
Raven Scott
d1807d37ad HuggingFace Deploy Instructions 2023-05-06 01:41:03 +02:00
Raven Scott
af13ca3717 Move overflow delay from static to .env 2023-05-05 21:07:39 +02:00
Raven Scott
cb880f9bc1 Fix Caching web reading the web: Append last message rather than add to the conversation. 2023-05-05 20:49:29 +02:00
Raven Scott
aec98b576b update default.env 2023-05-05 19:31:17 +02:00
Raven Scott
3b1ec922c4 remove \t 2023-05-05 19:08:13 +02:00
Raven Scott
e54826085d remove redunant code 2023-05-05 18:32:46 +02:00
Raven Scott
18923b7909 cleaner output from web scrapes 2023-05-05 18:31:24 +02:00
Raven Scott
57545a20ef update 2023-05-05 18:12:43 +02:00
Raven Scott
49f2c08544 update 2023-05-05 18:12:10 +02:00
Raven Scott
45bc433060 return on error 2023-05-05 17:48:32 +02:00
Raven Scott
c83366d0a8 update package.json for cheerio 2023-05-05 17:00:40 +02:00
Raven Scott
7c5b036114 update 2023-05-05 16:56:30 +02:00
Raven Scott
a338fb725a Adding: Web Access allowing the AI to browse URLs 2023-05-05 16:55:29 +02:00
Raven Scott
a099502f1a Fix ShowSystemLoad 2023-05-02 14:38:57 +02:00
Raven Scott
c0ab1541e1 Adding refresh interval to system load status 2023-05-02 14:22:58 +02:00
Raven Scott
779ac1f462 Moving REFRESH_INTERVAL to docker-compose 2023-05-02 14:16:13 +02:00
Raven Scott
335b01287e Moving REFRESH_INTERVAL to .env 2023-05-02 14:15:45 +02:00
Raven Scott
d4e8563817 lowercase Thinking it looks better :P 2023-05-02 14:07:36 +02:00
11 changed files with 1126 additions and 67 deletions

23
Dockerfile.gpu Normal file
View File

@ -0,0 +1,23 @@
FROM nvidia/cuda:12.1.1-devel-ubuntu20.04
ENV DEBIAN_FRONTEND noninteractive
WORKDIR /app
RUN apt update
RUN apt install sudo curl -y
RUN apt-get install -y ca-certificates curl gnupg
RUN sudo mkdir -p /etc/apt/keyrings
RUN curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key | sudo gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg
ENV NODE_MAJOR=18
RUN echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_$NODE_MAJOR.x nodistro main" | sudo tee /etc/apt/sources.list.d/nodesource.list
RUN apt-get update
RUN apt-get install nodejs -y
COPY package*.json ./
RUN npm install --omit=dev
COPY . .
CMD node llamabot.js

View File

@ -78,4 +78,26 @@ This will automatically configure the API for you as well as the bot in two sepe
5. `docker compose up -d`
# Docker Compose with GPU
This will automatically configure the API that supports cuBLAS and GPU inference for you as well as the bot in two seperate containers within a stack.
NOTE: Caching for GPU has been fixed.
1. `git clone https://git.ssh.surf/snxraven/llama-cpp-python-djs-bot.git` - Clone the repo
2. `mv docker-compose.yml docker-compose.nogpu.yml; mv docker-compose.gpu.yml docker-compose.yml;` - Move nongpu compose out of the way, Enable GPU Support
3. `mv Dockerfile Dockerfile.nongpu; mv Dockerfile.gpu Dockerfile;` - Move nongpu Dockerfile out of the way, enable GPU Support
3. `cp default.gpu.env .env` - Copy the default GPU .env to its proper location
4. Set DATA_DIR in .env to the exact location of your model files.
5. Edit docker-compose.yaml MODEL to ensure the correct model bin is set
6. set N_GPU_LAYERS to the amount of layers you would like to export to GPU
7. `docker compose up -d`
Want to make this better? Issue a pull request!

View File

@ -1,14 +1,14 @@
# Discord Token
THE_TOKEN = "DISCORD_TOKEN_HERE"
THE_TOKEN = ""
# The Channel IDs the bot will operate in seperated by commas
CHANNEL_IDS = 1094494101631680653,1094628334727614605
CHANNEL_IDS =
# The INIT prompt for all conversations.
INIT_PROMPT = "Assistant name: ChatAI. You code, write and provide any information without any mistakes."
INIT_PROMPT = "Assistant name: ChatAI. You code, write and provide any information without any mistakes. You can read basic text from URLs if a user sends a user via scraping."
# Loading Emebed Refresh Timing
REFRESH_INTERVAL=10
REFRESH_INTERVAL=2
# When a message is too large for discord we chunk the response into seperate messages.
# To ensure we do not rate limit the bot we send these at a delay interval.
@ -19,30 +19,40 @@ OVERFLOW_DELAY=3
MAX_CONTENT_LENGTH=2000
# Max tokens for Generations
MAX_TOKENS = 1024
MAX_TOKENS = 1499
# ROOT_IP is only used when running the bot without docker compose
ROOT_IP = 192.168.0.15
ROOT_IP = 127.0.0.1
# PORT is only used when running the bot without docker compose
ROOT_PORT = 8000
# Directory to your models (llama.cpp specfic settings)
DATA_DIR = /home/USERNAME/weights
DATA_DIR = /Users/username/code/models
# Enable Expirmental Message Caches (Limited to single session)
# Cache will use ~1.4 GB or MORE of RAM. ONLY ENABLE IF YOUR SYSTEM CAN HANDLE THIS.
CACHE = 1
CACHE_TYPE = "disk"
# Set number of threads to use, currently, a standard thread will utilize 1 whole core
# I usually will set this between all cores I physcally have OR 2 cores less to allow for other processes.
N_THREADS = 4
N_THREADS = 10
# Always use MMAP unless you know what you are doing
USE_MMAP=1
#USE_MMAP=1
# Only use MLOCK if you know what it does!
USE_MLOCK=0
# The higher the number the more hard core.
REPEAT_PENALTY=1
REPEAT_PENALTY=1
# GPU SPECIFIC SETTINGS BELOW
GPU=0
N_GPU_LAYERS=32
PYTHONUNBUFFERED=1

34
docker-compose.gpu.yml Normal file
View File

@ -0,0 +1,34 @@
version: '3.9'
services:
backend:
container_name: llama-gpu-server
restart: unless-stopped
build:
context: ./gpu-server
env_file: .env
volumes:
- ${DATA_DIR}:/usr/src/app/models
environment:
- HOST=llama-gpu-server
- MODEL=./models/ggml-vic7b-q5_1.bin.1
- NVIDIA_VISIBLE_DEVICES=all
runtime: nvidia
frontend:
container_name: llama-djs-bot
restart: unless-stopped
build:
context: .
depends_on:
- backend
environment:
- THE_TOKEN
- REFRESH_INTERVAL
- CHANNEL_IDS
- GPU
- ROOT_IP=llama-gpu-server
- ROOT_PORT=8000
- INIT_PROMPT='Assistant Name':' ChatAI. You code, write and provide any information without any mistakes.'
- NVIDIA_VISIBLE_DEVICES=all
runtime: nvidia

View File

@ -11,7 +11,7 @@ services:
- ${DATA_DIR}:/usr/src/app/models
environment:
- HOST=llama-python-server
- MODEL=./models/ggml-vic7b-q4_0.bin
- MODEL=./models/vicuna-7b-1.1.ggmlv3.q6_K.bin
llama-python-djs-bot:
container_name: llama-python-djs-bot
restart: unless-stopped
@ -21,6 +21,7 @@ services:
- llama-python-server
environment:
- THE_TOKEN
- GPU
- REFRESH_INTERVAL
- CHANNEL_IDS
- ROOT_IP=llama-python-server

19
gpu-server/Dockerfile Normal file
View File

@ -0,0 +1,19 @@
FROM nvidia/cuda:12.1.1-devel-ubuntu20.04
# Install the deps
ENV DEBIAN_FRONTEND=noninteractive
ENV TZ=Etc/GMT
RUN apt-get update && apt-get install -y --no-install-recommends python3 python3-pip git cmake build-essential
# Get llama-cpp-python
WORKDIR /usr/src
WORKDIR /usr/src/app
# Build llama-cpp-python w/CuBLAS
RUN CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 pip install llama-cpp-python[server]
# We need to set the host to 0.0.0.0 to allow outside access
ENV HOST 0.0.0.0
# Run the server
CMD python3 -m llama_cpp.server

View File

@ -4,7 +4,13 @@ RUN apt update
RUN DEBIAN_FRONTEND=noninteractive apt install curl sudo -y
RUN curl -fsSL https://deb.nodesource.com/setup_18.x | sudo -E bash -
RUN apt-get install -y ca-certificates curl gnupg
RUN sudo mkdir -p /etc/apt/keyrings
RUN curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key | sudo gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg
ENV NODE_MAJOR=18
RUN echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_$NODE_MAJOR.x nodistro main" | sudo tee /etc/apt/sources.list.d/nodesource.list
RUN apt-get update
RUN apt-get install nodejs -y
RUN DEBIAN_FRONTEND=noninteractive apt install nodejs -y
@ -30,13 +36,13 @@ RUN npm i
ENV HOST localhost
ENV PORT 7860
ENV MODEL=/code/ggml-vic7b-q4_0.bin
ENV MODEL=/code/mistral-7b-instruct-v0.1.Q2_K.gguf
ENV CACHE=1
ENV USE_MLOCK=0
ENV REPEAT_PENALTY=1
ENV MODEL=/code/ggml-vic7b-q4_0.bin
ENV MODEL=/code/mistral-7b-instruct-v0.1.Q2_K.gguf
ENV PM2_HOME=/code/.pm2
RUN wget -q https://huggingface.co/eachadea/ggml-vicuna-7b-1.1/resolve/main/ggml-vic7b-q4_0.bin
RUN wget -q -O mistral-7b-instruct-v0.1.Q2_K.gguf "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q2_K.gguf?download=true"
CMD /bin/bash /code/start.sh

View File

@ -5,6 +5,8 @@ import { resetResponses, userResetMessages } from './assets/resetMessages.js';
import { errorMessages, busyResponses } from './assets/errorMessages.js';
import cpuStat from 'cpu-stat';
import os from 'os';
import smi from 'node-nvidia-smi';
import llamaTokenizer from 'llama-tokenizer-js'
import {
Client,
@ -27,8 +29,14 @@ const client = new Client({
// Grab ChannelIDs from the .env file
const channelIDs = process.env.CHANNEL_IDS.split(',');
// Store Conversations in a MAP
const conversations = new Map();
let botMessage; // define a variable to hold the message object
// Set busy function this allows us to set our bot into busy mode
// locking out all other tasks until the current one is complete
function setBusy(userId, isBusy) {
if (conversations.has(userId)) {
conversations.get(userId).busy = isBusy;
@ -39,6 +47,8 @@ function setBusy(userId, isBusy) {
}
}
// General check, if any conversation is busy
// If yes, flag it and let us know
function isAnyConversationBusy() {
for (const conversation of conversations.values()) {
if (conversation.busy) {
@ -49,6 +59,7 @@ function isAnyConversationBusy() {
return false;
}
// Setting our precence to busy within the bots status
function setPresenceBusy() {
client.user.setPresence({
activities: [{
@ -59,6 +70,8 @@ function setPresenceBusy() {
});
}
// Setting our precence to ready within the bots status
function setPresenceOnline() {
client.user.setPresence({
activities: [{
@ -70,18 +83,23 @@ function setPresenceOnline() {
}
// When we have logged in to discord api
// Set precence to online.
client.once('ready', () => {
console.log('Bot is ready.');
setPresenceOnline()
});
// When a message is sent within discord, lets handle it.
client.on('messageCreate', async (message) => {
// Function to send a random message from any array
async function sendRand(array) {
const arrayChoice = array[Math.floor(Math.random() * array.length)];
await message.channel.send(arrayChoice); // give a notification of reset using a human like response.
}
// Function to send a random Direct Message from any array
async function sendRandDM(array) {
const arrayChoice = array[Math.floor(Math.random() * array.length)];
await message.author.send(arrayChoice); // give a notification of reset using a human like response.
@ -92,7 +110,8 @@ client.on('messageCreate', async (message) => {
return;
}
if (message.author.bot) return; // Ignore messages from bots
// Always ignore bots!
if (message.author.bot) return;
// Check if any conversation is busy
if (isAnyConversationBusy()) {
@ -102,12 +121,18 @@ client.on('messageCreate', async (message) => {
sendRandDM(busyResponses);
return;
}
// Set user ID and get our conversation.
const userID = message.author.id;
let conversation = conversations.get(userID) || {
messages: [],
busy: false
};
// If we do not have a conversation, lets generate one.
// This requires a chatflow for the API.
// Its better to have a default beginning conversation
// Providing context for the AI Model.
if (conversation.messages.length === 0) {
conversation.messages.push({
role: 'user',
@ -123,12 +148,15 @@ client.on('messageCreate', async (message) => {
});
}
// If a user needs a reset, we delete their MAP
if (message.content === '!reset' || message.content === '!r') {
conversations.delete(userID); // Delete user's conversation map if they request reset
sendRand(userResetMessages)
return;
}
// Begin processing our conversation, this is our main work flow.
// Append user message to conversation history
conversation.messages.push({
role: 'user',
@ -136,12 +164,18 @@ client.on('messageCreate', async (message) => {
});
try {
// Now we have our conversation set up
// Lets set precence to busy
// We also will set our conversations MAP to busy
// Locking out all other tasks
setPresenceBusy()
setBusy(message.author.id, true);
// Lets start generating the response
const response = await generateResponse(conversation, message);
// Append bot message to conversation history
// Append bot message to conversation history when it is ready
conversation.messages.push({
role: 'assistant',
content: response
@ -154,22 +188,26 @@ client.on('messageCreate', async (message) => {
// if we are over the discord char limit we need chunks...
if (response.length > limit) {
// We are going to check all of the message chunks if our response is too large for discord.
// We can extend our message size using chunks, the issue?
// Users can abuse this feature, we lock this to 15 to avoid API Abuse.
const chunks = response.match(new RegExp(`.{1,${limit}}`, "g"));
if (chunks.length >= 15) return await message.channel.send("Response chunks too large. Try again");
if (chunks.length >= 40) return await message.channel.send("Response chunks too large. Try again");
// If we do now have too many chunks, lets send each one using our overflow delay
for (let i = 0; i < chunks.length; i++) {
setTimeout(() => {
message.channel.send(chunks[i]);
}, i * (process.env.OVERFLOW_DELAY || 3) * 1000); // delay of 3 seconds between each chunk to save on API requests
}
} else {
// We are good to go, send the response
// We are good to go message is not too large for discord, send the response
await message.channel.send(response.replace("@", ""));
}
// We have completed our task, lets go online
setPresenceOnline()
// set our conversation MAP to not busy
setBusy(message.author.id, false);
} else {
// Handle empty response here
@ -179,24 +217,30 @@ client.on('messageCreate', async (message) => {
setPresenceOnline()
conversation.busy = false;
}
conversations.set(userID, conversation); // Update user's conversation map in memory
console.log(conversation)
conversations.set(userID, conversation); // Update user's conversation map in memory
// Print the current conversation as it stands
console.log(conversation)
} catch (err) {
// If we have any errors lets send a response
console.error(err);
return sendRand(errorMessages)
} finally {
// We are done! Lets finish up going online
setPresenceOnline()
setBusy(message.author.id, false);
}
});
// Import cheerio for scraping
import cheerio from 'cheerio';
async function generateResponse(conversation, message) {
// Begin web scraper if a https:// OR http:// URL is detected
// Check if message contains a URL
const urlRegex = /(https?:\/\/[^\s]+)/g;
// Match our REGEX
const urls = message.content.match(urlRegex);
if (urls) {
@ -218,15 +262,17 @@ async function generateResponse(conversation, message) {
response += `Description: ${pageDescription}\n`;
}
if (pageContent) {
// Lets check for content and grab only the amount as configured.
const MAX_CONTENT_LENGTH = process.env.MAX_CONTENT_LENGTH;
let plainTextContent = $('<div>').html(pageContent).text().trim().replace(/[\r\n\t]+/g, ' ');
// Clean up code remove it from processing
const codePattern = /\/\/|\/\*|\*\/|\{|\}|\[|\]|\bfunction\b|\bclass\b|\b0x[0-9A-Fa-f]+\b|\b0b[01]+\b/;
const isCode = codePattern.test(plainTextContent);
if (isCode) {
plainTextContent = plainTextContent.replace(codePattern, '');
}
// Remove anything enclosed in brackets
// Remove anything enclosed in brackets JUNK DATA
plainTextContent = plainTextContent.replace(/ *\([^)]*\) */g, '');
if (plainTextContent.length > MAX_CONTENT_LENGTH) {
plainTextContent = plainTextContent.substring(0, MAX_CONTENT_LENGTH) + '...';
@ -240,7 +286,7 @@ async function generateResponse(conversation, message) {
// Append a new line and the new content to the existing content of the last message
conversation.messages[lastMessageIndex].content += "\n" + response;
console.log("A URL was provided, response: " + response)
} catch (err) {
@ -249,65 +295,192 @@ async function generateResponse(conversation, message) {
}
}
}
// We need an abort controller to stop our progress message editor
const controller = new AbortController();
// Set our timeout for the controller
const timeout = setTimeout(() => {
controller.abort();
}, 900000);
// Copy our messages from MAP
const messagesCopy = [...conversation.messages]; // create a copy of the messages array
let botMessage; // define a variable to hold the message object
let time = 0
// define a function that shows the system load percentage and updates the message
const showSystemLoad = async () => {
// Configure our inital time
time = Number(time) + Number(process.env.REFRESH_INTERVAL);
// Get system stats
cpuStat.usagePercent(function (err, percent, seconds) {
if (err) {
return console.log(err);
}
// Setting out system stat vars
const systemLoad = percent;
const freeMemory = os.freemem() / 1024 / 1024 / 1024;
const totalMemory = os.totalmem() / 1024 / 1024 / 1024;
const usedMemory = totalMemory - freeMemory;
const embedData = {
color: 0x0099ff,
title: 'Please wait.. I am thinking...',
fields: [
{
name: 'System Load',
value: `${systemLoad.toFixed(2)}%`,
},
{
name: 'Memory Usage',
value: `${usedMemory.toFixed(2)} GB / ${totalMemory.toFixed(2)} GB`,
},
{
name: 'Time',
value: `~${time} seconds.`,
},
],
};
// lets build some embed data
let embedData;
// if the message object doesn't exist, create it
if (!botMessage) {
(async () => {
botMessage = await message.channel.send({ embeds: [embedData] });
})();
// If we have NO GPU config lets send system stats only
if (process.env.GPU == 0) {
embedData = {
color: 0x0099ff,
title: 'Please wait.. I am thinking...',
fields: [
{
name: 'System Load',
value: `${systemLoad.toFixed(2)}%`,
},
{
name: 'Memory Usage',
value: `${usedMemory.toFixed(2)} GB / ${totalMemory.toFixed(2)} GB`,
},
{
name: 'Time',
value: `~${time} seconds.`,
},
],
};
// if the message object doesn't exist, create it
if (!botMessage) {
(async () => {
if (time == 0) return
botMessage = await message.channel.send({ embeds: [embedData] });
})();
} else {
(async () => {
if (!isAnyConversationBusy()) {
botMessage.delete()
} else {
await botMessage.edit({ embeds: [embedData] }); // otherwise, update the message
}
})();
}
} else {
botMessage.edit({ embeds: [embedData] }); // otherwise, update the message
// If we do have GPU=1 lets send some card info too!
smi(function (err, data) {
if (err) {
// Handle error if smi function fails
console.error(err);
return;
}
let utilization = data.nvidia_smi_log.gpu.utilization;
let gpuUtilization = utilization.gpu_util;
let memoryUtilization = utilization.memory_util;
let gpuTemp = data.nvidia_smi_log.gpu.temperature.gpu_temp;
// These are not used until nvidia-docker fixes their support
let gpuTarget = data.nvidia_smi_log.gpu.temperature.gpu_target_temperature;
let gpuFanSpeed = data.nvidia_smi_log.gpu.fan_speed;
embedData = {
color: 0x0099ff,
title: 'Please wait.. I am thinking...',
fields: [
{
name: 'System Load',
value: `${systemLoad.toFixed(2)}%`,
},
{
name: 'Memory Usage',
value: `${usedMemory.toFixed(2)} GB / ${totalMemory.toFixed(2)} GB`,
},
{
name: 'GPU Utilization',
value: `${gpuUtilization}`,
},
{
name: 'Memory Utilization',
value: `${memoryUtilization}`,
},
{
name: 'GPU Temperature',
value: `${gpuTemp}`,
},
{
name: 'Time',
value: `~${time} seconds.`,
},
],
};
// if the message object doesn't exist, create it
if (!botMessage) {
(async () => {
if (time == 0) return
botMessage = await message.channel.send({ embeds: [embedData] });
})();
} else {
(async () => {
if (!isAnyConversationBusy()) {
botMessage.delete()
} else {
await botMessage.edit({ embeds: [embedData] }); // otherwise, update the message
}
})();
}
})
}
});
};
// call the function initially
await showSystemLoad();
// Grab the REFRESH_INTERVAL from ENV if not exist, lets use 7 (seconds)
const refreshInterval = setInterval(showSystemLoad, (process.env.REFRESH_INTERVAL || 7) * 1000);
try {
// call the function initially
await showSystemLoad();
// Grab the REFRESH_INTERVAL from ENV if not exist, lets use 7 (seconds)
const refreshInterval = setInterval(showSystemLoad, (process.env.REFRESH_INTERVAL || 7) * 1000);
function countLlamaTokens(messages) {
let totalTokens = 0;
for (const message of messages) {
if (message.role === 'user' || message.role === 'assistant') {
const encodedTokens = llamaTokenizer.encode(message.content);
totalTokens += encodedTokens.length;
}
}
return totalTokens;
}
let totalTokens = countLlamaTokens(messagesCopy);
console.log(`Total Llama tokens: ${totalTokens}`);
let tokenLength = totalTokens
// Remove older conversations if necessary
const maxLength = 1800;
const tolerance = 25; // allow for some flexibility in the token length
if (tokenLength > maxLength + tolerance) {
const diff = tokenLength - (maxLength + tolerance);
let removedTokens = 0;
// Iterate over the messages in reverse order
for (let i = messagesCopy.length - 1; i >= 0; i--) {
const message = messagesCopy[i];
const messageTokens = countLlamaTokens([message]);
// Check if the current message plus the tokens in the message is less than or equal to the diff
if (removedTokens + messageTokens <= diff) {
messagesCopy.splice(i, 1);
removedTokens += messageTokens;
console.log(removedTokens + " removed \nAfter Resize: " + countLlamaTokens(messagesCopy));
} else {
// Remove more than one message if necessary to bring the total length below the maximum allowed length
const messagesToRemove = Math.floor(diff / messageTokens);
for (let j = 0; j < messagesToRemove; j++) {
messagesCopy.splice(i, 1);
removedTokens += messageTokens;
}
break;
}
}
}
// Sending request to our API
const response = await fetch(`http://${process.env.ROOT_IP}:${process.env.ROOT_PORT}/v1/chat/completions`, {
method: 'POST',
headers: {
@ -330,9 +503,18 @@ async function generateResponse(conversation, message) {
const responseText = choice.message.content;
// clear the interval, replace the "please wait" message with the response, and update the message
clearInterval(refreshInterval);
console.log(responseText);
botMessage.delete()
try {
if (time > 2) {
await botMessage.delete();
clearInterval(refreshInterval);
botMessage = null;
console.log("Time limit reached. Message deleted.");
}
} catch (err) {
console.log("Error deleting message: ", err);
}
return responseText;
@ -340,8 +522,9 @@ async function generateResponse(conversation, message) {
throw err;
} finally {
clearTimeout(timeout);
botMessage = null;
time = 0
}
}
client.login(process.env.THE_TOKEN); // Replace with your bot token
client.login(process.env.THE_TOKEN); // Replace with your bot token

757
package-lock.json generated Normal file
View File

@ -0,0 +1,757 @@
{
"name": "llama-cpp-python-djs-bot",
"version": "1.0.0",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "llama-cpp-python-djs-bot",
"version": "1.0.0",
"license": "ISC",
"dependencies": {
"cheerio": "^1.0.0-rc.12",
"cpu-stat": "^2.0.1",
"discord.js": "^14.9.0",
"dotenv": "^16.0.3",
"gpt-tokenizer": "^2.1.1",
"llama-tokenizer-js": "^1.0.0",
"node-fetch": "^3.3.1",
"node-nvidia-smi": "^1.0.0",
"os": "^0.1.2",
"tiktoken": "^1.0.10"
}
},
"node_modules/@discordjs/builders": {
"version": "1.6.3",
"resolved": "https://registry.npmjs.org/@discordjs/builders/-/builders-1.6.3.tgz",
"integrity": "sha512-CTCh8NqED3iecTNuiz49mwSsrc2iQb4d0MjMdmS/8pb69Y4IlzJ/DIy/p5GFlgOrFbNO2WzMHkWKQSiJ3VNXaw==",
"dependencies": {
"@discordjs/formatters": "^0.3.1",
"@discordjs/util": "^0.3.1",
"@sapphire/shapeshift": "^3.8.2",
"discord-api-types": "^0.37.41",
"fast-deep-equal": "^3.1.3",
"ts-mixer": "^6.0.3",
"tslib": "^2.5.0"
},
"engines": {
"node": ">=16.9.0"
}
},
"node_modules/@discordjs/collection": {
"version": "1.5.1",
"resolved": "https://registry.npmjs.org/@discordjs/collection/-/collection-1.5.1.tgz",
"integrity": "sha512-aWEc9DCf3TMDe9iaJoOnO2+JVAjeRNuRxPZQA6GVvBf+Z3gqUuWYBy2NWh4+5CLYq5uoc3MOvUQ5H5m8CJBqOA==",
"engines": {
"node": ">=16.9.0"
}
},
"node_modules/@discordjs/formatters": {
"version": "0.3.1",
"resolved": "https://registry.npmjs.org/@discordjs/formatters/-/formatters-0.3.1.tgz",
"integrity": "sha512-M7X4IGiSeh4znwcRGcs+49B5tBkNDn4k5bmhxJDAUhRxRHTiFAOTVUNQ6yAKySu5jZTnCbSvTYHW3w0rAzV1MA==",
"dependencies": {
"discord-api-types": "^0.37.41"
},
"engines": {
"node": ">=16.9.0"
}
},
"node_modules/@discordjs/rest": {
"version": "1.7.1",
"resolved": "https://registry.npmjs.org/@discordjs/rest/-/rest-1.7.1.tgz",
"integrity": "sha512-Ofa9UqT0U45G/eX86cURQnX7gzOJLG2oC28VhIk/G6IliYgQF7jFByBJEykPSHE4MxPhqCleYvmsrtfKh1nYmQ==",
"dependencies": {
"@discordjs/collection": "^1.5.1",
"@discordjs/util": "^0.3.0",
"@sapphire/async-queue": "^1.5.0",
"@sapphire/snowflake": "^3.4.2",
"discord-api-types": "^0.37.41",
"file-type": "^18.3.0",
"tslib": "^2.5.0",
"undici": "^5.22.0"
},
"engines": {
"node": ">=16.9.0"
}
},
"node_modules/@discordjs/util": {
"version": "0.3.1",
"resolved": "https://registry.npmjs.org/@discordjs/util/-/util-0.3.1.tgz",
"integrity": "sha512-HxXKYKg7vohx2/OupUN/4Sd02Ev3PBJ5q0gtjdcvXb0ErCva8jNHWfe/v5sU3UKjIB/uxOhc+TDOnhqffj9pRA==",
"engines": {
"node": ">=16.9.0"
}
},
"node_modules/@discordjs/ws": {
"version": "0.8.3",
"resolved": "https://registry.npmjs.org/@discordjs/ws/-/ws-0.8.3.tgz",
"integrity": "sha512-hcYtppanjHecbdNyCKQNH2I4RP9UrphDgmRgLYrATEQF1oo4sYSve7ZmGsBEXSzH72MO2tBPdWSThunbxUVk0g==",
"dependencies": {
"@discordjs/collection": "^1.5.1",
"@discordjs/rest": "^1.7.1",
"@discordjs/util": "^0.3.1",
"@sapphire/async-queue": "^1.5.0",
"@types/ws": "^8.5.4",
"@vladfrangu/async_event_emitter": "^2.2.1",
"discord-api-types": "^0.37.41",
"tslib": "^2.5.0",
"ws": "^8.13.0"
},
"engines": {
"node": ">=16.9.0"
}
},
"node_modules/@sapphire/async-queue": {
"version": "1.5.0",
"resolved": "https://registry.npmjs.org/@sapphire/async-queue/-/async-queue-1.5.0.tgz",
"integrity": "sha512-JkLdIsP8fPAdh9ZZjrbHWR/+mZj0wvKS5ICibcLrRI1j84UmLMshx5n9QmL8b95d4onJ2xxiyugTgSAX7AalmA==",
"engines": {
"node": ">=v14.0.0",
"npm": ">=7.0.0"
}
},
"node_modules/@sapphire/shapeshift": {
"version": "3.9.2",
"resolved": "https://registry.npmjs.org/@sapphire/shapeshift/-/shapeshift-3.9.2.tgz",
"integrity": "sha512-YRbCXWy969oGIdqR/wha62eX8GNHsvyYi0Rfd4rNW6tSVVa8p0ELiMEuOH/k8rgtvRoM+EMV7Csqz77YdwiDpA==",
"dependencies": {
"fast-deep-equal": "^3.1.3",
"lodash": "^4.17.21"
},
"engines": {
"node": ">=v14.0.0",
"npm": ">=7.0.0"
}
},
"node_modules/@sapphire/snowflake": {
"version": "3.5.1",
"resolved": "https://registry.npmjs.org/@sapphire/snowflake/-/snowflake-3.5.1.tgz",
"integrity": "sha512-BxcYGzgEsdlG0dKAyOm0ehLGm2CafIrfQTZGWgkfKYbj+pNNsorZ7EotuZukc2MT70E0UbppVbtpBrqpzVzjNA==",
"engines": {
"node": ">=v14.0.0",
"npm": ">=7.0.0"
}
},
"node_modules/@tokenizer/token": {
"version": "0.3.0",
"resolved": "https://registry.npmjs.org/@tokenizer/token/-/token-0.3.0.tgz",
"integrity": "sha512-OvjF+z51L3ov0OyAU0duzsYuvO01PH7x4t6DJx+guahgTnBHkhJdG7soQeTSFLWN3efnHyibZ4Z8l2EuWwJN3A=="
},
"node_modules/@types/node": {
"version": "20.3.0",
"resolved": "https://registry.npmjs.org/@types/node/-/node-20.3.0.tgz",
"integrity": "sha512-cumHmIAf6On83X7yP+LrsEyUOf/YlociZelmpRYaGFydoaPdxdt80MAbu6vWerQT2COCp2nPvHdsbD7tHn/YlQ=="
},
"node_modules/@types/ws": {
"version": "8.5.5",
"resolved": "https://registry.npmjs.org/@types/ws/-/ws-8.5.5.tgz",
"integrity": "sha512-lwhs8hktwxSjf9UaZ9tG5M03PGogvFaH8gUgLNbN9HKIg0dvv6q+gkSuJ8HN4/VbyxkuLzCjlN7GquQ0gUJfIg==",
"dependencies": {
"@types/node": "*"
}
},
"node_modules/@vladfrangu/async_event_emitter": {
"version": "2.2.2",
"resolved": "https://registry.npmjs.org/@vladfrangu/async_event_emitter/-/async_event_emitter-2.2.2.tgz",
"integrity": "sha512-HIzRG7sy88UZjBJamssEczH5q7t5+axva19UbZLO6u0ySbYPrwzWiXBcC0WuHyhKKoeCyneH+FvYzKQq/zTtkQ==",
"engines": {
"node": ">=v14.0.0",
"npm": ">=7.0.0"
}
},
"node_modules/boolbase": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/boolbase/-/boolbase-1.0.0.tgz",
"integrity": "sha512-JZOSA7Mo9sNGB8+UjSgzdLtokWAky1zbztM3WRLCbZ70/3cTANmQmOdR7y2g+J0e2WXywy1yS468tY+IruqEww=="
},
"node_modules/busboy": {
"version": "1.6.0",
"resolved": "https://registry.npmjs.org/busboy/-/busboy-1.6.0.tgz",
"integrity": "sha512-8SFQbg/0hQ9xy3UNTB0YEnsNBbWfhf7RtnzpL7TkBiTBRfrQ9Fxcnz7VJsleJpyp6rVLvXiuORqjlHi5q+PYuA==",
"dependencies": {
"streamsearch": "^1.1.0"
},
"engines": {
"node": ">=10.16.0"
}
},
"node_modules/cheerio": {
"version": "1.0.0-rc.12",
"resolved": "https://registry.npmjs.org/cheerio/-/cheerio-1.0.0-rc.12.tgz",
"integrity": "sha512-VqR8m68vM46BNnuZ5NtnGBKIE/DfN0cRIzg9n40EIq9NOv90ayxLBXA8fXC5gquFRGJSTRqBq25Jt2ECLR431Q==",
"dependencies": {
"cheerio-select": "^2.1.0",
"dom-serializer": "^2.0.0",
"domhandler": "^5.0.3",
"domutils": "^3.0.1",
"htmlparser2": "^8.0.1",
"parse5": "^7.0.0",
"parse5-htmlparser2-tree-adapter": "^7.0.0"
},
"engines": {
"node": ">= 6"
},
"funding": {
"url": "https://github.com/cheeriojs/cheerio?sponsor=1"
}
},
"node_modules/cheerio-select": {
"version": "2.1.0",
"resolved": "https://registry.npmjs.org/cheerio-select/-/cheerio-select-2.1.0.tgz",
"integrity": "sha512-9v9kG0LvzrlcungtnJtpGNxY+fzECQKhK4EGJX2vByejiMX84MFNQw4UxPJl3bFbTMw+Dfs37XaIkCwTZfLh4g==",
"dependencies": {
"boolbase": "^1.0.0",
"css-select": "^5.1.0",
"css-what": "^6.1.0",
"domelementtype": "^2.3.0",
"domhandler": "^5.0.3",
"domutils": "^3.0.1"
},
"funding": {
"url": "https://github.com/sponsors/fb55"
}
},
"node_modules/cpu-stat": {
"version": "2.0.1",
"resolved": "https://registry.npmjs.org/cpu-stat/-/cpu-stat-2.0.1.tgz",
"integrity": "sha512-bC4ts/0IjYfNV6Dc7F2NauWM0tip0fneZjRek8HqX2ZERC4oSt6dmV+GTN1mfE9OKbLAppv58M2PVzKLGB731w=="
},
"node_modules/css-select": {
"version": "5.1.0",
"resolved": "https://registry.npmjs.org/css-select/-/css-select-5.1.0.tgz",
"integrity": "sha512-nwoRF1rvRRnnCqqY7updORDsuqKzqYJ28+oSMaJMMgOauh3fvwHqMS7EZpIPqK8GL+g9mKxF1vP/ZjSeNjEVHg==",
"dependencies": {
"boolbase": "^1.0.0",
"css-what": "^6.1.0",
"domhandler": "^5.0.2",
"domutils": "^3.0.1",
"nth-check": "^2.0.1"
},
"funding": {
"url": "https://github.com/sponsors/fb55"
}
},
"node_modules/css-what": {
"version": "6.1.0",
"resolved": "https://registry.npmjs.org/css-what/-/css-what-6.1.0.tgz",
"integrity": "sha512-HTUrgRJ7r4dsZKU6GjmpfRK1O76h97Z8MfS1G0FozR+oF2kG6Vfe8JE6zwrkbxigziPHinCJ+gCPjA9EaBDtRw==",
"engines": {
"node": ">= 6"
},
"funding": {
"url": "https://github.com/sponsors/fb55"
}
},
"node_modules/data-uri-to-buffer": {
"version": "4.0.1",
"resolved": "https://registry.npmjs.org/data-uri-to-buffer/-/data-uri-to-buffer-4.0.1.tgz",
"integrity": "sha512-0R9ikRb668HB7QDxT1vkpuUBtqc53YyAwMwGeUFKRojY/NWKvdZ+9UYtRfGmhqNbRkTSVpMbmyhXipFFv2cb/A==",
"engines": {
"node": ">= 12"
}
},
"node_modules/discord-api-types": {
"version": "0.37.43",
"resolved": "https://registry.npmjs.org/discord-api-types/-/discord-api-types-0.37.43.tgz",
"integrity": "sha512-bBhDWU3TF9KADxR/mHp1K4Bvu/LRtFQdGyBjADu4e66F3ZnD4kp12W/SJCttIaCcMXzPV3sfty6eDGRNRph51Q=="
},
"node_modules/discord.js": {
"version": "14.11.0",
"resolved": "https://registry.npmjs.org/discord.js/-/discord.js-14.11.0.tgz",
"integrity": "sha512-CkueWYFQ28U38YPR8HgsBR/QT35oPpMbEsTNM30Fs8loBIhnA4s70AwQEoy6JvLcpWWJO7GY0y2BUzZmuBMepQ==",
"dependencies": {
"@discordjs/builders": "^1.6.3",
"@discordjs/collection": "^1.5.1",
"@discordjs/formatters": "^0.3.1",
"@discordjs/rest": "^1.7.1",
"@discordjs/util": "^0.3.1",
"@discordjs/ws": "^0.8.3",
"@sapphire/snowflake": "^3.4.2",
"@types/ws": "^8.5.4",
"discord-api-types": "^0.37.41",
"fast-deep-equal": "^3.1.3",
"lodash.snakecase": "^4.1.1",
"tslib": "^2.5.0",
"undici": "^5.22.0",
"ws": "^8.13.0"
},
"engines": {
"node": ">=16.9.0"
}
},
"node_modules/dom-serializer": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/dom-serializer/-/dom-serializer-2.0.0.tgz",
"integrity": "sha512-wIkAryiqt/nV5EQKqQpo3SToSOV9J0DnbJqwK7Wv/Trc92zIAYZ4FlMu+JPFW1DfGFt81ZTCGgDEabffXeLyJg==",
"dependencies": {
"domelementtype": "^2.3.0",
"domhandler": "^5.0.2",
"entities": "^4.2.0"
},
"funding": {
"url": "https://github.com/cheeriojs/dom-serializer?sponsor=1"
}
},
"node_modules/domelementtype": {
"version": "2.3.0",
"resolved": "https://registry.npmjs.org/domelementtype/-/domelementtype-2.3.0.tgz",
"integrity": "sha512-OLETBj6w0OsagBwdXnPdN0cnMfF9opN69co+7ZrbfPGrdpPVNBUj02spi6B1N7wChLQiPn4CSH/zJvXw56gmHw==",
"funding": [
{
"type": "github",
"url": "https://github.com/sponsors/fb55"
}
]
},
"node_modules/domhandler": {
"version": "5.0.3",
"resolved": "https://registry.npmjs.org/domhandler/-/domhandler-5.0.3.tgz",
"integrity": "sha512-cgwlv/1iFQiFnU96XXgROh8xTeetsnJiDsTc7TYCLFd9+/WNkIqPTxiM/8pSd8VIrhXGTf1Ny1q1hquVqDJB5w==",
"dependencies": {
"domelementtype": "^2.3.0"
},
"engines": {
"node": ">= 4"
},
"funding": {
"url": "https://github.com/fb55/domhandler?sponsor=1"
}
},
"node_modules/domutils": {
"version": "3.1.0",
"resolved": "https://registry.npmjs.org/domutils/-/domutils-3.1.0.tgz",
"integrity": "sha512-H78uMmQtI2AhgDJjWeQmHwJJ2bLPD3GMmO7Zja/ZZh84wkm+4ut+IUnUdRa8uCGX88DiVx1j6FRe1XfxEgjEZA==",
"dependencies": {
"dom-serializer": "^2.0.0",
"domelementtype": "^2.3.0",
"domhandler": "^5.0.3"
},
"funding": {
"url": "https://github.com/fb55/domutils?sponsor=1"
}
},
"node_modules/dotenv": {
"version": "16.1.4",
"resolved": "https://registry.npmjs.org/dotenv/-/dotenv-16.1.4.tgz",
"integrity": "sha512-m55RtE8AsPeJBpOIFKihEmqUcoVncQIwo7x9U8ZwLEZw9ZpXboz2c+rvog+jUaJvVrZ5kBOeYQBX5+8Aa/OZQw==",
"engines": {
"node": ">=12"
},
"funding": {
"url": "https://github.com/motdotla/dotenv?sponsor=1"
}
},
"node_modules/entities": {
"version": "4.5.0",
"resolved": "https://registry.npmjs.org/entities/-/entities-4.5.0.tgz",
"integrity": "sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw==",
"engines": {
"node": ">=0.12"
},
"funding": {
"url": "https://github.com/fb55/entities?sponsor=1"
}
},
"node_modules/fast-deep-equal": {
"version": "3.1.3",
"resolved": "https://registry.npmjs.org/fast-deep-equal/-/fast-deep-equal-3.1.3.tgz",
"integrity": "sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q=="
},
"node_modules/fetch-blob": {
"version": "3.2.0",
"resolved": "https://registry.npmjs.org/fetch-blob/-/fetch-blob-3.2.0.tgz",
"integrity": "sha512-7yAQpD2UMJzLi1Dqv7qFYnPbaPx7ZfFK6PiIxQ4PfkGPyNyl2Ugx+a/umUonmKqjhM4DnfbMvdX6otXq83soQQ==",
"funding": [
{
"type": "github",
"url": "https://github.com/sponsors/jimmywarting"
},
{
"type": "paypal",
"url": "https://paypal.me/jimmywarting"
}
],
"dependencies": {
"node-domexception": "^1.0.0",
"web-streams-polyfill": "^3.0.3"
},
"engines": {
"node": "^12.20 || >= 14.13"
}
},
"node_modules/file-type": {
"version": "18.5.0",
"resolved": "https://registry.npmjs.org/file-type/-/file-type-18.5.0.tgz",
"integrity": "sha512-yvpl5U868+V6PqXHMmsESpg6unQ5GfnPssl4dxdJudBrr9qy7Fddt7EVX1VLlddFfe8Gj9N7goCZH22FXuSQXQ==",
"dependencies": {
"readable-web-to-node-stream": "^3.0.2",
"strtok3": "^7.0.0",
"token-types": "^5.0.1"
},
"engines": {
"node": ">=14.16"
},
"funding": {
"url": "https://github.com/sindresorhus/file-type?sponsor=1"
}
},
"node_modules/formdata-polyfill": {
"version": "4.0.10",
"resolved": "https://registry.npmjs.org/formdata-polyfill/-/formdata-polyfill-4.0.10.tgz",
"integrity": "sha512-buewHzMvYL29jdeQTVILecSaZKnt/RJWjoZCF5OW60Z67/GmSLBkOFM7qh1PI3zFNtJbaZL5eQu1vLfazOwj4g==",
"dependencies": {
"fetch-blob": "^3.1.2"
},
"engines": {
"node": ">=12.20.0"
}
},
"node_modules/gpt-tokenizer": {
"version": "2.1.1",
"resolved": "https://registry.npmjs.org/gpt-tokenizer/-/gpt-tokenizer-2.1.1.tgz",
"integrity": "sha512-WlX+vj6aPaZ71U6Bf18fem+5k58zlgh2a4nbc7KHy6aGVIyq3nCh709b/8momu34sV/5t/SpzWi8LayWD9uyDw==",
"dependencies": {
"rfc4648": "^1.5.2"
}
},
"node_modules/htmlparser2": {
"version": "8.0.2",
"resolved": "https://registry.npmjs.org/htmlparser2/-/htmlparser2-8.0.2.tgz",
"integrity": "sha512-GYdjWKDkbRLkZ5geuHs5NY1puJ+PXwP7+fHPRz06Eirsb9ugf6d8kkXav6ADhcODhFFPMIXyxkxSuMf3D6NCFA==",
"funding": [
"https://github.com/fb55/htmlparser2?sponsor=1",
{
"type": "github",
"url": "https://github.com/sponsors/fb55"
}
],
"dependencies": {
"domelementtype": "^2.3.0",
"domhandler": "^5.0.3",
"domutils": "^3.0.1",
"entities": "^4.4.0"
}
},
"node_modules/ieee754": {
"version": "1.2.1",
"resolved": "https://registry.npmjs.org/ieee754/-/ieee754-1.2.1.tgz",
"integrity": "sha512-dcyqhDvX1C46lXZcVqCpK+FtMRQVdIMN6/Df5js2zouUsqG7I6sFxitIC+7KYK29KdXOLHdu9zL4sFnoVQnqaA==",
"funding": [
{
"type": "github",
"url": "https://github.com/sponsors/feross"
},
{
"type": "patreon",
"url": "https://www.patreon.com/feross"
},
{
"type": "consulting",
"url": "https://feross.org/support"
}
]
},
"node_modules/inherits": {
"version": "2.0.4",
"resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz",
"integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ=="
},
"node_modules/llama-tokenizer-js": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/llama-tokenizer-js/-/llama-tokenizer-js-1.0.0.tgz",
"integrity": "sha512-O2FsnoXwOsbrM91bd5iX6cPDJvKwvIRghwhhdgGJr4rxy3Ap9QznORqIJHjTqwy9JF1jiqP6sARo0pB6ojW/Cg=="
},
"node_modules/lodash": {
"version": "4.17.21",
"resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz",
"integrity": "sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg=="
},
"node_modules/lodash.snakecase": {
"version": "4.1.1",
"resolved": "https://registry.npmjs.org/lodash.snakecase/-/lodash.snakecase-4.1.1.tgz",
"integrity": "sha512-QZ1d4xoBHYUeuouhEq3lk3Uq7ldgyFXGBhg04+oRLnIz8o9T65Eh+8YdroUwn846zchkA9yDsDl5CVVaV2nqYw=="
},
"node_modules/node-domexception": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/node-domexception/-/node-domexception-1.0.0.tgz",
"integrity": "sha512-/jKZoMpw0F8GRwl4/eLROPA3cfcXtLApP0QzLmUT/HuPCZWyB7IY9ZrMeKw2O/nFIqPQB3PVM9aYm0F312AXDQ==",
"funding": [
{
"type": "github",
"url": "https://github.com/sponsors/jimmywarting"
},
{
"type": "github",
"url": "https://paypal.me/jimmywarting"
}
],
"engines": {
"node": ">=10.5.0"
}
},
"node_modules/node-fetch": {
"version": "3.3.1",
"resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-3.3.1.tgz",
"integrity": "sha512-cRVc/kyto/7E5shrWca1Wsea4y6tL9iYJE5FBCius3JQfb/4P4I295PfhgbJQBLTx6lATE4z+wK0rPM4VS2uow==",
"dependencies": {
"data-uri-to-buffer": "^4.0.0",
"fetch-blob": "^3.1.4",
"formdata-polyfill": "^4.0.10"
},
"engines": {
"node": "^12.20.0 || ^14.13.1 || >=16.0.0"
},
"funding": {
"type": "opencollective",
"url": "https://opencollective.com/node-fetch"
}
},
"node_modules/node-nvidia-smi": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/node-nvidia-smi/-/node-nvidia-smi-1.0.0.tgz",
"integrity": "sha512-vjgWll2M/FWihNPKKbhWKSfbk57WrpVv1XOR5iuIxIHNieTUuJ0HrDIZj5gsJ63dx2eTbm/dWm+fWIPfYm+D3Q==",
"dependencies": {
"xml2js": "^0.4.17"
},
"bin": {
"node-nvidia-smi": "node-nvidia-smi.js"
}
},
"node_modules/nth-check": {
"version": "2.1.1",
"resolved": "https://registry.npmjs.org/nth-check/-/nth-check-2.1.1.tgz",
"integrity": "sha512-lqjrjmaOoAnWfMmBPL+XNnynZh2+swxiX3WUE0s4yEHI6m+AwrK2UZOimIRl3X/4QctVqS8AiZjFqyOGrMXb/w==",
"dependencies": {
"boolbase": "^1.0.0"
},
"funding": {
"url": "https://github.com/fb55/nth-check?sponsor=1"
}
},
"node_modules/os": {
"version": "0.1.2",
"resolved": "https://registry.npmjs.org/os/-/os-0.1.2.tgz",
"integrity": "sha512-ZoXJkvAnljwvc56MbvhtKVWmSkzV712k42Is2mA0+0KTSRakq5XXuXpjZjgAt9ctzl51ojhQWakQQpmOvXWfjQ=="
},
"node_modules/parse5": {
"version": "7.1.2",
"resolved": "https://registry.npmjs.org/parse5/-/parse5-7.1.2.tgz",
"integrity": "sha512-Czj1WaSVpaoj0wbhMzLmWD69anp2WH7FXMB9n1Sy8/ZFF9jolSQVMu1Ij5WIyGmcBmhk7EOndpO4mIpihVqAXw==",
"dependencies": {
"entities": "^4.4.0"
},
"funding": {
"url": "https://github.com/inikulin/parse5?sponsor=1"
}
},
"node_modules/parse5-htmlparser2-tree-adapter": {
"version": "7.0.0",
"resolved": "https://registry.npmjs.org/parse5-htmlparser2-tree-adapter/-/parse5-htmlparser2-tree-adapter-7.0.0.tgz",
"integrity": "sha512-B77tOZrqqfUfnVcOrUvfdLbz4pu4RopLD/4vmu3HUPswwTA8OH0EMW9BlWR2B0RCoiZRAHEUu7IxeP1Pd1UU+g==",
"dependencies": {
"domhandler": "^5.0.2",
"parse5": "^7.0.0"
},
"funding": {
"url": "https://github.com/inikulin/parse5?sponsor=1"
}
},
"node_modules/peek-readable": {
"version": "5.0.0",
"resolved": "https://registry.npmjs.org/peek-readable/-/peek-readable-5.0.0.tgz",
"integrity": "sha512-YtCKvLUOvwtMGmrniQPdO7MwPjgkFBtFIrmfSbYmYuq3tKDV/mcfAhBth1+C3ru7uXIZasc/pHnb+YDYNkkj4A==",
"engines": {
"node": ">=14.16"
},
"funding": {
"type": "github",
"url": "https://github.com/sponsors/Borewit"
}
},
"node_modules/readable-stream": {
"version": "3.6.2",
"resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.6.2.tgz",
"integrity": "sha512-9u/sniCrY3D5WdsERHzHE4G2YCXqoG5FTHUiCC4SIbr6XcLZBY05ya9EKjYek9O5xOAwjGq+1JdGBAS7Q9ScoA==",
"dependencies": {
"inherits": "^2.0.3",
"string_decoder": "^1.1.1",
"util-deprecate": "^1.0.1"
},
"engines": {
"node": ">= 6"
}
},
"node_modules/readable-web-to-node-stream": {
"version": "3.0.2",
"resolved": "https://registry.npmjs.org/readable-web-to-node-stream/-/readable-web-to-node-stream-3.0.2.tgz",
"integrity": "sha512-ePeK6cc1EcKLEhJFt/AebMCLL+GgSKhuygrZ/GLaKZYEecIgIECf4UaUuaByiGtzckwR4ain9VzUh95T1exYGw==",
"dependencies": {
"readable-stream": "^3.6.0"
},
"engines": {
"node": ">=8"
},
"funding": {
"type": "github",
"url": "https://github.com/sponsors/Borewit"
}
},
"node_modules/rfc4648": {
"version": "1.5.2",
"resolved": "https://registry.npmjs.org/rfc4648/-/rfc4648-1.5.2.tgz",
"integrity": "sha512-tLOizhR6YGovrEBLatX1sdcuhoSCXddw3mqNVAcKxGJ+J0hFeJ+SjeWCv5UPA/WU3YzWPPuCVYgXBKZUPGpKtg=="
},
"node_modules/safe-buffer": {
"version": "5.2.1",
"resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz",
"integrity": "sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==",
"funding": [
{
"type": "github",
"url": "https://github.com/sponsors/feross"
},
{
"type": "patreon",
"url": "https://www.patreon.com/feross"
},
{
"type": "consulting",
"url": "https://feross.org/support"
}
]
},
"node_modules/sax": {
"version": "1.2.4",
"resolved": "https://registry.npmjs.org/sax/-/sax-1.2.4.tgz",
"integrity": "sha512-NqVDv9TpANUjFm0N8uM5GxL36UgKi9/atZw+x7YFnQ8ckwFGKrl4xX4yWtrey3UJm5nP1kUbnYgLopqWNSRhWw=="
},
"node_modules/streamsearch": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/streamsearch/-/streamsearch-1.1.0.tgz",
"integrity": "sha512-Mcc5wHehp9aXz1ax6bZUyY5afg9u2rv5cqQI3mRrYkGC8rW2hM02jWuwjtL++LS5qinSyhj2QfLyNsuc+VsExg==",
"engines": {
"node": ">=10.0.0"
}
},
"node_modules/string_decoder": {
"version": "1.3.0",
"resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.3.0.tgz",
"integrity": "sha512-hkRX8U1WjJFd8LsDJ2yQ/wWWxaopEsABU1XfkM8A+j0+85JAGppt16cr1Whg6KIbb4okU6Mql6BOj+uup/wKeA==",
"dependencies": {
"safe-buffer": "~5.2.0"
}
},
"node_modules/strtok3": {
"version": "7.0.0",
"resolved": "https://registry.npmjs.org/strtok3/-/strtok3-7.0.0.tgz",
"integrity": "sha512-pQ+V+nYQdC5H3Q7qBZAz/MO6lwGhoC2gOAjuouGf/VO0m7vQRh8QNMl2Uf6SwAtzZ9bOw3UIeBukEGNJl5dtXQ==",
"dependencies": {
"@tokenizer/token": "^0.3.0",
"peek-readable": "^5.0.0"
},
"engines": {
"node": ">=14.16"
},
"funding": {
"type": "github",
"url": "https://github.com/sponsors/Borewit"
}
},
"node_modules/tiktoken": {
"version": "1.0.10",
"resolved": "https://registry.npmjs.org/tiktoken/-/tiktoken-1.0.10.tgz",
"integrity": "sha512-gF8ndTCNu7WcRFbl1UUWaFIB4CTXmHzS3tRYdyUYF7x3C6YR6Evoao4zhKDmWIwv2PzNbzoQMV8Pxt+17lEDbA=="
},
"node_modules/token-types": {
"version": "5.0.1",
"resolved": "https://registry.npmjs.org/token-types/-/token-types-5.0.1.tgz",
"integrity": "sha512-Y2fmSnZjQdDb9W4w4r1tswlMHylzWIeOKpx0aZH9BgGtACHhrk3OkT52AzwcuqTRBZtvvnTjDBh8eynMulu8Vg==",
"dependencies": {
"@tokenizer/token": "^0.3.0",
"ieee754": "^1.2.1"
},
"engines": {
"node": ">=14.16"
},
"funding": {
"type": "github",
"url": "https://github.com/sponsors/Borewit"
}
},
"node_modules/ts-mixer": {
"version": "6.0.3",
"resolved": "https://registry.npmjs.org/ts-mixer/-/ts-mixer-6.0.3.tgz",
"integrity": "sha512-k43M7uCG1AkTyxgnmI5MPwKoUvS/bRvLvUb7+Pgpdlmok8AoqmUaZxUUw8zKM5B1lqZrt41GjYgnvAi0fppqgQ=="
},
"node_modules/tslib": {
"version": "2.5.3",
"resolved": "https://registry.npmjs.org/tslib/-/tslib-2.5.3.tgz",
"integrity": "sha512-mSxlJJwl3BMEQCUNnxXBU9jP4JBktcEGhURcPR6VQVlnP0FdDEsIaz0C35dXNGLyRfrATNofF0F5p2KPxQgB+w=="
},
"node_modules/undici": {
"version": "5.22.1",
"resolved": "https://registry.npmjs.org/undici/-/undici-5.22.1.tgz",
"integrity": "sha512-Ji2IJhFXZY0x/0tVBXeQwgPlLWw13GVzpsWPQ3rV50IFMMof2I55PZZxtm4P6iNq+L5znYN9nSTAq0ZyE6lSJw==",
"dependencies": {
"busboy": "^1.6.0"
},
"engines": {
"node": ">=14.0"
}
},
"node_modules/util-deprecate": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz",
"integrity": "sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw=="
},
"node_modules/web-streams-polyfill": {
"version": "3.2.1",
"resolved": "https://registry.npmjs.org/web-streams-polyfill/-/web-streams-polyfill-3.2.1.tgz",
"integrity": "sha512-e0MO3wdXWKrLbL0DgGnUV7WHVuw9OUvL4hjgnPkIeEvESk74gAITi5G606JtZPp39cd8HA9VQzCIvA49LpPN5Q==",
"engines": {
"node": ">= 8"
}
},
"node_modules/ws": {
"version": "8.13.0",
"resolved": "https://registry.npmjs.org/ws/-/ws-8.13.0.tgz",
"integrity": "sha512-x9vcZYTrFPC7aSIbj7sRCYo7L/Xb8Iy+pW0ng0wt2vCJv7M9HOMy0UoN3rr+IFC7hb7vXoqS+P9ktyLLLhO+LA==",
"engines": {
"node": ">=10.0.0"
},
"peerDependencies": {
"bufferutil": "^4.0.1",
"utf-8-validate": ">=5.0.2"
},
"peerDependenciesMeta": {
"bufferutil": {
"optional": true
},
"utf-8-validate": {
"optional": true
}
}
},
"node_modules/xml2js": {
"version": "0.4.23",
"resolved": "https://registry.npmjs.org/xml2js/-/xml2js-0.4.23.tgz",
"integrity": "sha512-ySPiMjM0+pLDftHgXY4By0uswI3SPKLDw/i3UXbnO8M/p28zqexCUoPmQFrYD+/1BzhGJSs2i1ERWKJAtiLrug==",
"dependencies": {
"sax": ">=0.6.0",
"xmlbuilder": "~11.0.0"
},
"engines": {
"node": ">=4.0.0"
}
},
"node_modules/xmlbuilder": {
"version": "11.0.1",
"resolved": "https://registry.npmjs.org/xmlbuilder/-/xmlbuilder-11.0.1.tgz",
"integrity": "sha512-fDlsI/kFEx7gLvbecc0/ohLG50fugQp8ryHzMTuW9vSa1GJ0XYWKnhsUx7oie3G98+r56aTQIUB4kht42R3JvA==",
"engines": {
"node": ">=4.0"
}
}
}
}

View File

@ -10,11 +10,15 @@
"author": "",
"license": "ISC",
"dependencies": {
"discord.js": "^14.9.0",
"cheerio": "^1.0.0-rc.12",
"cpu-stat": "^2.0.1",
"discord.js": "^14.9.0",
"dotenv": "^16.0.3",
"gpt-tokenizer": "^2.1.1",
"llama-tokenizer-js": "^1.0.0",
"node-fetch": "^3.3.1",
"node-nvidia-smi": "^1.0.0",
"os": "^0.1.2",
"cpu-stat": "^2.0.1"
"tiktoken": "^1.0.10"
}
}

View File

@ -1,11 +1,11 @@
FROM python:bullseye
FROM ubuntu:latest
RUN apt-get update; \
apt-get install -y --no-install-recommends \
build-essential
build-essential cmake python3 python3-pip
WORKDIR /usr/src/app
RUN pip install --no-cache-dir llama-cpp-python[server]
RUN CMAKE_ARGS="-DLLAMA_METAL=on" FORCE_CMAKE=1 pip install llama-cpp-python[server]
CMD python3 -m llama_cpp.server