Compare commits

...

65 Commits
main ... main

Author SHA1 Message Date
Raven Scott f53c0c7195 Fix up NodeJS install GPU side 2023-11-14 18:54:17 -05:00
Raven Scott b723f44587 Fix up HF config - Needs testing 2023-11-14 18:21:35 -05:00
Raven Scott 7c7b5d11b7 fix nodejs install for HuggingFace 2023-11-14 18:19:39 -05:00
Raven Scott a5d2530456 Adding error checking for stat message removal 2023-11-14 18:18:37 -05:00
Raven Scott 1f3ff44317 fix token reducer 2023-08-21 21:15:10 -04:00
Raven Scott a980f3cd43 improving token reducer logic 2023-08-15 01:41:39 -04:00
Raven Scott 05e79cba3a fixing up token reducing if sessions too large 2023-08-15 01:08:56 -04:00
Raven Scott f91d66b2b3 Adding assistant to Counter 2023-08-14 23:44:06 -04:00
Raven Scott 6efd069b5d counting tokens properly 2023-08-14 23:00:23 -04:00
Raven Scott bd435ca311 default to disk cache 2023-06-12 18:45:39 +02:00
Raven Scott 61ee13bfbd remove comments 2023-06-12 18:16:16 +02:00
Raven Scott 10473ef702 beginnings of using a tokenizer to check CTX length, remove context if needed 2023-06-12 18:15:11 +02:00
Raven Scott 20c83a656a update dockerfile for GPU 2023-05-31 23:02:07 +02:00
Raven Scott 61c2fed773 update dockerfile for server-non-gpu 2023-05-27 07:35:41 +02:00
Raven Scott 6ed34de804 update readme 2023-05-27 02:02:38 +02:00
Raven Scott 6bf6c1ef28 Dont send anything if time==0 2023-05-23 15:53:53 +02:00
Raven Scott 7102bf32f0 Fix GPU Cache 2023-05-23 14:56:21 +02:00
Raven Scott 099dbf908b update 2023-05-22 18:38:47 +02:00
Raven Scott bc6157e4a1 bug fix 2023-05-22 18:23:17 +02:00
Raven Scott da0650d3b6 resetting botMessage to stop crashing 2023-05-22 17:46:59 +02:00
Raven Scott 393999165b adding GPU ENV Var to main compose 2023-05-22 17:35:20 +02:00
Raven Scott c75926728c comments 2023-05-21 00:20:53 +02:00
Raven Scott 927b5c834d Add warning about caching with cuBLAS 2023-05-20 23:47:16 +02:00
Raven Scott 668b343cbb changing the name scheme for docker-compose 2023-05-20 15:14:25 +02:00
Raven Scott 51a41292e6 bringing back the embed 2023-05-20 15:11:58 +02:00
Raven Scott 2ac55922d2 revert back to older style of workflow until new logic is written 2023-05-20 04:08:27 +02:00
Raven Scott 73636804a5 bug fix 2023-05-20 02:49:17 +02:00
Raven Scott c7a3316d45 bug fix 2023-05-19 23:49:55 +02:00
Raven Scott 1fe0f20e6f revert 2023-05-19 22:55:25 +02:00
Raven Scott 8ce9e18656 removing non needed code 2023-05-19 22:43:47 +02:00
Raven Scott 2924822a49 ensuring GPU env is set in default .env 2023-05-19 21:45:20 +02:00
Raven Scott 368004f10b adding NVIDIA GPU Support with Stats 2023-05-19 21:32:21 +02:00
Raven Scott 4b090592ad Fix hugging face 2023-05-08 22:15:51 +02:00
Raven Scott 64be911772 adding REPEAT_PENALTY 2023-05-08 22:00:24 +02:00
Raven Scott 83a7bb90ed Fix up env 2023-05-08 21:12:57 +02:00
Raven Scott 14fa3b06ff Remove the abuse of looping functions 2023-05-08 15:45:05 +02:00
Raven Scott c7d8735c8a Remove any @ symbol to remove spam pings 2023-05-08 15:42:15 +02:00
Raven Scott 012566e93c update model var 2023-05-07 14:18:17 +02:00
Raven Scott d18cb39ecd update hf 2023-05-06 22:19:08 +02:00
Raven Scott 9bc44c56f8 update hf 2023-05-06 14:34:53 +02:00
Raven Scott 973952aee1 Revert "Urban"
This reverts commit bd5ef4db9a.
2023-05-06 03:41:36 +02:00
Raven Scott 3d2546ebcf Revert "Adding urban dictionary lookups"
This reverts commit f87542132e.
2023-05-06 03:41:16 +02:00
Raven Scott f87542132e Adding urban dictionary lookups 2023-05-06 02:32:10 +02:00
Raven Scott bd5ef4db9a Urban 2023-05-06 02:30:46 +02:00
Raven Scott ee47531d2f HuggingFace Deploy Instructions 2023-05-06 01:49:21 +02:00
Raven Scott 7ea6abb0f8 HuggingFace Deploy Instructions 2023-05-06 01:46:37 +02:00
Raven Scott 4ff16b4fc7 HuggingFace Deploy Instructions 2023-05-06 01:45:01 +02:00
Raven Scott d1807d37ad HuggingFace Deploy Instructions 2023-05-06 01:41:03 +02:00
Raven Scott af13ca3717 Move overflow delay from static to .env 2023-05-05 21:07:39 +02:00
Raven Scott cb880f9bc1 Fix Caching web reading the web: Append last message rather than add to the conversation. 2023-05-05 20:49:29 +02:00
Raven Scott aec98b576b update default.env 2023-05-05 19:31:17 +02:00
Raven Scott 3b1ec922c4 remove \t 2023-05-05 19:08:13 +02:00
Raven Scott e54826085d remove redunant code 2023-05-05 18:32:46 +02:00
Raven Scott 18923b7909 cleaner output from web scrapes 2023-05-05 18:31:24 +02:00
Raven Scott 57545a20ef update 2023-05-05 18:12:43 +02:00
Raven Scott 49f2c08544 update 2023-05-05 18:12:10 +02:00
Raven Scott 45bc433060 return on error 2023-05-05 17:48:32 +02:00
Raven Scott c83366d0a8 update package.json for cheerio 2023-05-05 17:00:40 +02:00
Raven Scott 7c5b036114 update 2023-05-05 16:56:30 +02:00
Raven Scott a338fb725a Adding: Web Access allowing the AI to browse URLs 2023-05-05 16:55:29 +02:00
Raven Scott a099502f1a Fix ShowSystemLoad 2023-05-02 14:38:57 +02:00
Raven Scott c0ab1541e1 Adding refresh interval to system load status 2023-05-02 14:22:58 +02:00
Raven Scott 779ac1f462 Moving REFRESH_INTERVAL to docker-compose 2023-05-02 14:16:13 +02:00
Raven Scott 335b01287e Moving REFRESH_INTERVAL to .env 2023-05-02 14:15:45 +02:00
Raven Scott d4e8563817 lowercase Thinking it looks better :P 2023-05-02 14:07:36 +02:00
14 changed files with 1320 additions and 80 deletions

23
Dockerfile.gpu Normal file
View File

@ -0,0 +1,23 @@
FROM nvidia/cuda:12.1.1-devel-ubuntu20.04
ENV DEBIAN_FRONTEND noninteractive
WORKDIR /app
RUN apt update
RUN apt install sudo curl -y
RUN apt-get install -y ca-certificates curl gnupg
RUN sudo mkdir -p /etc/apt/keyrings
RUN curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key | sudo gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg
ENV NODE_MAJOR=18
RUN echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_$NODE_MAJOR.x nodistro main" | sudo tee /etc/apt/sources.list.d/nodesource.list
RUN apt-get update
RUN apt-get install nodejs -y
COPY package*.json ./
RUN npm install --omit=dev
COPY . .
CMD node llamabot.js

View File

@ -78,4 +78,26 @@ This will automatically configure the API for you as well as the bot in two sepe
5. `docker compose up -d`
# Docker Compose with GPU
This will automatically configure the API that supports cuBLAS and GPU inference for you as well as the bot in two seperate containers within a stack.
NOTE: Caching for GPU has been fixed.
1. `git clone https://git.ssh.surf/snxraven/llama-cpp-python-djs-bot.git` - Clone the repo
2. `mv docker-compose.yml docker-compose.nogpu.yml; mv docker-compose.gpu.yml docker-compose.yml;` - Move nongpu compose out of the way, Enable GPU Support
3. `mv Dockerfile Dockerfile.nongpu; mv Dockerfile.gpu Dockerfile;` - Move nongpu Dockerfile out of the way, enable GPU Support
3. `cp default.gpu.env .env` - Copy the default GPU .env to its proper location
4. Set DATA_DIR in .env to the exact location of your model files.
5. Edit docker-compose.yaml MODEL to ensure the correct model bin is set
6. set N_GPU_LAYERS to the amount of layers you would like to export to GPU
7. `docker compose up -d`
Want to make this better? Issue a pull request!

View File

@ -1,13 +1,58 @@
THE_TOKEN = "DISCORD_TOKEN_HERE"
CHANNEL_IDS = 1094494101631680653,1094628334727614605
INIT_PROMPT = "Assistant name: ChatAI. You code, write and provide any information without any mistakes."
ROOT_PORT = 8000
DATA_DIR = /home/USERNAME/weights
CACHE = 1
N_THREADS = 4
MAX_TOKENS = 1024
# Discord Token
THE_TOKEN = ""
# The Channel IDs the bot will operate in seperated by commas
CHANNEL_IDS =
# The INIT prompt for all conversations.
INIT_PROMPT = "Assistant name: ChatAI. You code, write and provide any information without any mistakes. You can read basic text from URLs if a user sends a user via scraping."
# Loading Emebed Refresh Timing
REFRESH_INTERVAL=2
# When a message is too large for discord we chunk the response into seperate messages.
# To ensure we do not rate limit the bot we send these at a delay interval.
# DEFAULT: 3 a good setting is between 3 and 7 seconds.
OVERFLOW_DELAY=3
# Max Content to fetch from given URLs
MAX_CONTENT_LENGTH=2000
# Max tokens for Generations
MAX_TOKENS = 1499
# ROOT_IP is only used when running the bot without docker compose
ROOT_IP = 192.168.0.15
ROOT_IP = 127.0.0.1
# PORT is only used when running the bot without docker compose
ROOT_PORT = 8000
# Directory to your models (llama.cpp specfic settings)
DATA_DIR = /Users/username/code/models
# Enable Expirmental Message Caches (Limited to single session)
# Cache will use ~1.4 GB or MORE of RAM. ONLY ENABLE IF YOUR SYSTEM CAN HANDLE THIS.
CACHE = 1
CACHE_TYPE = "disk"
# Set number of threads to use, currently, a standard thread will utilize 1 whole core
# I usually will set this between all cores I physcally have OR 2 cores less to allow for other processes.
N_THREADS = 10
# Always use MMAP unless you know what you are doing
#USE_MMAP=1
# Only use MLOCK if you know what it does!
USE_MLOCK=0
# The higher the number the more hard core.
REPEAT_PENALTY=1
# GPU SPECIFIC SETTINGS BELOW
GPU=0
N_GPU_LAYERS=32
PYTHONUNBUFFERED=1

34
docker-compose.gpu.yml Normal file
View File

@ -0,0 +1,34 @@
version: '3.9'
services:
backend:
container_name: llama-gpu-server
restart: unless-stopped
build:
context: ./gpu-server
env_file: .env
volumes:
- ${DATA_DIR}:/usr/src/app/models
environment:
- HOST=llama-gpu-server
- MODEL=./models/ggml-vic7b-q5_1.bin.1
- NVIDIA_VISIBLE_DEVICES=all
runtime: nvidia
frontend:
container_name: llama-djs-bot
restart: unless-stopped
build:
context: .
depends_on:
- backend
environment:
- THE_TOKEN
- REFRESH_INTERVAL
- CHANNEL_IDS
- GPU
- ROOT_IP=llama-gpu-server
- ROOT_PORT=8000
- INIT_PROMPT='Assistant Name':' ChatAI. You code, write and provide any information without any mistakes.'
- NVIDIA_VISIBLE_DEVICES=all
runtime: nvidia

View File

@ -11,7 +11,7 @@ services:
- ${DATA_DIR}:/usr/src/app/models
environment:
- HOST=llama-python-server
- MODEL=./models/gpt4-x-alpaca-13b-native-4bit-128g.bin
- MODEL=./models/vicuna-7b-1.1.ggmlv3.q6_K.bin
llama-python-djs-bot:
container_name: llama-python-djs-bot
restart: unless-stopped
@ -21,6 +21,8 @@ services:
- llama-python-server
environment:
- THE_TOKEN
- GPU
- REFRESH_INTERVAL
- CHANNEL_IDS
- ROOT_IP=llama-python-server
- ROOT_PORT=8000

19
gpu-server/Dockerfile Normal file
View File

@ -0,0 +1,19 @@
FROM nvidia/cuda:12.1.1-devel-ubuntu20.04
# Install the deps
ENV DEBIAN_FRONTEND=noninteractive
ENV TZ=Etc/GMT
RUN apt-get update && apt-get install -y --no-install-recommends python3 python3-pip git cmake build-essential
# Get llama-cpp-python
WORKDIR /usr/src
WORKDIR /usr/src/app
# Build llama-cpp-python w/CuBLAS
RUN CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 pip install llama-cpp-python[server]
# We need to set the host to 0.0.0.0 to allow outside access
ENV HOST 0.0.0.0
# Run the server
CMD python3 -m llama_cpp.server

View File

@ -0,0 +1,48 @@
FROM ubuntu:latest
RUN apt update
RUN DEBIAN_FRONTEND=noninteractive apt install curl sudo -y
RUN apt-get install -y ca-certificates curl gnupg
RUN sudo mkdir -p /etc/apt/keyrings
RUN curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key | sudo gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg
ENV NODE_MAJOR=18
RUN echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_$NODE_MAJOR.x nodistro main" | sudo tee /etc/apt/sources.list.d/nodesource.list
RUN apt-get update
RUN apt-get install nodejs -y
RUN DEBIAN_FRONTEND=noninteractive apt install nodejs -y
RUN npm i pm2 -g
RUN mkdir -p /code/.pm2
RUN mkdir -p /.pm2
RUN chmod 0777 /code/.pm2
RUN chmod 0777 /.pm2
RUN DEBIAN_FRONTEND=noninteractive apt install wget python3 python3-pip -y
WORKDIR /code
RUN pip install --no-cache-dir llama-cpp-python[server]
COPY . .
RUN npm i
ENV HOST localhost
ENV PORT 7860
ENV MODEL=/code/mistral-7b-instruct-v0.1.Q2_K.gguf
ENV CACHE=1
ENV USE_MLOCK=0
ENV REPEAT_PENALTY=1
ENV MODEL=/code/mistral-7b-instruct-v0.1.Q2_K.gguf
ENV PM2_HOME=/code/.pm2
RUN wget -q -O mistral-7b-instruct-v0.1.Q2_K.gguf "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q2_K.gguf?download=true"
CMD /bin/bash /code/start.sh

View File

@ -0,0 +1,17 @@
# How to Deploy on Hugging Face
1) Move the "assets" directory and llamabot.js, package.json files into your HuggingFace repo directory.
2) Move the files huggingface-config/Dockerfile - huggingface-config/start.sh - huggingface-config/startServices.json into the root of the hugging face repo.
3) Edit Docker file as you need, the dockerfile is set to automatically download Vicuna 1.1 7B
3) Move default.env into your repo as .env and edit for your needs
4) Push the changes
You should then see the bot being built and deployed on HuggingFace
pm2 log will run automatically so you can see frontend and backend logs.
PLEASE NOTE: Your hugging face repo should remain private!

View File

@ -0,0 +1,6 @@
#!/bin/bash
# This is the main process of the container and will stay alive as long as pm2 log is running.
pm2 start startServices.json
pm2 log

View File

@ -0,0 +1,14 @@
{
"apps": [
{
"name": "Discord-Bot",
"script": "node /code/llamabot.js",
"args" : ""
},
{
"name": "AI-API",
"script": "python3 -m llama_cpp.server",
"args" : ""
}
]
}

View File

@ -5,6 +5,8 @@ import { resetResponses, userResetMessages } from './assets/resetMessages.js';
import { errorMessages, busyResponses } from './assets/errorMessages.js';
import cpuStat from 'cpu-stat';
import os from 'os';
import smi from 'node-nvidia-smi';
import llamaTokenizer from 'llama-tokenizer-js'
import {
Client,
@ -27,8 +29,14 @@ const client = new Client({
// Grab ChannelIDs from the .env file
const channelIDs = process.env.CHANNEL_IDS.split(',');
// Store Conversations in a MAP
const conversations = new Map();
let botMessage; // define a variable to hold the message object
// Set busy function this allows us to set our bot into busy mode
// locking out all other tasks until the current one is complete
function setBusy(userId, isBusy) {
if (conversations.has(userId)) {
conversations.get(userId).busy = isBusy;
@ -39,6 +47,8 @@ function setBusy(userId, isBusy) {
}
}
// General check, if any conversation is busy
// If yes, flag it and let us know
function isAnyConversationBusy() {
for (const conversation of conversations.values()) {
if (conversation.busy) {
@ -49,6 +59,7 @@ function isAnyConversationBusy() {
return false;
}
// Setting our precence to busy within the bots status
function setPresenceBusy() {
client.user.setPresence({
activities: [{
@ -59,6 +70,8 @@ function setPresenceBusy() {
});
}
// Setting our precence to ready within the bots status
function setPresenceOnline() {
client.user.setPresence({
activities: [{
@ -70,18 +83,23 @@ function setPresenceOnline() {
}
// When we have logged in to discord api
// Set precence to online.
client.once('ready', () => {
console.log('Bot is ready.');
setPresenceOnline()
});
// When a message is sent within discord, lets handle it.
client.on('messageCreate', async (message) => {
// Function to send a random message from any array
async function sendRand(array) {
const arrayChoice = array[Math.floor(Math.random() * array.length)];
await message.channel.send(arrayChoice); // give a notification of reset using a human like response.
}
// Function to send a random Direct Message from any array
async function sendRandDM(array) {
const arrayChoice = array[Math.floor(Math.random() * array.length)];
await message.author.send(arrayChoice); // give a notification of reset using a human like response.
@ -92,7 +110,8 @@ client.on('messageCreate', async (message) => {
return;
}
if (message.author.bot) return; // Ignore messages from bots
// Always ignore bots!
if (message.author.bot) return;
// Check if any conversation is busy
if (isAnyConversationBusy()) {
@ -102,12 +121,18 @@ client.on('messageCreate', async (message) => {
sendRandDM(busyResponses);
return;
}
// Set user ID and get our conversation.
const userID = message.author.id;
let conversation = conversations.get(userID) || {
messages: [],
busy: false
};
// If we do not have a conversation, lets generate one.
// This requires a chatflow for the API.
// Its better to have a default beginning conversation
// Providing context for the AI Model.
if (conversation.messages.length === 0) {
conversation.messages.push({
role: 'user',
@ -123,12 +148,15 @@ client.on('messageCreate', async (message) => {
});
}
// If a user needs a reset, we delete their MAP
if (message.content === '!reset' || message.content === '!r') {
conversations.delete(userID); // Delete user's conversation map if they request reset
sendRand(userResetMessages)
return;
}
// Begin processing our conversation, this is our main work flow.
// Append user message to conversation history
conversation.messages.push({
role: 'user',
@ -136,12 +164,18 @@ client.on('messageCreate', async (message) => {
});
try {
// Now we have our conversation set up
// Lets set precence to busy
// We also will set our conversations MAP to busy
// Locking out all other tasks
setPresenceBusy()
setBusy(message.author.id, true);
// Lets start generating the response
const response = await generateResponse(conversation, message);
// Append bot message to conversation history
// Append bot message to conversation history when it is ready
conversation.messages.push({
role: 'assistant',
content: response
@ -153,18 +187,27 @@ client.on('messageCreate', async (message) => {
// if we are over the discord char limit we need chunks...
if (response.length > limit) {
const chunks = response.match(new RegExp(`.{1,${limit}}`, "g"));
for (let i = 0; i < chunks.length; i++) {
setTimeout(() => {
message.channel.send(chunks[i]);
}, i * 3000); // delay of 3 seconds between each chunk to save on API requests
}
// We are going to check all of the message chunks if our response is too large for discord.
// We can extend our message size using chunks, the issue?
// Users can abuse this feature, we lock this to 15 to avoid API Abuse.
const chunks = response.match(new RegExp(`.{1,${limit}}`, "g"));
if (chunks.length >= 40) return await message.channel.send("Response chunks too large. Try again");
// If we do now have too many chunks, lets send each one using our overflow delay
for (let i = 0; i < chunks.length; i++) {
setTimeout(() => {
message.channel.send(chunks[i]);
}, i * (process.env.OVERFLOW_DELAY || 3) * 1000); // delay of 3 seconds between each chunk to save on API requests
}
} else {
// We are good to go, send the response
await message.channel.send(response);
// We are good to go message is not too large for discord, send the response
await message.channel.send(response.replace("@", ""));
}
// We have completed our task, lets go online
setPresenceOnline()
// set our conversation MAP to not busy
setBusy(message.author.id, false);
} else {
// Handle empty response here
@ -174,89 +217,284 @@ client.on('messageCreate', async (message) => {
setPresenceOnline()
conversation.busy = false;
}
conversations.set(userID, conversation); // Update user's conversation map in memory
// Print the current conversation as it stands
console.log(conversation)
} catch (err) {
// If we have any errors lets send a response
console.error(err);
sendRand(errorMessages)
return sendRand(errorMessages)
} finally {
// We are done! Lets finish up going online
setPresenceOnline()
setBusy(message.author.id, false);
}
});
// Import cheerio for scraping
import cheerio from 'cheerio';
async function generateResponse(conversation, message) {
// Begin web scraper if a https:// OR http:// URL is detected
// Check if message contains a URL
const urlRegex = /(https?:\/\/[^\s]+)/g;
// Match our REGEX
const urls = message.content.match(urlRegex);
if (urls) {
// If there are multiple URLs, process them one by one
for (const url of urls) {
try {
const res = await fetch(url);
const html = await res.text();
const $ = cheerio.load(html);
// Extract page title, meta description and content
const pageTitle = $('head title').text().trim();
const pageDescription = $('head meta[name="description"]').attr('content');
const pageContent = $('body').text().trim();
// Construct response message with page details
let response = `Title: ${pageTitle}\n`;
if (pageDescription) {
response += `Description: ${pageDescription}\n`;
}
if (pageContent) {
// Lets check for content and grab only the amount as configured.
const MAX_CONTENT_LENGTH = process.env.MAX_CONTENT_LENGTH;
let plainTextContent = $('<div>').html(pageContent).text().trim().replace(/[\r\n\t]+/g, ' ');
// Clean up code remove it from processing
const codePattern = /\/\/|\/\*|\*\/|\{|\}|\[|\]|\bfunction\b|\bclass\b|\b0x[0-9A-Fa-f]+\b|\b0b[01]+\b/;
const isCode = codePattern.test(plainTextContent);
if (isCode) {
plainTextContent = plainTextContent.replace(codePattern, '');
}
// Remove anything enclosed in brackets JUNK DATA
plainTextContent = plainTextContent.replace(/ *\([^)]*\) */g, '');
if (plainTextContent.length > MAX_CONTENT_LENGTH) {
plainTextContent = plainTextContent.substring(0, MAX_CONTENT_LENGTH) + '...';
}
response += `Content: ${plainTextContent.trim()}`;
}
response += `URL: ${url}`;
// Get the index of the last message in the array
const lastMessageIndex = conversation.messages.length - 1;
// Append a new line and the new content to the existing content of the last message
conversation.messages[lastMessageIndex].content += "\n" + response;
console.log("A URL was provided, response: " + response)
} catch (err) {
console.error(err);
return sendRand(errorMessages);
}
}
}
// We need an abort controller to stop our progress message editor
const controller = new AbortController();
// Set our timeout for the controller
const timeout = setTimeout(() => {
controller.abort();
}, 900000);
// Copy our messages from MAP
const messagesCopy = [...conversation.messages]; // create a copy of the messages array
let botMessage; // define a variable to hold the message object
let time = 0
// define a function that shows the system load percentage and updates the message
const showSystemLoad = async () => {
time = time + 7;
cpuStat.usagePercent(function(err, percent, seconds) {
if (err) {
return console.log(err);
}
const systemLoad = percent;
const freeMemory = os.freemem() / 1024 / 1024 / 1024;
const totalMemory = os.totalmem() / 1024 / 1024 / 1024;
const usedMemory = totalMemory - freeMemory;
const embedData = {
color: 0x0099ff,
title: 'Please wait.. I am Thinking...',
fields: [
{
name: 'System Load',
value: `${systemLoad.toFixed(2)}%`,
},
{
name: 'Memory Usage',
value: `${usedMemory.toFixed(2)} GB / ${totalMemory.toFixed(2)} GB`,
},
{
name: 'Time',
value: `~${time} seconds.`,
},
],
};
// if the message object doesn't exist, create it
if (!botMessage) {
(async () => {
botMessage = await message.channel.send({ embeds: [embedData] });
})();
} else {
botMessage.edit({ embeds: [embedData] }); // otherwise, update the message
}
// Configure our inital time
time = Number(time) + Number(process.env.REFRESH_INTERVAL);
// Get system stats
cpuStat.usagePercent(function (err, percent, seconds) {
if (err) {
return console.log(err);
}
// Setting out system stat vars
const systemLoad = percent;
const freeMemory = os.freemem() / 1024 / 1024 / 1024;
const totalMemory = os.totalmem() / 1024 / 1024 / 1024;
const usedMemory = totalMemory - freeMemory;
// lets build some embed data
let embedData;
// If we have NO GPU config lets send system stats only
if (process.env.GPU == 0) {
embedData = {
color: 0x0099ff,
title: 'Please wait.. I am thinking...',
fields: [
{
name: 'System Load',
value: `${systemLoad.toFixed(2)}%`,
},
{
name: 'Memory Usage',
value: `${usedMemory.toFixed(2)} GB / ${totalMemory.toFixed(2)} GB`,
},
{
name: 'Time',
value: `~${time} seconds.`,
},
],
};
// if the message object doesn't exist, create it
if (!botMessage) {
(async () => {
if (time == 0) return
botMessage = await message.channel.send({ embeds: [embedData] });
})();
} else {
(async () => {
if (!isAnyConversationBusy()) {
botMessage.delete()
} else {
await botMessage.edit({ embeds: [embedData] }); // otherwise, update the message
}
})();
}
} else {
// If we do have GPU=1 lets send some card info too!
smi(function (err, data) {
if (err) {
// Handle error if smi function fails
console.error(err);
return;
}
let utilization = data.nvidia_smi_log.gpu.utilization;
let gpuUtilization = utilization.gpu_util;
let memoryUtilization = utilization.memory_util;
let gpuTemp = data.nvidia_smi_log.gpu.temperature.gpu_temp;
// These are not used until nvidia-docker fixes their support
let gpuTarget = data.nvidia_smi_log.gpu.temperature.gpu_target_temperature;
let gpuFanSpeed = data.nvidia_smi_log.gpu.fan_speed;
embedData = {
color: 0x0099ff,
title: 'Please wait.. I am thinking...',
fields: [
{
name: 'System Load',
value: `${systemLoad.toFixed(2)}%`,
},
{
name: 'Memory Usage',
value: `${usedMemory.toFixed(2)} GB / ${totalMemory.toFixed(2)} GB`,
},
{
name: 'GPU Utilization',
value: `${gpuUtilization}`,
},
{
name: 'Memory Utilization',
value: `${memoryUtilization}`,
},
{
name: 'GPU Temperature',
value: `${gpuTemp}`,
},
{
name: 'Time',
value: `~${time} seconds.`,
},
],
};
// if the message object doesn't exist, create it
if (!botMessage) {
(async () => {
if (time == 0) return
botMessage = await message.channel.send({ embeds: [embedData] });
})();
} else {
(async () => {
if (!isAnyConversationBusy()) {
botMessage.delete()
} else {
await botMessage.edit({ embeds: [embedData] }); // otherwise, update the message
}
})();
}
})
}
});
};
// call the function initially
await showSystemLoad();
// refresh the system load percentage and update the message every 7 seconds
const refreshInterval = setInterval(showSystemLoad, 7000);
};
try {
// call the function initially
await showSystemLoad();
// Grab the REFRESH_INTERVAL from ENV if not exist, lets use 7 (seconds)
const refreshInterval = setInterval(showSystemLoad, (process.env.REFRESH_INTERVAL || 7) * 1000);
function countLlamaTokens(messages) {
let totalTokens = 0;
for (const message of messages) {
if (message.role === 'user' || message.role === 'assistant') {
const encodedTokens = llamaTokenizer.encode(message.content);
totalTokens += encodedTokens.length;
}
}
return totalTokens;
}
let totalTokens = countLlamaTokens(messagesCopy);
console.log(`Total Llama tokens: ${totalTokens}`);
let tokenLength = totalTokens
// Remove older conversations if necessary
const maxLength = 1800;
const tolerance = 25; // allow for some flexibility in the token length
if (tokenLength > maxLength + tolerance) {
const diff = tokenLength - (maxLength + tolerance);
let removedTokens = 0;
// Iterate over the messages in reverse order
for (let i = messagesCopy.length - 1; i >= 0; i--) {
const message = messagesCopy[i];
const messageTokens = countLlamaTokens([message]);
// Check if the current message plus the tokens in the message is less than or equal to the diff
if (removedTokens + messageTokens <= diff) {
messagesCopy.splice(i, 1);
removedTokens += messageTokens;
console.log(removedTokens + " removed \nAfter Resize: " + countLlamaTokens(messagesCopy));
} else {
// Remove more than one message if necessary to bring the total length below the maximum allowed length
const messagesToRemove = Math.floor(diff / messageTokens);
for (let j = 0; j < messagesToRemove; j++) {
messagesCopy.splice(i, 1);
removedTokens += messageTokens;
}
break;
}
}
}
// Sending request to our API
const response = await fetch(`http://${process.env.ROOT_IP}:${process.env.ROOT_PORT}/v1/chat/completions`, {
method: 'POST',
headers: {
'accept': 'application/json',
'Content-Type': 'application/json'
'accept': 'application/json',
'Content-Type': 'application/json'
},
body: JSON.stringify({
messages: messagesCopy,
max_tokens: Number(process.env.MAX_TOKENS) // add the max_tokens parameter here
messages: messagesCopy,
max_tokens: Number(process.env.MAX_TOKENS), // add the max_tokens parameter here
repeat_penalty: Number(process.env.REPEAT_PENALTY)
}),
signal: controller.signal
});
});
const responseData = await response.json();
console.log(JSON.stringify(responseData));
@ -265,9 +503,18 @@ async function generateResponse(conversation, message) {
const responseText = choice.message.content;
// clear the interval, replace the "please wait" message with the response, and update the message
clearInterval(refreshInterval);
console.log(responseText);
botMessage.delete()
try {
if (time > 2) {
await botMessage.delete();
clearInterval(refreshInterval);
botMessage = null;
console.log("Time limit reached. Message deleted.");
}
} catch (err) {
console.log("Error deleting message: ", err);
}
return responseText;
@ -275,8 +522,9 @@ async function generateResponse(conversation, message) {
throw err;
} finally {
clearTimeout(timeout);
botMessage = null;
time = 0
}
}
client.login(process.env.THE_TOKEN); // Replace with your bot token
client.login(process.env.THE_TOKEN); // Replace with your bot token

757
package-lock.json generated Normal file
View File

@ -0,0 +1,757 @@
{
"name": "llama-cpp-python-djs-bot",
"version": "1.0.0",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "llama-cpp-python-djs-bot",
"version": "1.0.0",
"license": "ISC",
"dependencies": {
"cheerio": "^1.0.0-rc.12",
"cpu-stat": "^2.0.1",
"discord.js": "^14.9.0",
"dotenv": "^16.0.3",
"gpt-tokenizer": "^2.1.1",
"llama-tokenizer-js": "^1.0.0",
"node-fetch": "^3.3.1",
"node-nvidia-smi": "^1.0.0",
"os": "^0.1.2",
"tiktoken": "^1.0.10"
}
},
"node_modules/@discordjs/builders": {
"version": "1.6.3",
"resolved": "https://registry.npmjs.org/@discordjs/builders/-/builders-1.6.3.tgz",
"integrity": "sha512-CTCh8NqED3iecTNuiz49mwSsrc2iQb4d0MjMdmS/8pb69Y4IlzJ/DIy/p5GFlgOrFbNO2WzMHkWKQSiJ3VNXaw==",
"dependencies": {
"@discordjs/formatters": "^0.3.1",
"@discordjs/util": "^0.3.1",
"@sapphire/shapeshift": "^3.8.2",
"discord-api-types": "^0.37.41",
"fast-deep-equal": "^3.1.3",
"ts-mixer": "^6.0.3",
"tslib": "^2.5.0"
},
"engines": {
"node": ">=16.9.0"
}
},
"node_modules/@discordjs/collection": {
"version": "1.5.1",
"resolved": "https://registry.npmjs.org/@discordjs/collection/-/collection-1.5.1.tgz",
"integrity": "sha512-aWEc9DCf3TMDe9iaJoOnO2+JVAjeRNuRxPZQA6GVvBf+Z3gqUuWYBy2NWh4+5CLYq5uoc3MOvUQ5H5m8CJBqOA==",
"engines": {
"node": ">=16.9.0"
}
},
"node_modules/@discordjs/formatters": {
"version": "0.3.1",
"resolved": "https://registry.npmjs.org/@discordjs/formatters/-/formatters-0.3.1.tgz",
"integrity": "sha512-M7X4IGiSeh4znwcRGcs+49B5tBkNDn4k5bmhxJDAUhRxRHTiFAOTVUNQ6yAKySu5jZTnCbSvTYHW3w0rAzV1MA==",
"dependencies": {
"discord-api-types": "^0.37.41"
},
"engines": {
"node": ">=16.9.0"
}
},
"node_modules/@discordjs/rest": {
"version": "1.7.1",
"resolved": "https://registry.npmjs.org/@discordjs/rest/-/rest-1.7.1.tgz",
"integrity": "sha512-Ofa9UqT0U45G/eX86cURQnX7gzOJLG2oC28VhIk/G6IliYgQF7jFByBJEykPSHE4MxPhqCleYvmsrtfKh1nYmQ==",
"dependencies": {
"@discordjs/collection": "^1.5.1",
"@discordjs/util": "^0.3.0",
"@sapphire/async-queue": "^1.5.0",
"@sapphire/snowflake": "^3.4.2",
"discord-api-types": "^0.37.41",
"file-type": "^18.3.0",
"tslib": "^2.5.0",
"undici": "^5.22.0"
},
"engines": {
"node": ">=16.9.0"
}
},
"node_modules/@discordjs/util": {
"version": "0.3.1",
"resolved": "https://registry.npmjs.org/@discordjs/util/-/util-0.3.1.tgz",
"integrity": "sha512-HxXKYKg7vohx2/OupUN/4Sd02Ev3PBJ5q0gtjdcvXb0ErCva8jNHWfe/v5sU3UKjIB/uxOhc+TDOnhqffj9pRA==",
"engines": {
"node": ">=16.9.0"
}
},
"node_modules/@discordjs/ws": {
"version": "0.8.3",
"resolved": "https://registry.npmjs.org/@discordjs/ws/-/ws-0.8.3.tgz",
"integrity": "sha512-hcYtppanjHecbdNyCKQNH2I4RP9UrphDgmRgLYrATEQF1oo4sYSve7ZmGsBEXSzH72MO2tBPdWSThunbxUVk0g==",
"dependencies": {
"@discordjs/collection": "^1.5.1",
"@discordjs/rest": "^1.7.1",
"@discordjs/util": "^0.3.1",
"@sapphire/async-queue": "^1.5.0",
"@types/ws": "^8.5.4",
"@vladfrangu/async_event_emitter": "^2.2.1",
"discord-api-types": "^0.37.41",
"tslib": "^2.5.0",
"ws": "^8.13.0"
},
"engines": {
"node": ">=16.9.0"
}
},
"node_modules/@sapphire/async-queue": {
"version": "1.5.0",
"resolved": "https://registry.npmjs.org/@sapphire/async-queue/-/async-queue-1.5.0.tgz",
"integrity": "sha512-JkLdIsP8fPAdh9ZZjrbHWR/+mZj0wvKS5ICibcLrRI1j84UmLMshx5n9QmL8b95d4onJ2xxiyugTgSAX7AalmA==",
"engines": {
"node": ">=v14.0.0",
"npm": ">=7.0.0"
}
},
"node_modules/@sapphire/shapeshift": {
"version": "3.9.2",
"resolved": "https://registry.npmjs.org/@sapphire/shapeshift/-/shapeshift-3.9.2.tgz",
"integrity": "sha512-YRbCXWy969oGIdqR/wha62eX8GNHsvyYi0Rfd4rNW6tSVVa8p0ELiMEuOH/k8rgtvRoM+EMV7Csqz77YdwiDpA==",
"dependencies": {
"fast-deep-equal": "^3.1.3",
"lodash": "^4.17.21"
},
"engines": {
"node": ">=v14.0.0",
"npm": ">=7.0.0"
}
},
"node_modules/@sapphire/snowflake": {
"version": "3.5.1",
"resolved": "https://registry.npmjs.org/@sapphire/snowflake/-/snowflake-3.5.1.tgz",
"integrity": "sha512-BxcYGzgEsdlG0dKAyOm0ehLGm2CafIrfQTZGWgkfKYbj+pNNsorZ7EotuZukc2MT70E0UbppVbtpBrqpzVzjNA==",
"engines": {
"node": ">=v14.0.0",
"npm": ">=7.0.0"
}
},
"node_modules/@tokenizer/token": {
"version": "0.3.0",
"resolved": "https://registry.npmjs.org/@tokenizer/token/-/token-0.3.0.tgz",
"integrity": "sha512-OvjF+z51L3ov0OyAU0duzsYuvO01PH7x4t6DJx+guahgTnBHkhJdG7soQeTSFLWN3efnHyibZ4Z8l2EuWwJN3A=="
},
"node_modules/@types/node": {
"version": "20.3.0",
"resolved": "https://registry.npmjs.org/@types/node/-/node-20.3.0.tgz",
"integrity": "sha512-cumHmIAf6On83X7yP+LrsEyUOf/YlociZelmpRYaGFydoaPdxdt80MAbu6vWerQT2COCp2nPvHdsbD7tHn/YlQ=="
},
"node_modules/@types/ws": {
"version": "8.5.5",
"resolved": "https://registry.npmjs.org/@types/ws/-/ws-8.5.5.tgz",
"integrity": "sha512-lwhs8hktwxSjf9UaZ9tG5M03PGogvFaH8gUgLNbN9HKIg0dvv6q+gkSuJ8HN4/VbyxkuLzCjlN7GquQ0gUJfIg==",
"dependencies": {
"@types/node": "*"
}
},
"node_modules/@vladfrangu/async_event_emitter": {
"version": "2.2.2",
"resolved": "https://registry.npmjs.org/@vladfrangu/async_event_emitter/-/async_event_emitter-2.2.2.tgz",
"integrity": "sha512-HIzRG7sy88UZjBJamssEczH5q7t5+axva19UbZLO6u0ySbYPrwzWiXBcC0WuHyhKKoeCyneH+FvYzKQq/zTtkQ==",
"engines": {
"node": ">=v14.0.0",
"npm": ">=7.0.0"
}
},
"node_modules/boolbase": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/boolbase/-/boolbase-1.0.0.tgz",
"integrity": "sha512-JZOSA7Mo9sNGB8+UjSgzdLtokWAky1zbztM3WRLCbZ70/3cTANmQmOdR7y2g+J0e2WXywy1yS468tY+IruqEww=="
},
"node_modules/busboy": {
"version": "1.6.0",
"resolved": "https://registry.npmjs.org/busboy/-/busboy-1.6.0.tgz",
"integrity": "sha512-8SFQbg/0hQ9xy3UNTB0YEnsNBbWfhf7RtnzpL7TkBiTBRfrQ9Fxcnz7VJsleJpyp6rVLvXiuORqjlHi5q+PYuA==",
"dependencies": {
"streamsearch": "^1.1.0"
},
"engines": {
"node": ">=10.16.0"
}
},
"node_modules/cheerio": {
"version": "1.0.0-rc.12",
"resolved": "https://registry.npmjs.org/cheerio/-/cheerio-1.0.0-rc.12.tgz",
"integrity": "sha512-VqR8m68vM46BNnuZ5NtnGBKIE/DfN0cRIzg9n40EIq9NOv90ayxLBXA8fXC5gquFRGJSTRqBq25Jt2ECLR431Q==",
"dependencies": {
"cheerio-select": "^2.1.0",
"dom-serializer": "^2.0.0",
"domhandler": "^5.0.3",
"domutils": "^3.0.1",
"htmlparser2": "^8.0.1",
"parse5": "^7.0.0",
"parse5-htmlparser2-tree-adapter": "^7.0.0"
},
"engines": {
"node": ">= 6"
},
"funding": {
"url": "https://github.com/cheeriojs/cheerio?sponsor=1"
}
},
"node_modules/cheerio-select": {
"version": "2.1.0",
"resolved": "https://registry.npmjs.org/cheerio-select/-/cheerio-select-2.1.0.tgz",
"integrity": "sha512-9v9kG0LvzrlcungtnJtpGNxY+fzECQKhK4EGJX2vByejiMX84MFNQw4UxPJl3bFbTMw+Dfs37XaIkCwTZfLh4g==",
"dependencies": {
"boolbase": "^1.0.0",
"css-select": "^5.1.0",
"css-what": "^6.1.0",
"domelementtype": "^2.3.0",
"domhandler": "^5.0.3",
"domutils": "^3.0.1"
},
"funding": {
"url": "https://github.com/sponsors/fb55"
}
},
"node_modules/cpu-stat": {
"version": "2.0.1",
"resolved": "https://registry.npmjs.org/cpu-stat/-/cpu-stat-2.0.1.tgz",
"integrity": "sha512-bC4ts/0IjYfNV6Dc7F2NauWM0tip0fneZjRek8HqX2ZERC4oSt6dmV+GTN1mfE9OKbLAppv58M2PVzKLGB731w=="
},
"node_modules/css-select": {
"version": "5.1.0",
"resolved": "https://registry.npmjs.org/css-select/-/css-select-5.1.0.tgz",
"integrity": "sha512-nwoRF1rvRRnnCqqY7updORDsuqKzqYJ28+oSMaJMMgOauh3fvwHqMS7EZpIPqK8GL+g9mKxF1vP/ZjSeNjEVHg==",
"dependencies": {
"boolbase": "^1.0.0",
"css-what": "^6.1.0",
"domhandler": "^5.0.2",
"domutils": "^3.0.1",
"nth-check": "^2.0.1"
},
"funding": {
"url": "https://github.com/sponsors/fb55"
}
},
"node_modules/css-what": {
"version": "6.1.0",
"resolved": "https://registry.npmjs.org/css-what/-/css-what-6.1.0.tgz",
"integrity": "sha512-HTUrgRJ7r4dsZKU6GjmpfRK1O76h97Z8MfS1G0FozR+oF2kG6Vfe8JE6zwrkbxigziPHinCJ+gCPjA9EaBDtRw==",
"engines": {
"node": ">= 6"
},
"funding": {
"url": "https://github.com/sponsors/fb55"
}
},
"node_modules/data-uri-to-buffer": {
"version": "4.0.1",
"resolved": "https://registry.npmjs.org/data-uri-to-buffer/-/data-uri-to-buffer-4.0.1.tgz",
"integrity": "sha512-0R9ikRb668HB7QDxT1vkpuUBtqc53YyAwMwGeUFKRojY/NWKvdZ+9UYtRfGmhqNbRkTSVpMbmyhXipFFv2cb/A==",
"engines": {
"node": ">= 12"
}
},
"node_modules/discord-api-types": {
"version": "0.37.43",
"resolved": "https://registry.npmjs.org/discord-api-types/-/discord-api-types-0.37.43.tgz",
"integrity": "sha512-bBhDWU3TF9KADxR/mHp1K4Bvu/LRtFQdGyBjADu4e66F3ZnD4kp12W/SJCttIaCcMXzPV3sfty6eDGRNRph51Q=="
},
"node_modules/discord.js": {
"version": "14.11.0",
"resolved": "https://registry.npmjs.org/discord.js/-/discord.js-14.11.0.tgz",
"integrity": "sha512-CkueWYFQ28U38YPR8HgsBR/QT35oPpMbEsTNM30Fs8loBIhnA4s70AwQEoy6JvLcpWWJO7GY0y2BUzZmuBMepQ==",
"dependencies": {
"@discordjs/builders": "^1.6.3",
"@discordjs/collection": "^1.5.1",
"@discordjs/formatters": "^0.3.1",
"@discordjs/rest": "^1.7.1",
"@discordjs/util": "^0.3.1",
"@discordjs/ws": "^0.8.3",
"@sapphire/snowflake": "^3.4.2",
"@types/ws": "^8.5.4",
"discord-api-types": "^0.37.41",
"fast-deep-equal": "^3.1.3",
"lodash.snakecase": "^4.1.1",
"tslib": "^2.5.0",
"undici": "^5.22.0",
"ws": "^8.13.0"
},
"engines": {
"node": ">=16.9.0"
}
},
"node_modules/dom-serializer": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/dom-serializer/-/dom-serializer-2.0.0.tgz",
"integrity": "sha512-wIkAryiqt/nV5EQKqQpo3SToSOV9J0DnbJqwK7Wv/Trc92zIAYZ4FlMu+JPFW1DfGFt81ZTCGgDEabffXeLyJg==",
"dependencies": {
"domelementtype": "^2.3.0",
"domhandler": "^5.0.2",
"entities": "^4.2.0"
},
"funding": {
"url": "https://github.com/cheeriojs/dom-serializer?sponsor=1"
}
},
"node_modules/domelementtype": {
"version": "2.3.0",
"resolved": "https://registry.npmjs.org/domelementtype/-/domelementtype-2.3.0.tgz",
"integrity": "sha512-OLETBj6w0OsagBwdXnPdN0cnMfF9opN69co+7ZrbfPGrdpPVNBUj02spi6B1N7wChLQiPn4CSH/zJvXw56gmHw==",
"funding": [
{
"type": "github",
"url": "https://github.com/sponsors/fb55"
}
]
},
"node_modules/domhandler": {
"version": "5.0.3",
"resolved": "https://registry.npmjs.org/domhandler/-/domhandler-5.0.3.tgz",
"integrity": "sha512-cgwlv/1iFQiFnU96XXgROh8xTeetsnJiDsTc7TYCLFd9+/WNkIqPTxiM/8pSd8VIrhXGTf1Ny1q1hquVqDJB5w==",
"dependencies": {
"domelementtype": "^2.3.0"
},
"engines": {
"node": ">= 4"
},
"funding": {
"url": "https://github.com/fb55/domhandler?sponsor=1"
}
},
"node_modules/domutils": {
"version": "3.1.0",
"resolved": "https://registry.npmjs.org/domutils/-/domutils-3.1.0.tgz",
"integrity": "sha512-H78uMmQtI2AhgDJjWeQmHwJJ2bLPD3GMmO7Zja/ZZh84wkm+4ut+IUnUdRa8uCGX88DiVx1j6FRe1XfxEgjEZA==",
"dependencies": {
"dom-serializer": "^2.0.0",
"domelementtype": "^2.3.0",
"domhandler": "^5.0.3"
},
"funding": {
"url": "https://github.com/fb55/domutils?sponsor=1"
}
},
"node_modules/dotenv": {
"version": "16.1.4",
"resolved": "https://registry.npmjs.org/dotenv/-/dotenv-16.1.4.tgz",
"integrity": "sha512-m55RtE8AsPeJBpOIFKihEmqUcoVncQIwo7x9U8ZwLEZw9ZpXboz2c+rvog+jUaJvVrZ5kBOeYQBX5+8Aa/OZQw==",
"engines": {
"node": ">=12"
},
"funding": {
"url": "https://github.com/motdotla/dotenv?sponsor=1"
}
},
"node_modules/entities": {
"version": "4.5.0",
"resolved": "https://registry.npmjs.org/entities/-/entities-4.5.0.tgz",
"integrity": "sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw==",
"engines": {
"node": ">=0.12"
},
"funding": {
"url": "https://github.com/fb55/entities?sponsor=1"
}
},
"node_modules/fast-deep-equal": {
"version": "3.1.3",
"resolved": "https://registry.npmjs.org/fast-deep-equal/-/fast-deep-equal-3.1.3.tgz",
"integrity": "sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q=="
},
"node_modules/fetch-blob": {
"version": "3.2.0",
"resolved": "https://registry.npmjs.org/fetch-blob/-/fetch-blob-3.2.0.tgz",
"integrity": "sha512-7yAQpD2UMJzLi1Dqv7qFYnPbaPx7ZfFK6PiIxQ4PfkGPyNyl2Ugx+a/umUonmKqjhM4DnfbMvdX6otXq83soQQ==",
"funding": [
{
"type": "github",
"url": "https://github.com/sponsors/jimmywarting"
},
{
"type": "paypal",
"url": "https://paypal.me/jimmywarting"
}
],
"dependencies": {
"node-domexception": "^1.0.0",
"web-streams-polyfill": "^3.0.3"
},
"engines": {
"node": "^12.20 || >= 14.13"
}
},
"node_modules/file-type": {
"version": "18.5.0",
"resolved": "https://registry.npmjs.org/file-type/-/file-type-18.5.0.tgz",
"integrity": "sha512-yvpl5U868+V6PqXHMmsESpg6unQ5GfnPssl4dxdJudBrr9qy7Fddt7EVX1VLlddFfe8Gj9N7goCZH22FXuSQXQ==",
"dependencies": {
"readable-web-to-node-stream": "^3.0.2",
"strtok3": "^7.0.0",
"token-types": "^5.0.1"
},
"engines": {
"node": ">=14.16"
},
"funding": {
"url": "https://github.com/sindresorhus/file-type?sponsor=1"
}
},
"node_modules/formdata-polyfill": {
"version": "4.0.10",
"resolved": "https://registry.npmjs.org/formdata-polyfill/-/formdata-polyfill-4.0.10.tgz",
"integrity": "sha512-buewHzMvYL29jdeQTVILecSaZKnt/RJWjoZCF5OW60Z67/GmSLBkOFM7qh1PI3zFNtJbaZL5eQu1vLfazOwj4g==",
"dependencies": {
"fetch-blob": "^3.1.2"
},
"engines": {
"node": ">=12.20.0"
}
},
"node_modules/gpt-tokenizer": {
"version": "2.1.1",
"resolved": "https://registry.npmjs.org/gpt-tokenizer/-/gpt-tokenizer-2.1.1.tgz",
"integrity": "sha512-WlX+vj6aPaZ71U6Bf18fem+5k58zlgh2a4nbc7KHy6aGVIyq3nCh709b/8momu34sV/5t/SpzWi8LayWD9uyDw==",
"dependencies": {
"rfc4648": "^1.5.2"
}
},
"node_modules/htmlparser2": {
"version": "8.0.2",
"resolved": "https://registry.npmjs.org/htmlparser2/-/htmlparser2-8.0.2.tgz",
"integrity": "sha512-GYdjWKDkbRLkZ5geuHs5NY1puJ+PXwP7+fHPRz06Eirsb9ugf6d8kkXav6ADhcODhFFPMIXyxkxSuMf3D6NCFA==",
"funding": [
"https://github.com/fb55/htmlparser2?sponsor=1",
{
"type": "github",
"url": "https://github.com/sponsors/fb55"
}
],
"dependencies": {
"domelementtype": "^2.3.0",
"domhandler": "^5.0.3",
"domutils": "^3.0.1",
"entities": "^4.4.0"
}
},
"node_modules/ieee754": {
"version": "1.2.1",
"resolved": "https://registry.npmjs.org/ieee754/-/ieee754-1.2.1.tgz",
"integrity": "sha512-dcyqhDvX1C46lXZcVqCpK+FtMRQVdIMN6/Df5js2zouUsqG7I6sFxitIC+7KYK29KdXOLHdu9zL4sFnoVQnqaA==",
"funding": [
{
"type": "github",
"url": "https://github.com/sponsors/feross"
},
{
"type": "patreon",
"url": "https://www.patreon.com/feross"
},
{
"type": "consulting",
"url": "https://feross.org/support"
}
]
},
"node_modules/inherits": {
"version": "2.0.4",
"resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz",
"integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ=="
},
"node_modules/llama-tokenizer-js": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/llama-tokenizer-js/-/llama-tokenizer-js-1.0.0.tgz",
"integrity": "sha512-O2FsnoXwOsbrM91bd5iX6cPDJvKwvIRghwhhdgGJr4rxy3Ap9QznORqIJHjTqwy9JF1jiqP6sARo0pB6ojW/Cg=="
},
"node_modules/lodash": {
"version": "4.17.21",
"resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz",
"integrity": "sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg=="
},
"node_modules/lodash.snakecase": {
"version": "4.1.1",
"resolved": "https://registry.npmjs.org/lodash.snakecase/-/lodash.snakecase-4.1.1.tgz",
"integrity": "sha512-QZ1d4xoBHYUeuouhEq3lk3Uq7ldgyFXGBhg04+oRLnIz8o9T65Eh+8YdroUwn846zchkA9yDsDl5CVVaV2nqYw=="
},
"node_modules/node-domexception": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/node-domexception/-/node-domexception-1.0.0.tgz",
"integrity": "sha512-/jKZoMpw0F8GRwl4/eLROPA3cfcXtLApP0QzLmUT/HuPCZWyB7IY9ZrMeKw2O/nFIqPQB3PVM9aYm0F312AXDQ==",
"funding": [
{
"type": "github",
"url": "https://github.com/sponsors/jimmywarting"
},
{
"type": "github",
"url": "https://paypal.me/jimmywarting"
}
],
"engines": {
"node": ">=10.5.0"
}
},
"node_modules/node-fetch": {
"version": "3.3.1",
"resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-3.3.1.tgz",
"integrity": "sha512-cRVc/kyto/7E5shrWca1Wsea4y6tL9iYJE5FBCius3JQfb/4P4I295PfhgbJQBLTx6lATE4z+wK0rPM4VS2uow==",
"dependencies": {
"data-uri-to-buffer": "^4.0.0",
"fetch-blob": "^3.1.4",
"formdata-polyfill": "^4.0.10"
},
"engines": {
"node": "^12.20.0 || ^14.13.1 || >=16.0.0"
},
"funding": {
"type": "opencollective",
"url": "https://opencollective.com/node-fetch"
}
},
"node_modules/node-nvidia-smi": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/node-nvidia-smi/-/node-nvidia-smi-1.0.0.tgz",
"integrity": "sha512-vjgWll2M/FWihNPKKbhWKSfbk57WrpVv1XOR5iuIxIHNieTUuJ0HrDIZj5gsJ63dx2eTbm/dWm+fWIPfYm+D3Q==",
"dependencies": {
"xml2js": "^0.4.17"
},
"bin": {
"node-nvidia-smi": "node-nvidia-smi.js"
}
},
"node_modules/nth-check": {
"version": "2.1.1",
"resolved": "https://registry.npmjs.org/nth-check/-/nth-check-2.1.1.tgz",
"integrity": "sha512-lqjrjmaOoAnWfMmBPL+XNnynZh2+swxiX3WUE0s4yEHI6m+AwrK2UZOimIRl3X/4QctVqS8AiZjFqyOGrMXb/w==",
"dependencies": {
"boolbase": "^1.0.0"
},
"funding": {
"url": "https://github.com/fb55/nth-check?sponsor=1"
}
},
"node_modules/os": {
"version": "0.1.2",
"resolved": "https://registry.npmjs.org/os/-/os-0.1.2.tgz",
"integrity": "sha512-ZoXJkvAnljwvc56MbvhtKVWmSkzV712k42Is2mA0+0KTSRakq5XXuXpjZjgAt9ctzl51ojhQWakQQpmOvXWfjQ=="
},
"node_modules/parse5": {
"version": "7.1.2",
"resolved": "https://registry.npmjs.org/parse5/-/parse5-7.1.2.tgz",
"integrity": "sha512-Czj1WaSVpaoj0wbhMzLmWD69anp2WH7FXMB9n1Sy8/ZFF9jolSQVMu1Ij5WIyGmcBmhk7EOndpO4mIpihVqAXw==",
"dependencies": {
"entities": "^4.4.0"
},
"funding": {
"url": "https://github.com/inikulin/parse5?sponsor=1"
}
},
"node_modules/parse5-htmlparser2-tree-adapter": {
"version": "7.0.0",
"resolved": "https://registry.npmjs.org/parse5-htmlparser2-tree-adapter/-/parse5-htmlparser2-tree-adapter-7.0.0.tgz",
"integrity": "sha512-B77tOZrqqfUfnVcOrUvfdLbz4pu4RopLD/4vmu3HUPswwTA8OH0EMW9BlWR2B0RCoiZRAHEUu7IxeP1Pd1UU+g==",
"dependencies": {
"domhandler": "^5.0.2",
"parse5": "^7.0.0"
},
"funding": {
"url": "https://github.com/inikulin/parse5?sponsor=1"
}
},
"node_modules/peek-readable": {
"version": "5.0.0",
"resolved": "https://registry.npmjs.org/peek-readable/-/peek-readable-5.0.0.tgz",
"integrity": "sha512-YtCKvLUOvwtMGmrniQPdO7MwPjgkFBtFIrmfSbYmYuq3tKDV/mcfAhBth1+C3ru7uXIZasc/pHnb+YDYNkkj4A==",
"engines": {
"node": ">=14.16"
},
"funding": {
"type": "github",
"url": "https://github.com/sponsors/Borewit"
}
},
"node_modules/readable-stream": {
"version": "3.6.2",
"resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.6.2.tgz",
"integrity": "sha512-9u/sniCrY3D5WdsERHzHE4G2YCXqoG5FTHUiCC4SIbr6XcLZBY05ya9EKjYek9O5xOAwjGq+1JdGBAS7Q9ScoA==",
"dependencies": {
"inherits": "^2.0.3",
"string_decoder": "^1.1.1",
"util-deprecate": "^1.0.1"
},
"engines": {
"node": ">= 6"
}
},
"node_modules/readable-web-to-node-stream": {
"version": "3.0.2",
"resolved": "https://registry.npmjs.org/readable-web-to-node-stream/-/readable-web-to-node-stream-3.0.2.tgz",
"integrity": "sha512-ePeK6cc1EcKLEhJFt/AebMCLL+GgSKhuygrZ/GLaKZYEecIgIECf4UaUuaByiGtzckwR4ain9VzUh95T1exYGw==",
"dependencies": {
"readable-stream": "^3.6.0"
},
"engines": {
"node": ">=8"
},
"funding": {
"type": "github",
"url": "https://github.com/sponsors/Borewit"
}
},
"node_modules/rfc4648": {
"version": "1.5.2",
"resolved": "https://registry.npmjs.org/rfc4648/-/rfc4648-1.5.2.tgz",
"integrity": "sha512-tLOizhR6YGovrEBLatX1sdcuhoSCXddw3mqNVAcKxGJ+J0hFeJ+SjeWCv5UPA/WU3YzWPPuCVYgXBKZUPGpKtg=="
},
"node_modules/safe-buffer": {
"version": "5.2.1",
"resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz",
"integrity": "sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==",
"funding": [
{
"type": "github",
"url": "https://github.com/sponsors/feross"
},
{
"type": "patreon",
"url": "https://www.patreon.com/feross"
},
{
"type": "consulting",
"url": "https://feross.org/support"
}
]
},
"node_modules/sax": {
"version": "1.2.4",
"resolved": "https://registry.npmjs.org/sax/-/sax-1.2.4.tgz",
"integrity": "sha512-NqVDv9TpANUjFm0N8uM5GxL36UgKi9/atZw+x7YFnQ8ckwFGKrl4xX4yWtrey3UJm5nP1kUbnYgLopqWNSRhWw=="
},
"node_modules/streamsearch": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/streamsearch/-/streamsearch-1.1.0.tgz",
"integrity": "sha512-Mcc5wHehp9aXz1ax6bZUyY5afg9u2rv5cqQI3mRrYkGC8rW2hM02jWuwjtL++LS5qinSyhj2QfLyNsuc+VsExg==",
"engines": {
"node": ">=10.0.0"
}
},
"node_modules/string_decoder": {
"version": "1.3.0",
"resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.3.0.tgz",
"integrity": "sha512-hkRX8U1WjJFd8LsDJ2yQ/wWWxaopEsABU1XfkM8A+j0+85JAGppt16cr1Whg6KIbb4okU6Mql6BOj+uup/wKeA==",
"dependencies": {
"safe-buffer": "~5.2.0"
}
},
"node_modules/strtok3": {
"version": "7.0.0",
"resolved": "https://registry.npmjs.org/strtok3/-/strtok3-7.0.0.tgz",
"integrity": "sha512-pQ+V+nYQdC5H3Q7qBZAz/MO6lwGhoC2gOAjuouGf/VO0m7vQRh8QNMl2Uf6SwAtzZ9bOw3UIeBukEGNJl5dtXQ==",
"dependencies": {
"@tokenizer/token": "^0.3.0",
"peek-readable": "^5.0.0"
},
"engines": {
"node": ">=14.16"
},
"funding": {
"type": "github",
"url": "https://github.com/sponsors/Borewit"
}
},
"node_modules/tiktoken": {
"version": "1.0.10",
"resolved": "https://registry.npmjs.org/tiktoken/-/tiktoken-1.0.10.tgz",
"integrity": "sha512-gF8ndTCNu7WcRFbl1UUWaFIB4CTXmHzS3tRYdyUYF7x3C6YR6Evoao4zhKDmWIwv2PzNbzoQMV8Pxt+17lEDbA=="
},
"node_modules/token-types": {
"version": "5.0.1",
"resolved": "https://registry.npmjs.org/token-types/-/token-types-5.0.1.tgz",
"integrity": "sha512-Y2fmSnZjQdDb9W4w4r1tswlMHylzWIeOKpx0aZH9BgGtACHhrk3OkT52AzwcuqTRBZtvvnTjDBh8eynMulu8Vg==",
"dependencies": {
"@tokenizer/token": "^0.3.0",
"ieee754": "^1.2.1"
},
"engines": {
"node": ">=14.16"
},
"funding": {
"type": "github",
"url": "https://github.com/sponsors/Borewit"
}
},
"node_modules/ts-mixer": {
"version": "6.0.3",
"resolved": "https://registry.npmjs.org/ts-mixer/-/ts-mixer-6.0.3.tgz",
"integrity": "sha512-k43M7uCG1AkTyxgnmI5MPwKoUvS/bRvLvUb7+Pgpdlmok8AoqmUaZxUUw8zKM5B1lqZrt41GjYgnvAi0fppqgQ=="
},
"node_modules/tslib": {
"version": "2.5.3",
"resolved": "https://registry.npmjs.org/tslib/-/tslib-2.5.3.tgz",
"integrity": "sha512-mSxlJJwl3BMEQCUNnxXBU9jP4JBktcEGhURcPR6VQVlnP0FdDEsIaz0C35dXNGLyRfrATNofF0F5p2KPxQgB+w=="
},
"node_modules/undici": {
"version": "5.22.1",
"resolved": "https://registry.npmjs.org/undici/-/undici-5.22.1.tgz",
"integrity": "sha512-Ji2IJhFXZY0x/0tVBXeQwgPlLWw13GVzpsWPQ3rV50IFMMof2I55PZZxtm4P6iNq+L5znYN9nSTAq0ZyE6lSJw==",
"dependencies": {
"busboy": "^1.6.0"
},
"engines": {
"node": ">=14.0"
}
},
"node_modules/util-deprecate": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz",
"integrity": "sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw=="
},
"node_modules/web-streams-polyfill": {
"version": "3.2.1",
"resolved": "https://registry.npmjs.org/web-streams-polyfill/-/web-streams-polyfill-3.2.1.tgz",
"integrity": "sha512-e0MO3wdXWKrLbL0DgGnUV7WHVuw9OUvL4hjgnPkIeEvESk74gAITi5G606JtZPp39cd8HA9VQzCIvA49LpPN5Q==",
"engines": {
"node": ">= 8"
}
},
"node_modules/ws": {
"version": "8.13.0",
"resolved": "https://registry.npmjs.org/ws/-/ws-8.13.0.tgz",
"integrity": "sha512-x9vcZYTrFPC7aSIbj7sRCYo7L/Xb8Iy+pW0ng0wt2vCJv7M9HOMy0UoN3rr+IFC7hb7vXoqS+P9ktyLLLhO+LA==",
"engines": {
"node": ">=10.0.0"
},
"peerDependencies": {
"bufferutil": "^4.0.1",
"utf-8-validate": ">=5.0.2"
},
"peerDependenciesMeta": {
"bufferutil": {
"optional": true
},
"utf-8-validate": {
"optional": true
}
}
},
"node_modules/xml2js": {
"version": "0.4.23",
"resolved": "https://registry.npmjs.org/xml2js/-/xml2js-0.4.23.tgz",
"integrity": "sha512-ySPiMjM0+pLDftHgXY4By0uswI3SPKLDw/i3UXbnO8M/p28zqexCUoPmQFrYD+/1BzhGJSs2i1ERWKJAtiLrug==",
"dependencies": {
"sax": ">=0.6.0",
"xmlbuilder": "~11.0.0"
},
"engines": {
"node": ">=4.0.0"
}
},
"node_modules/xmlbuilder": {
"version": "11.0.1",
"resolved": "https://registry.npmjs.org/xmlbuilder/-/xmlbuilder-11.0.1.tgz",
"integrity": "sha512-fDlsI/kFEx7gLvbecc0/ohLG50fugQp8ryHzMTuW9vSa1GJ0XYWKnhsUx7oie3G98+r56aTQIUB4kht42R3JvA==",
"engines": {
"node": ">=4.0"
}
}
}
}

View File

@ -10,10 +10,15 @@
"author": "",
"license": "ISC",
"dependencies": {
"cheerio": "^1.0.0-rc.12",
"cpu-stat": "^2.0.1",
"discord.js": "^14.9.0",
"dotenv": "^16.0.3",
"gpt-tokenizer": "^2.1.1",
"llama-tokenizer-js": "^1.0.0",
"node-fetch": "^3.3.1",
"node-nvidia-smi": "^1.0.0",
"os": "^0.1.2",
"cpu-stat": "^2.0.1"
"tiktoken": "^1.0.10"
}
}

View File

@ -1,11 +1,11 @@
FROM python:bullseye
FROM ubuntu:latest
RUN apt-get update; \
apt-get install -y --no-install-recommends \
build-essential
build-essential cmake python3 python3-pip
WORKDIR /usr/src/app
RUN pip install --no-cache-dir llama-cpp-python[server]
RUN CMAKE_ARGS="-DLLAMA_METAL=on" FORCE_CMAKE=1 pip install llama-cpp-python[server]
CMD python3 -m llama_cpp.server