forked from snxraven/llama-cpp-python-djs-bot
Compare commits: e12f518bd7 ... main (62 commits)
Commits in this range:
e138c67fe2, 50d947b9c2, b009ace3e7, 385234e05a, 63a4486745, f4e97d9710, 4a05334b15, cae0112077, 72059a430a, 45ef05ac45, ca758c5898, 335f06ff69, 6b739d2b80, 9b4981c539, 9660806f94, c28bf9c022, 6531b633f7, 99ea986bbd, a34a910200, 637c5182a3, 5516e4c20b, 9489068b8e, f211fe2d67, 932474ec63, fb08f0b592, 9197996ac7, 016f553192, 691b3137c3, f272839ab1, b1c1f6995a, 9b00f2fcd8, 38f6c38f6d, 2771052f23, f87b61fb2b, c20ba21180, 38fba90d30, 0caf82d7f6, d2aae48e33, 3da598c218, 735b94360a, da5e2e6e7d, 3e220b6bbb, 30e07afa85, 1d0b225438, c97e525d45, 3806892349, b72875e744, fffef84119, 4e69329501, 4ff67ff28b, c2396f7e5d, 7b3e0c1db2, 5f8e57d121, cc770e617d, 5a56251e20, 6bb74c8020, 56c7bfd26d, 5793b7b4ad, cf6e47eebc, f98caa23cc, d3162bce32, ec7dbde761
.gitignore (vendored, 2 lines changed)

@@ -1,2 +1,2 @@
.env
node_modules/
Dockerfile (new file, 10 lines)

@@ -0,0 +1,10 @@
FROM node:slim

WORKDIR /app

COPY package*.json ./
RUN npm install --omit=dev

COPY . .

CMD node llamabot.js
README.md (19 lines changed)

@@ -32,6 +32,9 @@ Define a generateResponse function that sends a request to the GPT-3 API to gene

 Call the generateResponse function within the messageCreate event listener function.

 

+# Backend REQUIRED
+
+The HTTP Server from https://abetlen.github.io/llama-cpp-python/ is required to use this bot.

@@ -48,7 +51,7 @@ python3 -m llama_cpp.server

 Navigate to http://localhost:8000/docs to see the OpenAPI documentation.

-# Usage
+# Static Usage

 1) Use ```npm i ```

@@ -60,5 +63,19 @@ Navigate to http://localhost:8000/docs to see the OpenAPI documentation.

 6) Run the bot ```node llamabot.js ```

+# Docker Compose
+This will automatically configure the API for you as well as the bot in two separate containers within a stack.
+
+1. `git clone https://git.ssh.surf/snxraven/llama-cpp-python-djs-bot.git`
+
+2. `cp default.env .env`
+
+3. Set DATA_DIR in .env to the exact location of your model files.
+
+4. Edit docker-compose.yml MODEL to ensure the correct model bin is set.
+
+5. `docker compose up -d`
+
+
 Want to make this better? Issue a pull request!
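Before pointing the bot at the backend, it can help to hit the server once by hand. Below is a minimal sketch, assuming the llama-cpp-python server is listening on localhost:8000 as in the README; the endpoint and payload shape mirror what llamabot.js sends, but the file name and prompt are hypothetical.

```js
// check-backend.js: a hand-rolled smoke test for the llama-cpp-python server.
// Assumes the server from the README is running on localhost:8000.
import fetch from 'node-fetch';

const res = await fetch('http://localhost:8000/v1/chat/completions', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify({
    messages: [{ role: 'user', content: 'Say hello in one sentence.' }],
    max_tokens: 64
  })
});

// The response shape matches what the bot reads: choices[0].message.content.
const data = await res.json();
console.log(data.choices[0].message.content);
```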
assets/emptyMessages.js

@@ -1,4 +1,4 @@
-const emptyResponses = [
+export const emptyResponses = [
 "Oh boy, this is a tough one! Unfortunately, I don't have much insight to share on this topic.",
 "Hmm, I'm scratching my head on this one. Sorry to say that I don't have much to offer.",
 "Wish I had a witty remark, but alas, I don't have much to contribute to this discussion.",

@@ -28,5 +28,3 @@ const emptyResponses = [
 "I'm afraid I don't have much experience with this, so I can't be of much help.",
 "I wish I had some grand insight to share, but alas, I don't have much to offer in that regard."
 ];
-
-module.exports = emptyResponses;
assets/errorMessages.js

@@ -1,4 +1,4 @@
-const errorMessages = [
+export const errorMessages = [
 "Uh oh, looks like something went awry! Try !reset to start fresh.",
 "Oops, we hit a bump in the road! Give !reset a try to start anew.",
 "We've encountered an error, but !reset can help us out! Give it a go.",

@@ -21,7 +21,7 @@ const errorMessages = [
 "Oopsie daisy! Give !reset a try and we'll start over."
 ];

-const busyResponses = [
+export const busyResponses = [
 "Sorry about that! Looks like I'm tied up at the moment. Please try again later.",
 "Oops, I'm currently busy with something else. Please try again later.",
 "Looks like I'm already working on something. Can you try again later?",

@@ -43,5 +43,3 @@ const errorMessages = [
 "Looks like I'm currently engaged with something else. Please try again later.",
 "I'm currently unavailable. Can you try again later?"
 ];
-
-module.exports = { errorMessages, busyResponses};
assets/resetMessages.js

@@ -1,4 +1,4 @@
-const resetResponses = [
+export const resetResponses = [
 "Whoops, let's start fresh! What can I assist you with now?",
 "Looks like we need a fresh start! What do you need help with?",
 "To avoid any gremlins in the system, let's reset! How can I assist you now?",

@@ -21,7 +21,7 @@ const resetResponses = [
 "Let's hit the restart button to make sure we're on the right track. What can I help you with now?"
 ];

-const userResetMessages = [
+export const userResetMessages = [
 "All good, we're starting fresh! How can I assist you?",
 "Got it, let's start over! How can I help you today?",
 "Alright, starting anew! What can I help you with?",

@@ -43,5 +43,3 @@ const resetResponses = [
 "Sure thing, we'll start over! What can I help you with today?",
 "Conversation reset, confirmed! What do you need help with?"
 ];
-
-module.exports = {resetResponses, userResetMessages};
default.env (44 lines changed)

@@ -1,4 +1,48 @@
# Discord Token
THE_TOKEN = "DISCORD_TOKEN_HERE"

# The Channel IDs the bot will operate in, separated by commas
CHANNEL_IDS = 1094494101631680653,1094628334727614605

# The INIT prompt for all conversations.
INIT_PROMPT = "Assistant name: ChatAI. You code, write and provide any information without any mistakes."

# Loading Embed Refresh Timing
REFRESH_INTERVAL=10

# When a message is too large for Discord we chunk the response into separate messages.
# To ensure we do not rate limit the bot we send these at a delay interval.
# DEFAULT: 3; a good setting is between 3 and 7 seconds.
OVERFLOW_DELAY=3

# Max content to fetch from given URLs
MAX_CONTENT_LENGTH=2000

# Max tokens for generations
MAX_TOKENS = 1024

# ROOT_IP is only used when running the bot without docker compose
ROOT_IP = 192.168.0.15

# ROOT_PORT is only used when running the bot without docker compose
ROOT_PORT = 8000

# Directory to your models (llama.cpp specific settings)
DATA_DIR = /home/USERNAME/weights

# Enable experimental message caches (limited to a single session)
# Cache will use ~1.4 GB or MORE of RAM. ONLY ENABLE IF YOUR SYSTEM CAN HANDLE THIS.
CACHE = 1

# Set the number of threads to use; a standard thread will utilize 1 whole core.
# I usually set this to all the cores I physically have, OR 2 cores less to leave room for other processes.
N_THREADS = 4

# Always use MMAP unless you know what you are doing
USE_MMAP=1

# Only use MLOCK if you know what it does!
USE_MLOCK=0

# The higher the number, the stronger the repeat penalty.
REPEAT_PENALTY=1
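All of these settings reach the bot as strings through process.env, so numeric values get cast in code. A minimal sketch of the consumption pattern used in llamabot.js follows; the CHANNEL_IDS split is an assumption based on the comma-separated comment above and is not shown in this diff.

```js
// Sketch: how llamabot.js-style code consumes default.env via dotenv.
// Every process.env value is a string, so cast and default explicitly.
import "dotenv/config.js";

const maxTokens  = Number(process.env.MAX_TOKENS);               // e.g. 1024
const refreshMs  = (process.env.REFRESH_INTERVAL || 7) * 1000;   // embed refresh period
const overflowMs = (process.env.OVERFLOW_DELAY || 3) * 1000;     // delay between chunks
// Assumption: the comma-separated channel list is parsed like this.
const channelIDs = (process.env.CHANNEL_IDS || "").split(",");

console.log({ maxTokens, refreshMs, overflowMs, channelIDs });
```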
docker-compose.yml (new file, 28 lines)

@@ -0,0 +1,28 @@
version: '3.9'

services:
  llama-python-server:
    container_name: llama-python-server
    restart: unless-stopped
    build:
      context: ./server
    env_file: .env
    volumes:
      - ${DATA_DIR}:/usr/src/app/models
    environment:
      - HOST=llama-python-server
      - MODEL=./models/ggml-vic7b-q4_0.bin
  llama-python-djs-bot:
    container_name: llama-python-djs-bot
    restart: unless-stopped
    build:
      context: .
    depends_on:
      - llama-python-server
    environment:
      - THE_TOKEN
      - REFRESH_INTERVAL
      - CHANNEL_IDS
      - ROOT_IP=llama-python-server
      - ROOT_PORT=8000
      - INIT_PROMPT='Assistant Name':' ChatAI. You code, write and provide any information without any mistakes.'
huggingface-config/Dockerfile (new file, 42 lines)

@@ -0,0 +1,42 @@
FROM ubuntu:latest

RUN apt update

RUN DEBIAN_FRONTEND=noninteractive apt install curl sudo -y

RUN curl -fsSL https://deb.nodesource.com/setup_18.x | sudo -E bash -

RUN DEBIAN_FRONTEND=noninteractive apt install nodejs -y

RUN npm i pm2 -g

RUN mkdir -p /code/.pm2

RUN mkdir -p /.pm2

RUN chmod 0777 /code/.pm2

RUN chmod 0777 /.pm2

RUN DEBIAN_FRONTEND=noninteractive apt install wget python3 python3-pip -y

WORKDIR /code

RUN pip install --no-cache-dir llama-cpp-python[server]

COPY . .

RUN npm i

ENV HOST localhost
ENV PORT 7860
ENV MODEL=/code/ggml-vic7b-q4_0.bin
ENV CACHE=1
ENV USE_MLOCK=0
ENV REPEAT_PENALTY=1
ENV MODEL=/code/ggml-vic7b-q4_0.bin
ENV PM2_HOME=/code/.pm2

RUN wget -q https://huggingface.co/eachadea/ggml-vicuna-7b-1.1/resolve/main/ggml-vic7b-q4_0.bin

CMD /bin/bash /code/start.sh
huggingface-config/README.md (new file, 17 lines)

# How to Deploy on Hugging Face

1) Move the "assets" directory and the llamabot.js and package.json files into your Hugging Face repo directory.

2) Move huggingface-config/Dockerfile, huggingface-config/start.sh, and huggingface-config/startServices.json into the root of the Hugging Face repo.

3) Edit the Dockerfile as you need; it is set to automatically download Vicuna 1.1 7B.

4) Move default.env into your repo as .env and edit it for your needs.

5) Push the changes.

You should then see the bot being built and deployed on Hugging Face.

pm2 log will run automatically so you can see frontend and backend logs.

PLEASE NOTE: Your Hugging Face repo should remain private!
huggingface-config/start.sh (new file, 6 lines)

#!/bin/bash
# This is the main process of the container and will stay alive as long as pm2 log is running.

pm2 start startServices.json

pm2 log
huggingface-config/startServices.json (new file, 14 lines)

{
  "apps": [
    {
      "name": "Discord-Bot",
      "script": "node /code/llamabot.js",
      "args": ""
    },
    {
      "name": "AI-API",
      "script": "python3 -m llama_cpp.server",
      "args": ""
    }
  ]
}
llamabot.js (186 lines changed)

@@ -1,23 +1,23 @@
-const Discord = require('discord.js');
-const fetch = require('node-fetch');
-const emptyResponses = require('./assets/emptyMessages.js');
-const { resetResponses, userResetMessages } = require('./assets/resetMessages.js');
-const { errorMessages, busyResponses } = require('./assets/errorMessages.js');
+import "dotenv/config.js";
+import fetch from 'node-fetch';
+import { emptyResponses } from './assets/emptyMessages.js';
+import { resetResponses, userResetMessages } from './assets/resetMessages.js';
+import { errorMessages, busyResponses } from './assets/errorMessages.js';
+import cpuStat from 'cpu-stat';
+import os from 'os';

-require('dotenv').config()
-
-const {
+import {
   Client,
   GatewayIntentBits,
   ActivityType,
   Partials
-} = require('discord.js');
+} from 'discord.js';

 const client = new Client({
   intents: [
     GatewayIntentBits.DirectMessages,
     GatewayIntentBits.Guilds,
-    GatewayIntentBits.GuildBans,
+    GatewayIntentBits.GuildModeration,
     GatewayIntentBits.GuildMessages,
     GatewayIntentBits.MessageContent,
   ],

@@ -111,15 +111,15 @@ client.on('messageCreate', async (message) => {
   if (conversation.messages.length === 0) {
     conversation.messages.push({
       role: 'user',
-      content: `Your name is rAi, you code, write and provide any information without any mistakes.`
+      content: ` ${process.env.INIT_PROMPT}`
     });
     conversation.messages.push({
       role: 'user',
-      content: `My name is ${message.author.username}.`
+      content: ` User name: ${message.author.username}.`
     });
     conversation.messages.push({
       role: 'assistant',
-      content: `Hello, ${message.author.username}, how may I help you?`
+      content: ` Hello, ${message.author.username}, how may I help you?`
     });
   }

@@ -132,14 +132,14 @@ client.on('messageCreate', async (message) => {
   // Append user message to conversation history
   conversation.messages.push({
     role: 'user',
-    content: message.cleanContent
+    content: ` ${message.cleanContent}`
   });

   try {
     setPresenceBusy()
     setBusy(message.author.id, true);

-    const response = await generateResponse(conversation);
+    const response = await generateResponse(conversation, message);

     // Append bot message to conversation history
     conversation.messages.push({

@@ -149,7 +149,26 @@ client.on('messageCreate', async (message) => {
   if (response && response.trim()) {
     // Send response to user if it's not empty
-    await message.channel.send(response);
+    const limit = 1980;
+
+    // if we are over the discord char limit we need chunks...
+    if (response.length > limit) {
+
+      const chunks = response.match(new RegExp(`.{1,${limit}}`, "g"));
+      if (chunks.length >= 15) return await message.channel.send("Response chunks too large. Try again");
+
+      for (let i = 0; i < chunks.length; i++) {
+        setTimeout(() => {
+          message.channel.send(chunks[i]);
+        }, i * (process.env.OVERFLOW_DELAY || 3) * 1000); // delay of 3 seconds between each chunk to save on API requests
+      }
+    } else {
+      // We are good to go, send the response
+      await message.channel.send(response.replace("@", ""));
+    }

     setPresenceOnline()
     setBusy(message.author.id, false);
   } else {

@@ -161,16 +180,75 @@ client.on('messageCreate', async (message) => {
     conversation.busy = false;
   }
   conversations.set(userID, conversation); // Update user's conversation map in memory
   console.log(conversation)

 } catch (err) {
   console.error(err);
-  sendRand(errorMessages)
+  return sendRand(errorMessages)
 } finally {
   setPresenceOnline()
   setBusy(message.author.id, false);
 }
});

-async function generateResponse(conversation) {
+import cheerio from 'cheerio';
+
+async function generateResponse(conversation, message) {
+
+  // Check if message contains a URL
+  const urlRegex = /(https?:\/\/[^\s]+)/g;
+  const urls = message.content.match(urlRegex);
+
+  if (urls) {
+    // If there are multiple URLs, process them one by one
+    for (const url of urls) {
+      try {
+        const res = await fetch(url);
+        const html = await res.text();
+        const $ = cheerio.load(html);
+
+        // Extract page title, meta description and content
+        const pageTitle = $('head title').text().trim();
+        const pageDescription = $('head meta[name="description"]').attr('content');
+        const pageContent = $('body').text().trim();
+
+        // Construct response message with page details
+        let response = `Title: ${pageTitle}\n`;
+        if (pageDescription) {
+          response += `Description: ${pageDescription}\n`;
+        }
+        if (pageContent) {
+          const MAX_CONTENT_LENGTH = process.env.MAX_CONTENT_LENGTH;
+          let plainTextContent = $('<div>').html(pageContent).text().trim().replace(/[\r\n\t]+/g, ' ');
+          const codePattern = /\/\/|\/\*|\*\/|\{|\}|\[|\]|\bfunction\b|\bclass\b|\b0x[0-9A-Fa-f]+\b|\b0b[01]+\b/;
+          const isCode = codePattern.test(plainTextContent);
+
+          if (isCode) {
+            plainTextContent = plainTextContent.replace(codePattern, '');
+          }
+          // Remove anything enclosed in brackets
+          plainTextContent = plainTextContent.replace(/ *\([^)]*\) */g, '');
+          if (plainTextContent.length > MAX_CONTENT_LENGTH) {
+            plainTextContent = plainTextContent.substring(0, MAX_CONTENT_LENGTH) + '...';
+          }
+          response += `Content: ${plainTextContent.trim()}`;
+        }
+        response += `URL: ${url}`;
+
+        // Get the index of the last message in the array
+        const lastMessageIndex = conversation.messages.length - 1;
+
+        // Append a new line and the new content to the existing content of the last message
+        conversation.messages[lastMessageIndex].content += "\n" + response;
+
+        console.log("A URL was provided, response: " + response)
+
+      } catch (err) {
+        console.error(err);
+        return sendRand(errorMessages);
+      }
+    }
+  }
   const controller = new AbortController();
   const timeout = setTimeout(() => {
     controller.abort();

@@ -178,7 +256,56 @@ async function generateResponse(conversation) {
   const messagesCopy = [...conversation.messages]; // create a copy of the messages array

+  console.log(conversation)
   let botMessage; // define a variable to hold the message object
+  let time = 0
+  // define a function that shows the system load percentage and updates the message
+  const showSystemLoad = async () => {
+    time = Number(time) + Number(process.env.REFRESH_INTERVAL);
+    cpuStat.usagePercent(function (err, percent, seconds) {
+      if (err) {
+        return console.log(err);
+      }
+
+      const systemLoad = percent;
+      const freeMemory = os.freemem() / 1024 / 1024 / 1024;
+      const totalMemory = os.totalmem() / 1024 / 1024 / 1024;
+      const usedMemory = totalMemory - freeMemory;
+
+      const embedData = {
+        color: 0x0099ff,
+        title: 'Please wait.. I am thinking...',
+        fields: [
+          {
+            name: 'System Load',
+            value: `${systemLoad.toFixed(2)}%`,
+          },
+          {
+            name: 'Memory Usage',
+            value: `${usedMemory.toFixed(2)} GB / ${totalMemory.toFixed(2)} GB`,
+          },
+          {
+            name: 'Time',
+            value: `~${time} seconds.`,
+          },
+        ],
+      };
+
+      // if the message object doesn't exist, create it
+      if (!botMessage) {
+        (async () => {
+          botMessage = await message.channel.send({ embeds: [embedData] });
+        })();
+      } else {
+        botMessage.edit({ embeds: [embedData] }); // otherwise, update the message
+      }
+    });
+  };
+
+  // call the function initially
+  await showSystemLoad();
+
+  // Grab the REFRESH_INTERVAL from ENV if not exist, lets use 7 (seconds)
+  const refreshInterval = setInterval(showSystemLoad, (process.env.REFRESH_INTERVAL || 7) * 1000);

   try {
     const response = await fetch(`http://${process.env.ROOT_IP}:${process.env.ROOT_PORT}/v1/chat/completions`, {

@@ -188,25 +315,32 @@ async function generateResponse(conversation) {
       'Content-Type': 'application/json'
     },
     body: JSON.stringify({
-      messages: messagesCopy // use the copy of the messages array
+      messages: messagesCopy,
+      max_tokens: Number(process.env.MAX_TOKENS), // add the max_tokens parameter here
+      repeat_penalty: Number(process.env.REPEAT_PENALTY)
     }),
     signal: controller.signal
   });

   const responseData = await response.json();
-  console.log(JSON.stringify(responseData))
+  console.log(JSON.stringify(responseData));
   const choice = responseData.choices[0];

-  // Remove "user None:" and any text after it from the response
-  const responseText = choice.message.content.trim();
-  const startIndex = responseText.indexOf('user None:');
-  const sanitizedResponse = startIndex === -1 ? responseText : responseText.substring(0, startIndex);
+  const responseText = choice.message.content;
+
+  // clear the interval, replace the "please wait" message with the response, and update the message
+  clearInterval(refreshInterval);
+  console.log(responseText);
+  botMessage.delete()

-  return sanitizedResponse;
+  return responseText;
 } catch (err) {
   throw err;
 } finally {
   clearTimeout(timeout);
+  time = 0
 }
}
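One caveat in the new chunking code above: the pattern `.{1,1980}` never matches newline characters, so line breaks are silently dropped from long replies. A minimal sketch of one possible fix, using a `[\s\S]` character class instead; this is a suggestion, not part of the diff, and chunkResponse is a hypothetical helper name.

```js
// Sketch: split a long reply into Discord-sized chunks without losing
// newlines. `.` does not match \n, so use [\s\S] instead.
function chunkResponse(response, limit = 1980) {
  return response.match(new RegExp(`[\\s\\S]{1,${limit}}`, "g")) ?? [];
}

// Usage, mirroring the OVERFLOW_DELAY stagger in llamabot.js:
// chunkResponse(response).forEach((chunk, i) =>
//   setTimeout(() => message.channel.send(chunk), i * overflowMs));
```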
package.json (new file, 20 lines)

{
  "name": "llama-cpp-python-djs-bot",
  "version": "1.0.0",
  "description": "",
  "main": "llamabot.js",
  "type": "module",
  "scripts": {
    "test": "echo \"Error: no test specified\" && exit 1"
  },
  "author": "",
  "license": "ISC",
  "dependencies": {
    "discord.js": "^14.9.0",
    "cheerio": "^1.0.0-rc.12",
    "dotenv": "^16.0.3",
    "node-fetch": "^3.3.1",
    "os": "^0.1.2",
    "cpu-stat": "^2.0.1"
  }
}
server/Dockerfile (new file, 11 lines)

FROM python:bullseye

RUN apt-get update; \
    apt-get install -y --no-install-recommends \
    build-essential

WORKDIR /usr/src/app

RUN pip install --no-cache-dir llama-cpp-python[server]

CMD python3 -m llama_cpp.server