llama-cpp-python-djs-bot/llamabot.js
2023-06-12 18:16:16 +02:00

513 lines
19 KiB
JavaScript

import "dotenv/config.js";
import fetch from 'node-fetch';
import { emptyResponses } from './assets/emptyMessages.js';
import { resetResponses, userResetMessages } from './assets/resetMessages.js';
import { errorMessages, busyResponses } from './assets/errorMessages.js';
import cpuStat from 'cpu-stat';
import os from 'os';
import smi from 'node-nvidia-smi';
import llamaTokenizer from 'llama-tokenizer-js'
import {
Client,
GatewayIntentBits,
ActivityType,
Partials
} from 'discord.js';
// Gateway intents this bot subscribes to.
const gatewayIntents = [
  GatewayIntentBits.DirectMessages,
  GatewayIntentBits.Guilds,
  GatewayIntentBits.GuildModeration,
  GatewayIntentBits.GuildMessages,
  GatewayIntentBits.MessageContent,
];
// The single Discord client instance used by every handler in this file.
const client = new Client({
  intents: gatewayIntents,
  partials: [Partials.Channel],
});
// Parses a comma-separated list of channel IDs (the CHANNEL_IDS value from .env).
// Whitespace around each ID is ignored and empty entries are dropped, so values
// such as "111, 222," work. A missing value yields an empty list instead of
// throwing a TypeError at start-up.
function parseChannelIDs(rawList) {
  return (rawList ?? '')
    .split(',')
    .map((id) => id.trim())
    .filter((id) => id.length > 0);
}
// Channel IDs the bot is allowed to answer in.
const channelIDs = parseChannelIDs(process.env.CHANNEL_IDS);
if (channelIDs.length === 0) {
  console.warn('CHANNEL_IDS is empty or unset; the bot will not respond in any channel.');
}
// Per-user conversation state ({ messages, busy }), keyed by the author's user ID.
const conversations = new Map();
// The "Please wait" status message currently shown while a reply is generated, if any.
let botMessage;
// Marks the conversation of `userId` as busy (true) or idle (false).
// While any conversation is busy the message handler rejects new requests,
// so this flag acts as the bot-wide lock.
//
// If the user has no stored conversation yet, an entry is created. That entry
// always carries an empty `messages` array: the message handler reads
// `conversation.messages.length`, so an entry without it would make the next
// message from this user throw.
function setBusy(userId, isBusy) {
  const existing = conversations.get(userId);
  if (existing) {
    existing.busy = isBusy;
    return;
  }
  conversations.set(userId, {
    messages: [],
    busy: isBusy,
  });
}
// Reports whether at least one stored conversation is flagged busy.
// As a side effect, the bot presence is switched to "busy" when one is found.
function isAnyConversationBusy() {
  const anyBusy = [...conversations.values()].some((entry) => entry.busy);
  if (anyBusy) {
    setPresenceBusy();
  }
  return anyBusy;
}
// Shows the bot as "do not disturb" with a "Processing a Request" activity.
function setPresenceBusy() {
  const busyActivity = {
    name: `Processing a Request`,
    type: ActivityType.Playing,
  };
  client.user.setPresence({ activities: [busyActivity], status: 'dnd' });
}
// Shows the bot as "online" with a "Ready for Request" activity.
function setPresenceOnline() {
  const readyActivity = {
    name: `Ready for Request`,
    type: ActivityType.Playing,
  };
  client.user.setPresence({ activities: [readyActivity], status: 'online' });
}
// Runs once after the client has logged in to the Discord API:
// logs readiness and switches the presence to "online".
function handleClientReady() {
  console.log('Bot is ready.');
  setPresenceOnline();
}
client.once('ready', handleClientReady);
// Handles every message the bot receives: enforces the channel allow-list,
// applies the bot-wide busy lock, maintains the per-user conversation and
// relays the model's reply back to the channel.
client.on('messageCreate', async (message) => {
  // Picks one entry of `array` at random.
  const pickRandom = (array) => array[Math.floor(Math.random() * array.length)];

  // Sends a random entry of `array` to the channel the message came from.
  // Delivery failures are logged instead of thrown so a notification that
  // cannot be sent never turns into an unhandled promise rejection.
  async function sendRand(array) {
    try {
      await message.channel.send(pickRandom(array));
    } catch (err) {
      console.error(err);
    }
  }

  // Same as sendRand, but delivered to the author as a direct message.
  async function sendRandDM(array) {
    try {
      await message.author.send(pickRandom(array));
    } catch (err) {
      console.error(err);
    }
  }

  // Only respond in the configured channels.
  if (!channelIDs.includes(message.channel.id)) {
    return;
  }
  // Always ignore bots!
  if (message.author.bot) return;

  // One request at a time: while any conversation is busy, drop the new
  // message and tell the author privately.
  if (isAnyConversationBusy()) {
    setPresenceBusy();
    try {
      await message.delete();
    } catch (err) {
      console.error(err);
    }
    await sendRandDM(busyResponses);
    return;
  }

  const userID = message.author.id;

  // A reset request drops the stored conversation for this user.
  if (message.content === '!reset' || message.content === '!r') {
    conversations.delete(userID);
    await sendRand(userResetMessages);
    return;
  }

  // Fetch the stored conversation or start a fresh one. A stored entry may have
  // been created by setBusy without a `messages` array, so repair that here.
  const conversation = conversations.get(userID) ?? {
    messages: [],
    busy: false
  };
  if (!Array.isArray(conversation.messages)) {
    conversation.messages = [];
  }

  // A new conversation starts with a fixed opening exchange that gives the
  // model its initial prompt and the user's name.
  if (conversation.messages.length === 0) {
    conversation.messages.push({
      role: 'user',
      content: ` ${process.env.INIT_PROMPT}`
    });
    conversation.messages.push({
      role: 'user',
      content: ` User name: ${message.author.username}.`
    });
    conversation.messages.push({
      role: 'assistant',
      content: ` Hello, ${message.author.username}, how may I help you?`
    });
  }

  // Append the user's message to the conversation history.
  conversation.messages.push({
    role: 'user',
    content: ` ${message.cleanContent}`
  });

  // Store the conversation before taking the lock so setBusy flags this very
  // object instead of creating a separate placeholder entry.
  conversations.set(userID, conversation);

  try {
    // Lock out all other requests while this one is processed.
    setPresenceBusy();
    setBusy(userID, true);

    const response = await generateResponse(conversation, message);

    if (response && response.trim()) {
      // Record the reply in the history only when there is one.
      conversation.messages.push({
        role: 'assistant',
        content: response
      });
      const limit = 1980;
      if (response.length > limit) {
        // Too long for a single Discord message: split into chunks.
        // [\s\S] is used instead of "." because "." does not match line breaks,
        // which would drop every newline and start a new chunk at each one.
        const chunks = response
          .match(new RegExp(`[\\s\\S]{1,${limit}}`, "g"))
          .filter((chunk) => chunk.trim()); // Discord rejects blank messages
        // Cap the number of chunks to avoid flooding the channel / API abuse.
        if (chunks.length >= 15) {
          await message.channel.send("Response chunks too large. Try again");
        } else {
          // Space the chunks out by OVERFLOW_DELAY seconds (default 3).
          const delayMs = (process.env.OVERFLOW_DELAY || 3) * 1000;
          chunks.forEach((chunk, index) => {
            setTimeout(() => {
              message.channel.send(chunk).catch((err) => console.error(err));
            }, index * delayMs);
          });
        }
      } else {
        // NOTE(review): only the first "@" is removed here and chunked replies
        // are not filtered at all; confirm whether every mention should be stripped.
        await message.channel.send(response.replace("@", ""));
      }
    } else {
      // Empty reply: tell the user and drop the conversation so the next
      // message starts from a clean state. It is deliberately not stored again.
      await sendRand(emptyResponses);
      conversations.delete(userID);
      await sendRand(resetResponses);
    }
    // Print the current conversation as it stands.
    console.log(conversation);
  } catch (err) {
    // Any failure while generating or sending is reported to the channel.
    console.error(err);
    await sendRand(errorMessages);
  } finally {
    // Always release the lock and go back online.
    setPresenceOnline();
    if (conversations.has(userID)) {
      setBusy(userID, false);
    }
  }
});
// Import cheerio for scraping
import cheerio from 'cheerio';
// Token budget for the prompt sent to the completion API.
const MAX_CONTEXT_TOKENS = 2048;
// Number of leading messages that are never trimmed. The messageCreate handler
// seeds every conversation with three messages (init prompt, user name, greeting).
const PINNED_SEED_MESSAGES = 3;
// Abort the completion request if it has not finished after this long (15 minutes).
const RESPONSE_TIMEOUT_MS = 900000;

// Downloads `url` and returns a plain-text summary (title, description,
// truncated body text and the URL itself) to append to the user's message.
// NOTE(review): the URL comes straight from a chat message, so this fetches
// arbitrary user-supplied addresses - consider restricting hosts.
async function scrapePageSummary(url) {
  const res = await fetch(url);
  const html = await res.text();
  const $ = cheerio.load(html);
  const pageTitle = $('head title').text().trim();
  const pageDescription = $('head meta[name="description"]').attr('content');
  const pageContent = $('body').text().trim();

  let summary = `Title: ${pageTitle}\n`;
  if (pageDescription) {
    summary += `Description: ${pageDescription}\n`;
  }
  if (pageContent) {
    // When MAX_CONTENT_LENGTH is unset this is NaN, the comparison below is
    // false and the content is not truncated.
    const maxContentLength = Number(process.env.MAX_CONTENT_LENGTH);
    let plainTextContent = $('<div>').html(pageContent).text().trim().replace(/[\r\n\t]+/g, ' ');
    // Strip the first code-like token found (the pattern is not global).
    const codePattern = /\/\/|\/\*|\*\/|\{|\}|\[|\]|\bfunction\b|\bclass\b|\b0x[0-9A-Fa-f]+\b|\b0b[01]+\b/;
    plainTextContent = plainTextContent.replace(codePattern, '');
    // Remove anything enclosed in parentheses.
    plainTextContent = plainTextContent.replace(/ *\([^)]*\) */g, '');
    if (plainTextContent.length > maxContentLength) {
      plainTextContent = plainTextContent.substring(0, maxContentLength) + '...';
    }
    summary += `Content: ${plainTextContent.trim()}`;
  }
  summary += `URL: ${url}`;
  return summary;
}

// Number of tokens in one chat message. The tokenizer works on strings, so the
// message text is passed, not the message object.
function countMessageTokens(entry) {
  return llamaTokenizer.encode(String(entry.content ?? '')).length;
}

// Returns a copy of `messages` that fits into `maxTokens`, together with the
// token totals before and after trimming. The oldest messages after the pinned
// seed are dropped first; the seed and the newest message (the request being
// answered) are always kept. The input array is not modified.
function trimMessagesToContext(messages, maxTokens) {
  const kept = [...messages];
  const counts = kept.map(countMessageTokens);
  const tokensBefore = counts.reduce((sum, count) => sum + count, 0);
  let tokensAfter = tokensBefore;
  const firstRemovable = Math.min(PINNED_SEED_MESSAGES, Math.max(kept.length - 1, 0));
  while (tokensAfter > maxTokens && kept.length - 1 > firstRemovable) {
    tokensAfter -= counts[firstRemovable];
    kept.splice(firstRemovable, 1);
    counts.splice(firstRemovable, 1);
  }
  return { messages: kept, tokensBefore, tokensAfter };
}

// Builds the "Please wait" embed. `gpuFields` are inserted before the time field.
function buildStatusEmbed(systemLoad, elapsedSeconds, gpuFields = []) {
  const freeMemory = os.freemem() / 1024 / 1024 / 1024;
  const totalMemory = os.totalmem() / 1024 / 1024 / 1024;
  const usedMemory = totalMemory - freeMemory;
  return {
    color: 0x0099ff,
    title: 'Please wait.. I am thinking...',
    fields: [
      { name: 'System Load', value: `${systemLoad.toFixed(2)}%` },
      { name: 'Memory Usage', value: `${usedMemory.toFixed(2)} GB / ${totalMemory.toFixed(2)} GB` },
      ...gpuFields,
      { name: 'Time', value: `~${elapsedSeconds} seconds.` },
    ],
  };
}

// Generates the model's reply for `conversation`.
//
// - Every http(s) URL in `message.content` is scraped and its summary appended
//   to the last message of the conversation.
// - While the request runs, a status embed with system load is posted to the
//   channel and refreshed every REFRESH_INTERVAL seconds (default 7).
// - The prompt is trimmed to MAX_CONTEXT_TOKENS and sent to the completion API
//   at ROOT_IP:ROOT_PORT.
//
// Returns the reply text. Throws if a URL cannot be read, the API call fails or
// times out, or the API answer has no choices. Timers and the status message
// are cleaned up on every path.
async function generateResponse(conversation, message) {
  const urls = message.content.match(/(https?:\/\/[^\s]+)/g);
  if (urls) {
    // Process multiple URLs one by one.
    for (const url of urls) {
      try {
        const summary = await scrapePageSummary(url);
        const lastMessageIndex = conversation.messages.length - 1;
        conversation.messages[lastMessageIndex].content += "\n" + summary;
        console.log("A URL was provided, response: " + summary);
      } catch (err) {
        // Let the caller report the failure to the user.
        console.error(err);
        throw new Error(`Could not read the page at ${url}`, { cause: err });
      }
    }
  }

  // One value drives both the refresh timer and the displayed elapsed time.
  const refreshSeconds = Number(process.env.REFRESH_INTERVAL) || 7;
  // GPU stats are shown unless GPU is set to a value that is numerically 0.
  const useGpu = Number(process.env.GPU ?? 1) !== 0;

  // Aborts the completion request when it takes too long.
  const controller = new AbortController();
  const timeout = setTimeout(() => {
    controller.abort();
  }, RESPONSE_TIMEOUT_MS);
  let refreshInterval;
  let finished = false;
  let elapsedSeconds = 0;

  // Creates the status message on first use and edits it afterwards.
  const publishStatus = async (embedData) => {
    try {
      if (!botMessage) {
        const sent = await message.channel.send({ embeds: [embedData] });
        if (finished) {
          // The reply finished while the send was in flight: do not leave a
          // stale "thinking" message behind.
          await sent.delete();
        } else {
          botMessage = sent;
        }
      } else if (!isAnyConversationBusy()) {
        await botMessage.delete();
      } else {
        await botMessage.edit({ embeds: [embedData] });
      }
    } catch (err) {
      console.error(err);
    }
  };

  // Collects system (and optionally GPU) stats and publishes them.
  const showSystemLoad = () => {
    elapsedSeconds += refreshSeconds;
    cpuStat.usagePercent((err, percent) => {
      if (err) {
        return console.log(err);
      }
      if (finished) return;
      if (!useGpu) {
        void publishStatus(buildStatusEmbed(percent, elapsedSeconds));
        return;
      }
      smi((smiErr, data) => {
        if (smiErr) {
          console.error(smiErr);
          return;
        }
        if (finished) return;
        const gpu = data.nvidia_smi_log.gpu;
        const gpuFields = [
          { name: 'GPU Utilization', value: `${gpu.utilization.gpu_util}` },
          { name: 'Memory Utilization', value: `${gpu.utilization.memory_util}` },
          { name: 'GPU Temperature', value: `${gpu.temperature.gpu_temp}` },
        ];
        void publishStatus(buildStatusEmbed(percent, elapsedSeconds, gpuFields));
      });
    });
  };

  try {
    // Show the status right away, then keep refreshing it.
    showSystemLoad();
    refreshInterval = setInterval(showSystemLoad, refreshSeconds * 1000);

    // Fit the prompt into the context window.
    const trimmed = trimMessagesToContext(conversation.messages, MAX_CONTEXT_TOKENS);
    console.log(`CTX SIZE: ${trimmed.tokensBefore}`);
    console.log(`CTX SIZE AFTER PROCESSING: ${trimmed.tokensAfter}`);

    // Send the request to the completion API.
    const response = await fetch(`http://${process.env.ROOT_IP}:${process.env.ROOT_PORT}/v1/chat/completions`, {
      method: 'POST',
      headers: {
        'accept': 'application/json',
        'Content-Type': 'application/json'
      },
      body: JSON.stringify({
        messages: trimmed.messages,
        max_tokens: Number(process.env.MAX_TOKENS),
        repeat_penalty: Number(process.env.REPEAT_PENALTY)
      }),
      signal: controller.signal
    });
    if (!response.ok) {
      throw new Error(`Completion API responded with HTTP ${response.status}`);
    }
    const responseData = await response.json();
    console.log(JSON.stringify(responseData));
    const responseText = responseData.choices[0].message.content;
    console.log(responseText);
    return responseText;
  } finally {
    // Stop every timer and remove the status message, whatever happened.
    finished = true;
    clearTimeout(timeout);
    clearInterval(refreshInterval);
    const statusMessage = botMessage;
    botMessage = null;
    if (statusMessage) {
      try {
        await statusMessage.delete();
      } catch (err) {
        console.error(err);
      }
    }
  }
}
client.login(process.env.THE_TOKEN); // Log in with the bot token read from the THE_TOKEN environment variable