improving token reducer logic

Raven Scott 2023-08-15 01:41:39 -04:00
parent 05e79cba3a
commit a980f3cd43
1 changed file with 15 additions and 13 deletions

@@ -192,7 +192,7 @@ client.on('messageCreate', async (message) => {
   // We can extend our message size using chunks, the issue?
   // Users can abuse this feature, we lock this to 15 to avoid API Abuse.
   const chunks = response.match(new RegExp(`.{1,${limit}}`, "g"));
-  if (chunks.length >= 15) return await message.channel.send("Response chunks too large. Try again");
+  if (chunks.length >= 40) return await message.channel.send("Response chunks too large. Try again");
   // If we do now have too many chunks, lets send each one using our overflow delay
   for (let i = 0; i < chunks.length; i++) {
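
For context, the chunking line in this hunk splits a long model reply into pieces of at most limit characters; the commit only raises the abuse cap from 15 chunks to 40. A standalone sketch of how that regex behaves (the limit value of 2000 is an assumption, matching Discord's usual message cap):

// Sketch of the chunking regex above; limit = 2000 is an assumed value.
const limit = 2000;
const response = "x".repeat(4500); // stand-in for a long model reply
const chunks = response.match(new RegExp(`.{1,${limit}}`, "g"));
console.log(chunks.map(c => c.length)); // [2000, 2000, 500] — 3 chunks, well under the cap of 40
// Caveat: "." does not match newlines, so a reply containing "\n" is also
// split at (and stripped of) every line break.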
@@ -437,25 +437,26 @@ async function generateResponse(conversation, message) {
   function countLlamaTokens(messages) {
     let totalTokens = 0;
     for (const message of messages) {
       if (message.role === 'user' || message.role === 'assistant') {
         const encodedTokens = llamaTokenizer.encode(message.content);
         totalTokens += encodedTokens.length;
       }
     }
     return totalTokens;
   }
   let totalTokens = countLlamaTokens(messagesCopy);
   console.log(`Total Llama tokens: ${totalTokens}`);
   let tokenLength = totalTokens
   // Remove older conversations if necessary
   const maxLength = 1800;
-  if (tokenLength > maxLength) {
-    const diff = tokenLength - maxLength;
+  const tolerance = 100; // allow for some flexibility in the token length
+  if (tokenLength > maxLength + tolerance) {
+    const diff = tokenLength - (maxLength + tolerance);
     let removedTokens = 0;
     // Iterate over the messages in reverse order
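
The substantive change in the hunk above is the tolerance band: trimming now starts only once the conversation exceeds maxLength + tolerance, and the amount to shave off shrinks by the same margin. A quick numeric sketch (the 1850-token count is hypothetical):

// Threshold behavior before vs. after this commit (token count hypothetical).
const maxLength = 1800;
const tolerance = 100;
const tokenLength = 1850;
// Old check: trims, because 1850 > 1800.
console.log(tokenLength > maxLength); // true
// New check: leaves the history alone until it passes 1900.
console.log(tokenLength > maxLength + tolerance); // false
// When trimming does run, the target is also smaller:
// diff = tokenLength - (maxLength + tolerance) instead of tokenLength - maxLength.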
@@ -466,21 +467,22 @@ async function generateResponse(conversation, message) {
       // Calculate the token length of the current message
       const messageTokenLength = countLlamaTokens(messageTokens);
-      // Remove the current message if it won't make the token length negative
+      // Check if the current message plus the tokens in the message is less than or equal to the diff
       if (removedTokens + messageTokenLength <= diff) {
         messagesCopy.splice(i, 1);
         removedTokens += messageTokenLength;
-        console.log(removedTokens + " removed \nAfter Resize: " + countLlamaTokens(messagesCopy)
-        )
+        console.log(removedTokens + " removed \nAfter Resize: " + countLlamaTokens(messagesCopy));
       } else {
-        // Break the loop if removing the message would make the token length negative
+        // Remove more than one message if necessary to bring the total length below the maximum allowed length
+        const messagesToRemove = Math.floor(diff / messageTokenLength);
+        for (let j = 0; j < messagesToRemove; j++) {
+          messagesCopy.splice(i, 1);
+          removedTokens += messageTokenLength;
+        }
         break;
       }
     }
     // Check the updated token length
   }
   // Sending request to our API
   const response = await fetch(`http://${process.env.ROOT_IP}:${process.env.ROOT_PORT}/v1/chat/completions`, {
     method: 'POST',
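
The reworked else branch no longer just breaks: it estimates how many more messages of the current size would fit into the remaining diff, splices that many in one go, then stops. A sketch of the arithmetic (all values hypothetical; the branch implicitly assumes neighboring messages cost about as many tokens as the current one):

// Arithmetic behind the new else branch (values are hypothetical).
const diff = 250; // tokens still over budget when the branch is hit
// Case 1: the current message alone is larger than the remaining diff.
console.log(Math.floor(diff / 300)); // 0 — nothing spliced, the loop just breaks
// Case 2: the current message is small relative to the diff.
console.log(Math.floor(diff / 60)); // 4 — four messages spliced, then break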