Improve token reducer logic

This commit is contained in:
Raven Scott 2023-08-15 01:41:39 -04:00
parent 05e79cba3a
commit a980f3cd43

View File

@ -192,7 +192,7 @@ client.on('messageCreate', async (message) => {
// We can extend our message size using chunks, the issue? // We can extend our message size using chunks, the issue?
// Users can abuse this feature, we lock this to 15 to avoid API Abuse. // Users can abuse this feature, we lock this to 15 to avoid API Abuse.
const chunks = response.match(new RegExp(`.{1,${limit}}`, "g")); const chunks = response.match(new RegExp(`.{1,${limit}}`, "g"));
if (chunks.length >= 15) return await message.channel.send("Response chunks too large. Try again"); if (chunks.length >= 40) return await message.channel.send("Response chunks too large. Try again");
// If we do now have too many chunks, lets send each one using our overflow delay // If we do now have too many chunks, lets send each one using our overflow delay
for (let i = 0; i < chunks.length; i++) { for (let i = 0; i < chunks.length; i++) {
@ -454,8 +454,9 @@ async function generateResponse(conversation, message) {
// Remove older conversations if necessary // Remove older conversations if necessary
const maxLength = 1800; const maxLength = 1800;
if (tokenLength > maxLength) { const tolerance = 100; // allow for some flexibility in the token length
const diff = tokenLength - maxLength; if (tokenLength > maxLength + tolerance) {
const diff = tokenLength - (maxLength + tolerance);
let removedTokens = 0; let removedTokens = 0;
// Iterate over the messages in reverse order // Iterate over the messages in reverse order
@ -466,21 +467,22 @@ async function generateResponse(conversation, message) {
// Calculate the token length of the current message // Calculate the token length of the current message
const messageTokenLength = countLlamaTokens(messageTokens); const messageTokenLength = countLlamaTokens(messageTokens);
// Remove the current message if it won't make the token length negative // Check if the current message plus the tokens in the message is less than or equal to the diff
if (removedTokens + messageTokenLength <= diff) { if (removedTokens + messageTokenLength <= diff) {
messagesCopy.splice(i, 1); messagesCopy.splice(i, 1);
removedTokens += messageTokenLength; removedTokens += messageTokenLength;
console.log(removedTokens + " removed \nAfter Resize: " + countLlamaTokens(messagesCopy) console.log(removedTokens + " removed \nAfter Resize: " + countLlamaTokens(messagesCopy));
)
} else { } else {
// Break the loop if removing the message would make the token length negative // Remove more than one message if necessary to bring the total length below the maximum allowed length
const messagesToRemove = Math.floor(diff / messageTokenLength);
for (let j = 0; j < messagesToRemove; j++) {
messagesCopy.splice(i, 1);
removedTokens += messageTokenLength;
}
break; break;
} }
} }
// Check the updated token length
} }
// Sending request to our API // Sending request to our API
const response = await fetch(`http://${process.env.ROOT_IP}:${process.env.ROOT_PORT}/v1/chat/completions`, { const response = await fetch(`http://${process.env.ROOT_IP}:${process.env.ROOT_PORT}/v1/chat/completions`, {
method: 'POST', method: 'POST',