diff --git a/llamabot.js b/llamabot.js
index a9279a0..42e4ac3 100644
--- a/llamabot.js
+++ b/llamabot.js
@@ -192,7 +192,7 @@ client.on('messageCreate', async (message) => {
   // We can extend our message size using chunks, the issue?
-  // Users can abuse this feature, we lock this to 15 to avoid API Abuse.
+  // Users can abuse this feature, so we cap this at 40 to avoid API abuse.
   const chunks = response.match(new RegExp(`.{1,${limit}}`, "g"));
-  if (chunks.length >= 15) return await message.channel.send("Response chunks too large. Try again");
+  if (chunks.length >= 40) return await message.channel.send("Response chunks too large. Try again");
 
-  // If we do now have too many chunks, lets send each one using our overflow delay
+  // If we do not have too many chunks, let's send each one using our overflow delay
   for (let i = 0; i < chunks.length; i++) {
@@ -437,25 +437,26 @@ async function generateResponse(conversation, message) {
 
   function countLlamaTokens(messages) {
     let totalTokens = 0;
-
+
     for (const message of messages) {
       if (message.role === 'user' || message.role === 'assistant') {
        const encodedTokens = llamaTokenizer.encode(message.content);
        totalTokens += encodedTokens.length;
      }
    }
-
+
    return totalTokens;
  }
-
+
  let totalTokens = countLlamaTokens(messagesCopy);
  console.log(`Total Llama tokens: ${totalTokens}`);
  let tokenLength = totalTokens
 
  // Remove older conversations if necessary
  const maxLength = 1800;
- if (tokenLength > maxLength) {
-   const diff = tokenLength - maxLength;
+ const tolerance = 100; // allow for some flexibility in the token length
+ if (tokenLength > maxLength + tolerance) {
+   const diff = tokenLength - (maxLength + tolerance);
    let removedTokens = 0;
 
    // Iterate over the messages in reverse order
@@ -466,21 +467,22 @@ async function generateResponse(conversation, message) {
      // Calculate the token length of the current message
      const messageTokenLength = countLlamaTokens(messageTokens);
 
-     // Remove the current message if it won't make the token length negative
+     // Check whether removing this message keeps the removed total within the diff
      if (removedTokens + messageTokenLength <= diff) {
        messagesCopy.splice(i, 1);
        removedTokens += messageTokenLength;
-       console.log(removedTokens + " removed \nAfter Resize: " + countLlamaTokens(messagesCopy)
-       )
+       console.log(removedTokens + " removed \nAfter Resize: " + countLlamaTokens(messagesCopy));
      } else {
-       // Break the loop if removing the message would make the token length negative
+       // Remove more than one message if necessary to bring the total below the allowed length
+       const messagesToRemove = Math.floor(diff / messageTokenLength);
+       for (let j = 0; j < messagesToRemove; j++) {
+         messagesCopy.splice(i, 1);
+         removedTokens += messageTokenLength;
+       }
        break;
      }
    }
-
-   // Check the updated token length
  }
-
  // Sending request to our API
  const response = await fetch(`http://${process.env.ROOT_IP}:${process.env.ROOT_PORT}/v1/chat/completions`, {
    method: 'POST',
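
Note on the new trimming branch: `Math.floor(diff / messageTokenLength)` charges every extra removal at the token cost of the message at index i, and the repeated `messagesCopy.splice(i, 1)` calls keep removing whatever shifts into that slot, so the loop only lands near the target budget when neighbouring messages happen to be about the same size. A minimal alternative sketch that re-counts after each removal, reusing countLlamaTokens and messagesCopy from the diff above (the trimConversation name and its defaults are illustrative, not part of the PR):

    // Sketch only: trim the oldest user/assistant messages until the
    // conversation fits under maxLength + tolerance, matching the
    // "Remove older conversations" intent stated in the diff.
    function trimConversation(messagesCopy, maxLength = 1800, tolerance = 100) {
      // Re-count after every removal so each message is charged its own
      // token cost rather than the first oversized message's cost.
      while (countLlamaTokens(messagesCopy) > maxLength + tolerance) {
        // Drop the oldest user/assistant message; leave system prompts alone.
        const i = messagesCopy.findIndex(
          (m) => m.role === 'user' || m.role === 'assistant'
        );
        if (i === -1) break; // nothing safe left to remove
        messagesCopy.splice(i, 1);
      }
      return messagesCopy;
    }

The while-plus-recount shape trades a few extra tokenizer passes for correctness: it never over- or under-trims regardless of how unevenly token counts are distributed across messages.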