improving token reducer logic

Raven Scott 2023-08-15 01:41:39 -04:00
parent 05e79cba3a
commit a980f3cd43
1 changed file with 15 additions and 13 deletions

@@ -192,7 +192,7 @@ client.on('messageCreate', async (message) => {
   // We can extend our message size using chunks, the issue?
   // Users can abuse this feature, we lock this to 15 to avoid API Abuse.
   const chunks = response.match(new RegExp(`.{1,${limit}}`, "g"));
-  if (chunks.length >= 15) return await message.channel.send("Response chunks too large. Try again");
+  if (chunks.length >= 40) return await message.channel.send("Response chunks too large. Try again");
   // If we do now have too many chunks, lets send each one using our overflow delay
   for (let i = 0; i < chunks.length; i++) {
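
For context, the chunking line in this hunk splits a long model reply into pieces of at most limit characters; the commit only raises the abuse cap from 15 chunks to 40. A standalone sketch of how that regex behaves (the limit value of 2000 is an assumption, matching Discord's usual message cap):

// Sketch of the chunking regex above; limit = 2000 is an assumed value.
const limit = 2000;
const response = "x".repeat(4500); // stand-in for a long model reply
const chunks = response.match(new RegExp(`.{1,${limit}}`, "g"));
console.log(chunks.map(c => c.length)); // [2000, 2000, 500] — 3 chunks, well under the cap of 40
// Caveat: "." does not match newlines, so a reply containing "\n" is also
// split at (and stripped of) every line break.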
@@ -437,25 +437,26 @@ async function generateResponse(conversation, message) {
   function countLlamaTokens(messages) {
     let totalTokens = 0;
     for (const message of messages) {
       if (message.role === 'user' || message.role === 'assistant') {
         const encodedTokens = llamaTokenizer.encode(message.content);
         totalTokens += encodedTokens.length;
       }
     }
     return totalTokens;
   }
   let totalTokens = countLlamaTokens(messagesCopy);
   console.log(`Total Llama tokens: ${totalTokens}`);
   let tokenLength = totalTokens
   // Remove older conversations if necessary
   const maxLength = 1800;
-  if (tokenLength > maxLength) {
-    const diff = tokenLength - maxLength;
+  const tolerance = 100; // allow for some flexibility in the token length
+  if (tokenLength > maxLength + tolerance) {
+    const diff = tokenLength - (maxLength + tolerance);
     let removedTokens = 0;
     // Iterate over the messages in reverse order
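
The substantive change in the hunk above is the tolerance band: trimming now starts only once the conversation exceeds maxLength + tolerance, and the amount to shave off shrinks by the same margin. A quick numeric sketch (the 1850-token count is hypothetical):

// Threshold behavior before vs. after this commit (token count hypothetical).
const maxLength = 1800;
const tolerance = 100;
const tokenLength = 1850;
// Old check: trims, because 1850 > 1800.
console.log(tokenLength > maxLength); // true
// New check: leaves the history alone until it passes 1900.
console.log(tokenLength > maxLength + tolerance); // false
// When trimming does run, the target is also smaller:
// diff = tokenLength - (maxLength + tolerance) instead of tokenLength - maxLength.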
@@ -466,21 +467,22 @@ async function generateResponse(conversation, message) {
       // Calculate the token length of the current message
       const messageTokenLength = countLlamaTokens(messageTokens);
-      // Remove the current message if it won't make the token length negative
+      // Check if the current message plus the tokens in the message is less than or equal to the diff
       if (removedTokens + messageTokenLength <= diff) {
         messagesCopy.splice(i, 1);
         removedTokens += messageTokenLength;
-        console.log(removedTokens + " removed \nAfter Resize: " + countLlamaTokens(messagesCopy)
-        )
+        console.log(removedTokens + " removed \nAfter Resize: " + countLlamaTokens(messagesCopy));
       } else {
-        // Break the loop if removing the message would make the token length negative
+        // Remove more than one message if necessary to bring the total length below the maximum allowed length
+        const messagesToRemove = Math.floor(diff / messageTokenLength);
+        for (let j = 0; j < messagesToRemove; j++) {
+          messagesCopy.splice(i, 1);
+          removedTokens += messageTokenLength;
+        }
         break;
       }
     }
     // Check the updated token length
   }
   // Sending request to our API
   const response = await fetch(`http://${process.env.ROOT_IP}:${process.env.ROOT_PORT}/v1/chat/completions`, {
     method: 'POST',
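
The reworked else branch no longer just breaks: it estimates how many more messages of the current size would fit into the remaining diff, splices that many in one go, then stops. A sketch of the arithmetic (all values hypothetical; the branch implicitly assumes neighboring messages cost about as many tokens as the current one):

// Arithmetic behind the new else branch (values are hypothetical).
const diff = 250; // tokens still over budget when the branch is hit
// Case 1: the current message alone is larger than the remaining diff.
console.log(Math.floor(diff / 300)); // 0 — nothing spliced, the loop just breaks
// Case 2: the current message is small relative to the diff.
console.log(Math.floor(diff / 60)); // 4 — four messages spliced, then break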