improving token reducer logic
This commit is contained in:
parent
05e79cba3a
commit
a980f3cd43
28
llamabot.js
28
llamabot.js
@ -192,7 +192,7 @@ client.on('messageCreate', async (message) => {
|
||||
// We can extend our message size using chunks — but there is a catch:
|
||||
// Users can abuse this feature, so we cap the chunk count (now 40) to avoid API abuse.
|
||||
const chunks = response.match(new RegExp(`.{1,${limit}}`, "g"));
|
||||
if (chunks.length >= 15) return await message.channel.send("Response chunks too large. Try again");
|
||||
if (chunks.length >= 40) return await message.channel.send("Response chunks too large. Try again");
|
||||
|
||||
// If we do not have too many chunks, send each one using our overflow delay
|
||||
for (let i = 0; i < chunks.length; i++) {
|
||||
@ -437,25 +437,26 @@ async function generateResponse(conversation, message) {
|
||||
|
||||
/**
 * Sums the Llama token counts of every user/assistant message.
 *
 * Messages with any other role (e.g. system) are excluded from the total,
 * matching how the conversation budget is enforced downstream.
 *
 * @param {Array<{role: string, content: string}>} messages - conversation history
 *   (assumed to be an array of role/content records — confirm against callers).
 * @returns {number} total number of Llama tokens across counted messages.
 */
function countLlamaTokens(messages) {
  return messages
    .filter(({ role }) => role === 'user' || role === 'assistant')
    .reduce((sum, { content }) => sum + llamaTokenizer.encode(content).length, 0);
}
|
||||
|
||||
|
||||
let totalTokens = countLlamaTokens(messagesCopy);
|
||||
console.log(`Total Llama tokens: ${totalTokens}`);
|
||||
let tokenLength = totalTokens
|
||||
|
||||
// Remove older conversations if necessary
|
||||
const maxLength = 1800;
|
||||
if (tokenLength > maxLength) {
|
||||
const diff = tokenLength - maxLength;
|
||||
const tolerance = 100; // allow for some flexibility in the token length
|
||||
if (tokenLength > maxLength + tolerance) {
|
||||
const diff = tokenLength - (maxLength + tolerance);
|
||||
let removedTokens = 0;
|
||||
|
||||
// Iterate over the messages in reverse order
|
||||
@ -466,21 +467,22 @@ async function generateResponse(conversation, message) {
|
||||
// Calculate the token length of the current message
|
||||
const messageTokenLength = countLlamaTokens(messageTokens);
|
||||
|
||||
// Remove the current message if it won't make the token length negative
|
||||
// Check if the current message plus the tokens in the message is less than or equal to the diff
|
||||
if (removedTokens + messageTokenLength <= diff) {
|
||||
messagesCopy.splice(i, 1);
|
||||
removedTokens += messageTokenLength;
|
||||
console.log(removedTokens + " removed \nAfter Resize: " + countLlamaTokens(messagesCopy)
|
||||
)
|
||||
console.log(removedTokens + " removed \nAfter Resize: " + countLlamaTokens(messagesCopy));
|
||||
} else {
|
||||
// Break the loop if removing the message would make the token length negative
|
||||
// Remove more than one message if necessary to bring the total length below the maximum allowed length
|
||||
const messagesToRemove = Math.floor(diff / messageTokenLength);
|
||||
for (let j = 0; j < messagesToRemove; j++) {
|
||||
messagesCopy.splice(i, 1);
|
||||
removedTokens += messageTokenLength;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Check the updated token length
|
||||
}
|
||||
|
||||
// Sending request to our API
|
||||
const response = await fetch(`http://${process.env.ROOT_IP}:${process.env.ROOT_PORT}/v1/chat/completions`, {
|
||||
method: 'POST',
|
||||
|
Loading…
Reference in New Issue
Block a user