improving token reducer logic
parent 05e79cba3a
commit a980f3cd43

llamabot.js (28 changed lines)
diff --git a/llamabot.js b/llamabot.js
--- a/llamabot.js
+++ b/llamabot.js
@@ -192,7 +192,7 @@ client.on('messageCreate', async (message) => {
     // We can extend our message size using chunks, the issue?
     // Users can abuse this feature, we lock this to 15 to avoid API Abuse.
     const chunks = response.match(new RegExp(`.{1,${limit}}`, "g"));
-    if (chunks.length >= 15) return await message.channel.send("Response chunks too large. Try again");
+    if (chunks.length >= 40) return await message.channel.send("Response chunks too large. Try again");
 
     // If we do now have too many chunks, lets send each one using our overflow delay
     for (let i = 0; i < chunks.length; i++) {
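For context, a minimal sketch of what the new cap means in practice. The values are hypothetical: limit is not shown in this hunk and is assumed here to be Discord's 2000-character message ceiling.

    // Hypothetical values; limit assumed to be Discord's 2000-character cap.
    const limit = 2000;
    const response = "x".repeat(90000);   // stand-in for a very long model reply
    const chunks = response.match(new RegExp(`.{1,${limit}}`, "g"));
    console.log(chunks.length);           // 45 -> rejected, since 45 >= 40
    // Old cap: at most 14 chunks (~28,000 characters) were sent.
    // New cap: at most 39 chunks (~78,000 characters) are sent.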
@@ -437,25 +437,26 @@ async function generateResponse(conversation, message) {
 
     function countLlamaTokens(messages) {
         let totalTokens = 0;
 
         for (const message of messages) {
             if (message.role === 'user' || message.role === 'assistant') {
                 const encodedTokens = llamaTokenizer.encode(message.content);
                 totalTokens += encodedTokens.length;
             }
         }
 
         return totalTokens;
     }
 
     let totalTokens = countLlamaTokens(messagesCopy);
     console.log(`Total Llama tokens: ${totalTokens}`);
     let tokenLength = totalTokens
 
     // Remove older conversations if necessary
     const maxLength = 1800;
-    if (tokenLength > maxLength) {
-        const diff = tokenLength - maxLength;
+    const tolerance = 100; // allow for some flexibility in the token length
+    if (tokenLength > maxLength + tolerance) {
+        const diff = tokenLength - (maxLength + tolerance);
         let removedTokens = 0;
 
         // Iterate over the messages in reverse order
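The tolerance changes both when trimming starts and how much is removed: nothing is trimmed until the history exceeds maxLength + tolerance, and the trim target is that combined threshold rather than the bare maximum. A minimal sketch of the arithmetic, with hypothetical token counts:

    const maxLength = 1800;
    const tolerance = 100;

    // Tokens that must be removed for a given history size.
    function tokensToTrim(tokenLength) {
        if (tokenLength > maxLength + tolerance) {
            return tokenLength - (maxLength + tolerance);
        }
        return 0;
    }

    console.log(tokensToTrim(1850)); // 0   -> within tolerance, history untouched
    console.log(tokensToTrim(2400)); // 500 -> roughly 500 tokens of old messages get dropped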
@@ -466,21 +467,22 @@ async function generateResponse(conversation, message) {
             // Calculate the token length of the current message
             const messageTokenLength = countLlamaTokens(messageTokens);
 
-            // Remove the current message if it won't make the token length negative
+            // Check if the current message plus the tokens in the message is less than or equal to the diff
             if (removedTokens + messageTokenLength <= diff) {
                 messagesCopy.splice(i, 1);
                 removedTokens += messageTokenLength;
-                console.log(removedTokens + " removed \nAfter Resize: " + countLlamaTokens(messagesCopy)
-                )
+                console.log(removedTokens + " removed \nAfter Resize: " + countLlamaTokens(messagesCopy));
             } else {
-                // Break the loop if removing the message would make the token length negative
+                // Remove more than one message if necessary to bring the total length below the maximum allowed length
+                const messagesToRemove = Math.floor(diff / messageTokenLength);
+                for (let j = 0; j < messagesToRemove; j++) {
+                    messagesCopy.splice(i, 1);
+                    removedTokens += messageTokenLength;
+                }
                 break;
             }
         }
 
-        // Check the updated token length
     }
 
     // Sending request to our API
     const response = await fetch(`http://${process.env.ROOT_IP}:${process.env.ROOT_PORT}/v1/chat/completions`, {
         method: 'POST',
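The reworked else branch no longer just breaks when the next message would overshoot: it first estimates how many messages of the current size still fit inside the token budget and splices that many out before stopping. Illustrative arithmetic with hypothetical numbers:

    const diff = 500;                 // tokens that still need to go
    const removedTokens = 120;        // already removed by earlier iterations
    const messageTokenLength = 450;   // current message would overshoot: 120 + 450 > 500

    const messagesToRemove = Math.floor(diff / messageTokenLength);
    console.log(messagesToRemove);    // 1 -> one more message is spliced out before the break

Note that the estimate divides the full diff rather than the remaining gap (diff - removedTokens), and it prices every extra message at the current message's token count, so histories with uneven message sizes can end up trimmed more than strictly necessary.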