MAX_TOKENS to int

adding N_THREADS and MAX_TOKENS vars
Bug Fix: Chunk messages if response is too large
2023-04-26 19:24:24 -07:00 · 2023-04-26 19:24:24 -07:00 · 2023-04-26 19:24:24 -07:00
3 changed files with 23 additions and 6 deletions
--- a/default.env
+++ b/default.env
@ -4,6 +4,8 @@ INIT_PROMPT = "Assistant name: ChatAI. You code, write and provide any informati
 ROOT_PORT = 8000
 DATA_DIR = /home/USERNAME/weights
 CACHE = 1
+N_THREADS = 4
+MAX_TOKENS = 1024

 # ROOT_IP is only used when running the bot without docker compose
 ROOT_IP = 192.168.0.15
--- a/docker-compose.yml
+++ b/docker-compose.yml
@ -24,4 +24,4 @@ services:
      - CHANNEL_IDS
      - ROOT_IP=llama-python-server
      - ROOT_PORT=8000
-      - INIT_PROMPT='Assistant name':' ChatAI. You code, write and provide any information without any mistakes.'
+      - INIT_PROMPT='Assistant Name':' ChatAI. You code, write and provide any information without any mistakes.'
--- a/llamabot.js
+++ b/llamabot.js
@ -149,7 +149,21 @@ client.on('messageCreate', async (message) => {

        if (response && response.trim()) {
            // Send response to user if it's not empty
+            const limit = 1980;
+
+            // if we are over the discord char limit we need chunks...
+            if (response.length > limit) {
+              const chunks = response.match(new RegExp(`.{1,${limit}}`, "g"));
+              for (let i = 0; i < chunks.length; i++) {
+                setTimeout(() => {
+                  message.channel.send(chunks[i]);
+                }, i * 3000); // stagger the chunks 3 seconds apart so the API requests are spaced out
+              }
+            } else {
+              // We are good to go, send the response
              await message.channel.send(response);
+            }
+            
            setPresenceOnline()
            setBusy(message.author.id, false);
        } else {
@ -221,7 +235,8 @@ async function generateResponse(conversation, message) {
              'Content-Type': 'application/json'
            },
            body: JSON.stringify({
-                messages: messagesCopy // use the copy of the messages array
+              messages: messagesCopy,
+              max_tokens: Number(process.env.MAX_TOKENS) // MAX_TOKENS env var as a number; NaN (serialized as null) if unset or non-numeric
            }),
            signal: controller.signal
          });
Author	SHA1	Message	Date
Raven Scott	0caf82d7f6	MAX_TOKENS to int	2023-04-26 19:24:24 -07:00
Raven Scott	d2aae48e33	adding N_THREADS and MAX_TOKENS vars	2023-04-26 19:24:24 -07:00
Raven Scott	3da598c218	Bug Fix: Chunk messages if response is too large	2023-04-26 19:24:24 -07:00