Add N_THREADS and MAX_TOKENS environment variables

This commit is contained in:
Raven Scott 2023-04-27 02:56:27 +02:00 committed by MrTuxedo
parent 3da598c218
commit d2aae48e33
3 changed files with 8 additions and 5 deletions

View File

@ -4,6 +4,8 @@ INIT_PROMPT = "Assistant name: ChatAI. You code, write and provide any informati
ROOT_PORT = 8000
DATA_DIR = /home/USERNAME/weights
CACHE = 1
N_THREADS = 4
MAX_TOKENS = 1024
# ROOT_IP is only used when running the bot without docker compose
ROOT_IP = 192.168.0.15

View File

@ -24,4 +24,4 @@ services:
- CHANNEL_IDS
- ROOT_IP=llama-python-server
- ROOT_PORT=8000
- INIT_PROMPT='Assistant name':' ChatAI. You code, write and provide any information without any mistakes.'
- INIT_PROMPT='Assistant Name':' ChatAI. You code, write and provide any information without any mistakes.'

View File

@ -231,14 +231,15 @@ async function generateResponse(conversation, message) {
const response = await fetch(`http://${process.env.ROOT_IP}:${process.env.ROOT_PORT}/v1/chat/completions`, {
method: 'POST',
headers: {
'accept': 'application/json',
'Content-Type': 'application/json'
'accept': 'application/json',
'Content-Type': 'application/json'
},
body: JSON.stringify({
messages: messagesCopy // use the copy of the messages array
messages: messagesCopy,
max_tokens: process.env.MAX_TOKENS // add the max_tokens parameter here
}),
signal: controller.signal
});
});
const responseData = await response.json();
console.log(JSON.stringify(responseData));