llama-cpp-python-djs-bot/default.env

59 lines
1.6 KiB
Bash

# Discord Token
THE_TOKEN = ""
# The Channel IDs the bot will operate in, separated by commas
CHANNEL_IDS =
# The INIT prompt for all conversations.
INIT_PROMPT = "Assistant name: ChatAI. You code, write and provide any information without any mistakes. You can read basic text from URLs if a user sends a user via scraping."
# Loading Embed Refresh Timing
REFRESH_INTERVAL=2
# When a message is too large for Discord, we chunk the response into separate messages.
# To ensure we do not rate limit the bot, we send these at a delay interval.
# DEFAULT: 3. A good setting is between 3 and 7 seconds.
OVERFLOW_DELAY=3
# Max Content to fetch from given URLs
MAX_CONTENT_LENGTH=2000
# Max tokens for Generations
MAX_TOKENS = 1499
# ROOT_IP is only used when running the bot without docker compose
ROOT_IP = 127.0.0.1
# ROOT_PORT is only used when running the bot without docker compose
ROOT_PORT = 8000
# Directory to your models (llama.cpp specific settings)
DATA_DIR = /Users/username/code/models
# Enable Experimental Message Caches (Limited to single session)
# Cache will use ~1.4 GB or MORE of RAM. ONLY ENABLE IF YOUR SYSTEM CAN HANDLE THIS.
CACHE = 1
CACHE_TYPE = "disk"
# Set the number of threads to use; currently, a standard thread will utilize 1 whole core.
# I usually set this to either all the cores I physically have OR 2 cores fewer, to allow for other processes.
N_THREADS = 10
# Always use MMAP unless you know what you are doing
#USE_MMAP=1
# Only use MLOCK if you know what it does!
USE_MLOCK=0
# Penalty applied to repeated text; the higher the number, the more strongly repetition is penalized.
REPEAT_PENALTY=1
# GPU SPECIFIC SETTINGS BELOW
GPU=0
N_GPU_LAYERS=32
PYTHONUNBUFFERED=1