rayai/bot/default.env

# Discord Token
THE_TOKEN = ""

# The Channel IDs the bot will operate in, separated by commas
CHANNEL_IDS = 

INIT_PROMPT="You are an assitant"

# Key for AbuseDB
ABUSE_KEY=

# When a message is too large for Discord, we chunk the response into separate messages.
# To ensure we do not rate limit the bot, we send these at a delay interval.
# DEFAULT: 3. A good setting is between 3 and 7 seconds.
OVERFLOW_DELAY=3

# Max Content to fetch from given URLs
MAX_CONTENT_LENGTH=8000

# Max tokens for Generations
MAX_TOKENS = 4000

# ROOT_IP is only used when running the bot without docker compose
ROOT_IP = 192.168.0.8

# ROOT_PORT is only used when running the bot without docker compose
ROOT_PORT = 3000

# Directory to your models (llama.cpp specific settings)
DATA_DIR = /models

# Enable Experimental Message Caches (Limited to single session)
# Cache will use ~1.4 GB or MORE of RAM. ONLY ENABLE IF YOUR SYSTEM CAN HANDLE THIS.
CACHE = 1

CACHE_TYPE = "ram"

# Set the number of threads to use. Currently, a standard thread will utilize 1 whole core.
# I usually set this to the number of physical cores I have, or 2 fewer, to leave room for other processes.
N_THREADS = 8

# Always use MMAP unless you know what you are doing
USE_MMAP=1

# Only use MLOCK if you know what it does!
USE_MLOCK=0

# The higher the number the more hard core.
REPEAT_PENALTY=1

# GPU SPECIFIC SETTINGS BELOW

GPU=1

N_GPU_LAYERS=35 

PYTHONUNBUFFERED=1

# Custom Stuff internal to my use cases.
PATH_KEY=""
API_KEY=""

API_PATH=""
first commit 2024-08-03 02:19:51 -04:00			`# Discord Token`
			`THE_TOKEN = ""`

			`# The Channel IDs the bot will operate in, separated by commas`
			`CHANNEL_IDS =`

			`INIT_PROMPT="You are an assitant"`

			`# Key for AbuseDB`
			`ABUSE_KEY=`

			`# When a message is too large for Discord, we chunk the response into separate messages.`
			`# To ensure we do not rate limit the bot, we send these at a delay interval.`
			`# DEFAULT: 3. A good setting is between 3 and 7 seconds.`
			`OVERFLOW_DELAY=3`

			`# Max Content to fetch from given URLs`
			`MAX_CONTENT_LENGTH=8000`

			`# Max tokens for Generations`
			`MAX_TOKENS = 4000`

			`# ROOT_IP is only used when running the bot without docker compose`
			`ROOT_IP = 192.168.0.8`

			`# ROOT_PORT is only used when running the bot without docker compose`
			`ROOT_PORT = 3000`

			`# Directory to your models (llama.cpp specific settings)`
			`DATA_DIR = /models`

			`# Enable Experimental Message Caches (Limited to single session)`
			`# Cache will use ~1.4 GB or MORE of RAM. ONLY ENABLE IF YOUR SYSTEM CAN HANDLE THIS.`
			`CACHE = 1`

			`CACHE_TYPE = "ram"`

			`# Set the number of threads to use. Currently, a standard thread will utilize 1 whole core.`
			`# I usually set this to the number of physical cores I have, or 2 fewer, to leave room for other processes.`
			`N_THREADS = 8`

			`# Always use MMAP unless you know what you are doing`
			`USE_MMAP=1`

			`# Only use MLOCK if you know what it does!`
			`USE_MLOCK=0`

			`# The higher the number the more hard core.`
			`REPEAT_PENALTY=1`

			`# GPU SPECIFIC SETTINGS BELOW`

			`GPU=1`

			`N_GPU_LAYERS=35`

			`PYTHONUNBUFFERED=1`

			`# Custom Stuff internal to my use cases.`
			`PATH_KEY=""`
			`API_KEY=""`

			`API_PATH=""`