rayai/llama-cpp-python/llama-cpp-python.env

# Initial system prompt given to the model
INIT_PROMPT="You are an assistant"
# Maximum number of tokens to generate
MAX_TOKENS=4000
# Directory containing your models (llama.cpp-specific setting)
DATA_DIR=/models
# Enable the experimental message cache (limited to a single session).
# The cache will use ~1.4 GB or MORE of RAM. ONLY ENABLE THIS IF YOUR SYSTEM CAN HANDLE IT.
CACHE=1
# Cache backend: "ram" or "disk"
CACHE_TYPE="ram"
# Number of threads to use; currently one standard thread utilizes one whole core.
# A good rule of thumb is the number of physical cores you have, or two fewer
# to leave room for other processes.
N_THREADS=8
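# To check your physical core count on Linux, you can run, for example:
#   lscpu | grep 'Core(s) per socket'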
# Always use MMAP unless you know what you are doing
USE_MMAP=1
# Only use MLOCK if you know what it does!
USE_MLOCK=0
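# Background: mmap lets the OS page the model in from disk on demand, so it is the
# safe default; mlock pins the whole model in RAM, which avoids paging but can fail
# or starve other processes if free memory is insufficient.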
# Penalty for repeated tokens: 1.0 disables the penalty; higher values discourage
# repetition more aggressively (values around 1.1-1.2 are common).
REPEAT_PENALTY=1
# GPU-SPECIFIC SETTINGS BELOW
# Enable GPU offload (requires a GPU-enabled llama.cpp build, e.g. CUDA or Metal).
GPU=1
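# Number of model layers to offload to the GPU; raise this until you hit your
# VRAM limit (in llama-cpp-python, -1 offloads all layers).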
N_GPU_LAYERS=35
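# Unbuffered Python stdout/stderr so logs appear in real time (useful under Docker).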
PYTHONUNBUFFERED=1