first commit
llama-cpp-python/llama-cpp-python.env (Normal file, 35 lines added)
@@ -0,0 +1,35 @@
INIT_PROMPT="You are an assistant"
# Max tokens per generation
MAX_TOKENS=4000
# Directory containing your models (llama.cpp-specific setting)
DATA_DIR=/models
# Enable experimental message caches (limited to a single session)
# Cache will use ~1.4 GB or MORE of RAM. ONLY ENABLE IF YOUR SYSTEM CAN HANDLE THIS.
CACHE=1
CACHE_TYPE="ram"
# Number of threads to use; currently a standard thread will utilize one whole core
# I usually set this to the number of physical cores, or two fewer to leave headroom for other processes.
N_THREADS=8
# Always use MMAP unless you know what you are doing
USE_MMAP=1
# Only use MLOCK if you know what it does!
USE_MLOCK=0
# The higher the value, the more strongly repeated tokens are penalized (1 = no penalty).
REPEAT_PENALTY=1
# GPU-SPECIFIC SETTINGS BELOW
GPU=1
N_GPU_LAYERS=35
PYTHONUNBUFFERED=1
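
Not part of the commit, but as a rough illustration of how an application might consume this file: a minimal Python sketch that loads the variables with python-dotenv and maps them onto llama-cpp-python's Llama API. Only the variable names come from the file above; the model filename, the chat prompt, and the cache wiring are assumptions.

import os

from dotenv import load_dotenv          # pip install python-dotenv
from llama_cpp import Llama, LlamaCache

load_dotenv("llama-cpp-python/llama-cpp-python.env")

llm = Llama(
    # DATA_DIR points at the model directory; the .gguf filename is hypothetical.
    model_path=os.path.join(os.getenv("DATA_DIR", "/models"), "model.gguf"),
    n_threads=int(os.getenv("N_THREADS", "8")),
    use_mmap=bool(int(os.getenv("USE_MMAP", "1"))),
    use_mlock=bool(int(os.getenv("USE_MLOCK", "0"))),
    # Offload layers to the GPU only when GPU=1.
    n_gpu_layers=int(os.getenv("N_GPU_LAYERS", "0")) if os.getenv("GPU") == "1" else 0,
)

# CACHE=1 with CACHE_TYPE="ram" would enable the in-memory prompt cache.
if os.getenv("CACHE") == "1" and os.getenv("CACHE_TYPE", "") == "ram":
    llm.set_cache(LlamaCache())

# INIT_PROMPT, MAX_TOKENS and REPEAT_PENALTY apply at generation time.
output = llm(
    f'{os.getenv("INIT_PROMPT", "")}\n\nUser: Hello!\nAssistant:',
    max_tokens=int(os.getenv("MAX_TOKENS", "4000")),
    repeat_penalty=float(os.getenv("REPEAT_PENALTY", "1")),
)
print(output["choices"][0]["text"])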