diff --git a/default.env b/default.env index 8d88597..8858fb8 100644 --- a/default.env +++ b/default.env @@ -37,3 +37,9 @@ CACHE = 1 # Set number of threads to use, currently, a standard thread will utilize 1 whole core # I usually will set this between all cores I physcally have OR 2 cores less to allow for other processes. N_THREADS = 4 + +# Always use MMAP unless you know what you are doing +USE_MMAP=1 + +# Only use MLOCK if you know what it does! +USE_MLOCK=0 diff --git a/docker-compose.yml b/docker-compose.yml index c3be378..65506e8 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -11,6 +11,7 @@ services: - ${DATA_DIR}:/usr/src/app/models environment: - HOST=llama-python-server + - MODEL=./models/ggml-vic7b-q4_0.bin llama-python-djs-bot: container_name: llama-python-djs-bot restart: unless-stopped diff --git a/server/Dockerfile b/server/Dockerfile index 3059fe0..cbf8e46 100644 --- a/server/Dockerfile +++ b/server/Dockerfile @@ -8,4 +8,4 @@ WORKDIR /usr/src/app RUN pip install --no-cache-dir llama-cpp-python[server] -CMD python3 -m llama_cpp.server --model /usr/src/app/models/gpt4-x-alpaca-13b-native-4bit-128g.bin \ No newline at end of file +CMD python3 -m llama_cpp.server \ No newline at end of file