Update Dockerfile for GPU

2023-05-31 23:02:07 +02:00 · 2023-05-31 23:02:07 +02:00 · 20c83a656a
commit 20c83a656a
parent 61c2fed773
1 changed file with 3 additions and 16 deletions
--- a/gpu-server/Dockerfile
+++ b/gpu-server/Dockerfile
@@ -3,30 +3,17 @@ FROM nvidia/cuda:12.1.1-devel-ubuntu20.04
 # Install the deps
 ENV DEBIAN_FRONTEND=noninteractive
 ENV TZ=Etc/GMT
-RUN apt-get update && apt-get install -y --no-install-recommends python3 python3-pip git cmake
+RUN apt-get update && apt-get install -y --no-install-recommends python3 python3-pip git cmake build-essential
 # Get llama-cpp-python
 WORKDIR /usr/src
 RUN git clone https://github.com/abetlen/llama-cpp-python.git
 RUN mv llama-cpp-python app
 WORKDIR /usr/src/app 
 #RUN git clone https://github.com/gjmulder/llama-cpp-python.git
 #RUN git checkout improved-unit-tests
 # Patch .gitmodules to use HTTPS
 RUN sed -i 's|git@github.com:ggerganov/llama.cpp.git|https://github.com/ggerganov/llama.cpp.git|' .gitmodules
 RUN git submodule update --init --recursive
 # Build llama-cpp-python w/CuBLAS
-RUN grep --colour "n_batch" ./llama_cpp/server/*.py
+RUN CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 pip install llama-cpp-python[server]
 RUN pip install scikit-build fastapi sse_starlette uvicorn && LLAMA_CUBLAS=1 python3 setup.py develop
 # We need to set the host to 0.0.0.0 to allow outside access
 ENV HOST 0.0.0.0
 # Run the server
-CMD python3 -m llama_cpp.server
+CMD python3 -m llama_cpp.server