diff --git a/gpu-server/Dockerfile b/gpu-server/Dockerfile
index 1c3604d..172f69c 100644
--- a/gpu-server/Dockerfile
+++ b/gpu-server/Dockerfile
@@ -3,30 +3,17 @@ FROM nvidia/cuda:12.1.1-devel-ubuntu20.04
 
 # Install the deps
 ENV DEBIAN_FRONTEND=noninteractive
 ENV TZ=Etc/GMT
-RUN apt-get update && apt-get install -y --no-install-recommends python3 python3-pip git cmake
-
+RUN apt-get update && apt-get install -y --no-install-recommends python3 python3-pip git cmake build-essential
 
 # Get llama-cpp-python
 WORKDIR /usr/src
-RUN git clone https://github.com/abetlen/llama-cpp-python.git
-
-RUN mv llama-cpp-python app
-
 WORKDIR /usr/src/app
-#RUN git clone https://github.com/gjmulder/llama-cpp-python.git
-#RUN git checkout improved-unit-tests
-
-# Patch .gitmodules to use HTTPS
-RUN sed -i 's|git@github.com:ggerganov/llama.cpp.git|https://github.com/ggerganov/llama.cpp.git|' .gitmodules
-RUN git submodule update --init --recursive
-
 # Build llama-cpp-python w/CuBLAS
-RUN grep --colour "n_batch" ./llama_cpp/server/*.py
-RUN pip install scikit-build fastapi sse_starlette uvicorn && LLAMA_CUBLAS=1 python3 setup.py develop
+RUN CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 pip install llama-cpp-python[server]
 
 # We need to set the host to 0.0.0.0 to allow outside access
 ENV HOST 0.0.0.0
 
 # Run the server
-CMD python3 -m llama_cpp.server
+CMD python3 -m llama_cpp.server
\ No newline at end of file
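
For context, a minimal sketch of how the rebuilt image might be exercised; the image tag, model path, and mounted directory below are assumptions for illustration, not part of this diff. llama-cpp-python's server is configured through environment variables (the Dockerfile already sets HOST; MODEL points it at the weights) and serves an OpenAI-compatible API, listening on port 8000 by default:

    # Build the image from the gpu-server directory (tag name is arbitrary)
    docker build -t llama-cpp-gpu ./gpu-server

    # Run with GPU access, mounting a host directory that holds the model;
    # the model filename here is a placeholder
    docker run --gpus all -p 8000:8000 \
        -v /path/to/models:/models \
        -e MODEL=/models/model.bin \
        llama-cpp-gpu

    # Smoke-test the OpenAI-compatible completions endpoint
    curl http://localhost:8000/v1/completions \
        -H "Content-Type: application/json" \
        -d '{"prompt": "Hello, ", "max_tokens": 16}'

On the design choice: replacing the `setup.py develop` build of a cloned checkout with `CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 pip install llama-cpp-python[server]` lets pip build from the published package (which bundles llama.cpp and, via the `[server]` extra, pulls in fastapi/uvicorn/sse-starlette), so the clone, submodule init, and .gitmodules patching steps removed above are no longer needed.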