# Builds llama-cpp-python with cuBLAS enabled on top of the CUDA 12.1.1
# development image and starts the llama_cpp.server module on container start.
FROM nvidia/cuda:12.1.1-devel-ubuntu20.04

# Build-time only: keeps apt from prompting during package configuration.
# Declared as ARG (not ENV) so it does not leak into the runtime environment.
ARG DEBIAN_FRONTEND=noninteractive

# Default time zone for the container.
ENV TZ=Etc/GMT

# Install Python and the toolchain used to compile llama-cpp-python.
# The apt package lists are removed in the same layer so they never end up
# in the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        cmake \
        git \
        python3 \
        python3-pip \
    && rm -rf /var/lib/apt/lists/*

# Working directory for the build step and for the running server.
WORKDIR /usr/src/app

# Build llama-cpp-python w/cuBLAS, including the "server" extra.
# `python3 -m pip` ties the install to the same interpreter that CMD runs;
# the requirement is quoted so the shell never treats the brackets as a glob.
# NOTE(review): the package is unpinned, so every build pulls whatever release
# is current; pin a version that is known to honor -DLLAMA_CUBLAS=on.
RUN CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 \
    python3 -m pip install --no-cache-dir "llama-cpp-python[server]"

# We need to set the host to 0.0.0.0 to allow outside access.
ENV HOST=0.0.0.0

# Run the server. Exec form makes python3 PID 1 so it receives the stop
# signal from `docker stop` directly instead of via an intermediate shell.
CMD ["python3", "-m", "llama_cpp.server"]