llama-cpp-python-djs-bot/gpu-server/Dockerfile

FROM nvidia/cuda:12.1.1-devel-ubuntu20.04
# Install the deps
ENV DEBIAN_FRONTEND=noninteractive
ENV TZ=Etc/GMT
RUN apt-get update && apt-get install -y --no-install-recommends python3 python3-pip git cmake build-essential
# Set the working directory for the build
WORKDIR /usr/src/app
# Build llama-cpp-python with cuBLAS support
RUN CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 pip install "llama-cpp-python[server]"
# Bind to 0.0.0.0 so the server is reachable from outside the container
ENV HOST=0.0.0.0
# Run the server
CMD ["python3", "-m", "llama_cpp.server"]
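# Usage sketch: build the image and start the server with GPU access. The image
# tag, host model directory, and model filename below are placeholders, and this
# assumes the NVIDIA Container Toolkit is installed and that llama_cpp.server
# reads the model path from the MODEL environment variable.
#   docker build -t llama-cpp-gpu-server .
#   docker run --gpus all -p 8000:8000 \
#     -v /path/to/models:/models \
#     -e MODEL=/models/ggml-model.bin \
#     llama-cpp-gpu-server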