update dockerfile for GPU
This commit is contained in:
parent
61c2fed773
commit
20c83a656a
@@ -3,30 +3,17 @@ FROM nvidia/cuda:12.1.1-devel-ubuntu20.04
|
|||||||
# Install the deps
|
# Install the deps
|
||||||
ENV DEBIAN_FRONTEND=noninteractive
|
ENV DEBIAN_FRONTEND=noninteractive
|
||||||
ENV TZ=Etc/GMT
|
ENV TZ=Etc/GMT
|
||||||
RUN apt-get update && apt-get install -y --no-install-recommends python3 python3-pip git cmake
|
RUN apt-get update && apt-get install -y --no-install-recommends python3 python3-pip git cmake build-essential
|
||||||
|
|
||||||
# Get llama-cpp-python
|
# Get llama-cpp-python
|
||||||
WORKDIR /usr/src
|
WORKDIR /usr/src
|
||||||
|
|
||||||
RUN git clone https://github.com/abetlen/llama-cpp-python.git
|
|
||||||
|
|
||||||
RUN mv llama-cpp-python app
|
|
||||||
|
|
||||||
WORKDIR /usr/src/app
|
WORKDIR /usr/src/app
|
||||||
|
|
||||||
#RUN git clone https://github.com/gjmulder/llama-cpp-python.git
|
|
||||||
#RUN git checkout improved-unit-tests
|
|
||||||
|
|
||||||
# Patch .gitmodules to use HTTPS
|
|
||||||
RUN sed -i 's|git@github.com:ggerganov/llama.cpp.git|https://github.com/ggerganov/llama.cpp.git|' .gitmodules
|
|
||||||
RUN git submodule update --init --recursive
|
|
||||||
|
|
||||||
# Build llama-cpp-python w/CuBLAS
|
# Build llama-cpp-python w/CuBLAS
|
||||||
RUN grep --colour "n_batch" ./llama_cpp/server/*.py
|
RUN CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 pip install llama-cpp-python[server]
|
||||||
RUN pip install scikit-build fastapi sse_starlette uvicorn && LLAMA_CUBLAS=1 python3 setup.py develop
|
|
||||||
|
|
||||||
# We need to set the host to 0.0.0.0 to allow outside access
|
# We need to set the host to 0.0.0.0 to allow outside access
|
||||||
ENV HOST 0.0.0.0
|
ENV HOST 0.0.0.0
|
||||||
|
|
||||||
# Run the server
|
# Run the server
|
||||||
CMD python3 -m llama_cpp.server
|
CMD python3 -m llama_cpp.server
|
Loading…
Reference in New Issue
Block a user