# llama-cpp-python-djs-bot/docker-compose.gpu.yml

# Compose file format version; quoted so it stays a string rather than a float.
version: '3.9'

services:
  # GPU model-server service, built from ./gpu-server and run under the
  # NVIDIA container runtime.
  backend:
    # The frontend service addresses this container by this name (its ROOT_IP).
    container_name: llama-gpu-server
    restart: unless-stopped
    build:
      context: ./gpu-server
    env_file: .env
    volumes:
      # Host directory with the model files, mounted at the container's models
      # directory. ":?" makes Compose abort with this message when DATA_DIR is
      # unset or empty instead of producing an invalid ":/usr/src/app/models".
      - "${DATA_DIR:?DATA_DIR must be set to the host models directory}:/usr/src/app/models"
    environment:
      # Same value as container_name above.
      - HOST=llama-gpu-server
      # Relative model path; presumably resolved against /usr/src/app, where
      # the volume above is mounted -- TODO confirm the image's working directory.
      - MODEL=./models/ggml-vic7b-q5_1.bin.1
      - NVIDIA_VISIBLE_DEVICES=all
    runtime: nvidia

  # Bot frontend service, built from the repository root; started after the
  # backend service and configured to reach it via ROOT_IP/ROOT_PORT.
  frontend:
    container_name: llama-djs-bot
    restart: unless-stopped
    build:
      context: .
    depends_on:
      - backend
    environment:
      # Names without a value are passed through from the environment that
      # Compose itself is run in.
      - THE_TOKEN
      - REFRESH_INTERVAL
      - CHANNEL_IDS
      - GPU
      # Same value as the backend service's container_name.
      - ROOT_IP=llama-gpu-server
      - ROOT_PORT=8000
      # The whole entry is quoted so the ": " inside the prompt is not parsed
      # as a YAML mapping separator. List-form values are passed verbatim, so
      # quotes placed inside the value would end up in the prompt itself.
      - "INIT_PROMPT=Assistant Name: ChatAI. You code, write and provide any information without any mistakes."
      - NVIDIA_VISIBLE_DEVICES=all
    runtime: nvidia