---
# Docker Compose service: llama.cpp inference server hosting the
# Qwen3.5-0.8B model (UD-Q2_K_XL quantized GGUF) with its multimodal
# projector (mmproj). Exposes an OpenAI-compatible HTTP API on port 8000.
services:
  llama-cpp:
    image: ghcr.io/ggml-org/llama.cpp:server
    container_name: llama-cpp-qwen
    restart: unless-stopped
    ports:
      # Quoted to avoid YAML 1.1 sexagesimal parsing of "HH:MM"-shaped
      # scalars; host port matches the --port flag passed below.
      - "8000:8000"
    volumes:
      # Local ./models directory mounted into the container at /models.
      - ./models:/models
    # llama-server CLI arguments, one list item per token (exec form).
    # Numeric values are quoted so YAML keeps them as strings — the
    # container runtime requires every argv element to be a string.
    command:
      - -m
      - /models/qwen-3.5-0.8b/Qwen3.5-0.8B-UD-Q2_K_XL.gguf
      - --mmproj
      - /models/qwen-3.5-0.8b/mmproj-F16.gguf
      - --host
      - 0.0.0.0
      - --port
      - "8000"
      - --ctx-size
      - "16384"
      - --temp
      - "0.7"
      - --top-p
      - "0.8"
      - --top-k
      - "20"
      - --min-p
      - "0.00"