---
# docker-compose service: llama.cpp server running Qwen3.5-0.8B (GGUF)
services:
  llama-cpp:
    image: ghcr.io/ggml-org/llama.cpp:server
    container_name: llama-cpp-qwen
    restart: unless-stopped
    ports:
      # Host:container mapping — quoted so YAML never sexagesimal-parses it;
      # matches the --port flag passed to the server below.
      - "8000:8000"
    volumes:
      # Model files are read from ./models on the host.
      - ./models:/models
    command:
      # Main GGUF model weights.
      - -m
      - /models/qwen-3.5-0.8b/Qwen3.5-0.8B-UD-Q2_K_XL.gguf
      # Multimodal projector file loaded alongside the model.
      - --mmproj
      - /models/qwen-3.5-0.8b/mmproj-F16.gguf
      # Listen on all interfaces inside the container so the port
      # mapping above can reach it. Quoted to keep it a plain string.
      - --host
      - "0.0.0.0"
      - --port
      - "8000"
      # Context window size in tokens.
      - --ctx-size
      - "16384"
      # Sampling parameters — numeric-looking values are quoted so YAML
      # keeps them as strings (argv entries) instead of retyping them.
      - --temp
      - "0.7"
      - --top-p
      - "0.8"
      - --top-k
      - "20"
      - --min-p
      - "0.00"