# Docker Compose service running llama.cpp's OpenAI-compatible server
# with a Qwen3.5 GGUF model (plus multimodal projector) mounted from ./models.
services:
  llama-cpp:
    image: ghcr.io/ggml-org/llama.cpp:server
    container_name: llama-cpp-qwen
    restart: unless-stopped
    ports:
      # Quoted to avoid the YAML 1.1 sexagesimal-number trap for HOST:CONTAINER pairs.
      - "8000:8000"
    volumes:
      # Host ./models is mounted read-write at /models inside the container.
      - ./models:/models
    # Folded scalar (>-) joins the flag lines into one command string and
    # strips the trailing newline. Compose splits the string shlex-style, so
    # the embedded double quotes are removed before the args reach the server.
    command: >-
      -m /models/qwen/Qwen3.5-0.8B-UD-Q8_K_XL.gguf
      --mmproj /models/qwen/mmproj-F16.gguf
      --host 0.0.0.0 --port "8000"
      --ctx-size "16384"
      --temp "0.6"
      --top-p "1.00"
      --top-k "20"
      --min-p "0.00"
      --presence-penalty 2.0
      --repeat-penalty 1.0
      --chat-template-kwargs '{"enable_thinking":false}'