feat: use Q8 quantization and retune sampling parameters

author Lenoctambule
date   2026-03-17 21:05:06 +01:00
parent c155aa1892
commit caee50628c


@@ -4,26 +4,18 @@ services:
     container_name: llama-cpp-qwen
     restart: unless-stopped
     ports:
-      - "8000:8000"
+      - 8000:8000
     volumes:
       - ./models:/models
-    command:
-      - -m
-      - /models/qwen-3.5-0.8b/Qwen3.5-0.8B-UD-Q2_K_XL.gguf
-      - --mmproj
-      - /models/qwen-3.5-0.8b/mmproj-F16.gguf
-      - --host
-      - 0.0.0.0
-      - --port
-      - "8000"
-      - --ctx-size
-      - "16384"
-      - --temp
-      - "0.7"
-      - --top-p
-      - "0.8"
-      - --top-k
-      - "20"
-      - --min-p
-      - "0.00"
+    command: >
+      -m /models/qwen/Qwen3.5-0.8B-UD-Q8_K_XL.gguf
+      --mmproj /models/qwen/mmproj-F16.gguf
+      --host 0.0.0.0 --port "8000"
+      --ctx-size "16384"
+      --temp "0.6"
+      --top-p "1.00"
+      --top-k "20"
+      --min-p "0.00"
+      --presence-penalty 2.0
+      --repeat-penalty 1.0
+      --chat-template-kwargs '{"enable_thinking":false}'