From caee50628ce7c413ac1cebbe07df2859fcb23c54 Mon Sep 17 00:00:00 2001
From: Lenoctambule <106790775+lenoctambule@users.noreply.github.com>
Date: Tue, 17 Mar 2026 21:05:06 +0100
Subject: [PATCH] feat: use Q8 quantization

---
Review notes (this section is commentary, not part of the commit message):

* This patch was restored from a copy whose line breaks had been collapsed
  onto a single line. The indentation inside the hunk is reconstructed
  (service keys at 4 spaces, list items at 6) -- TODO confirm with
  `git apply --check` against the target tree before applying.
* The port mapping is left quoted ("8000:8000"). Unquoting it was unrelated
  to the quantization change, and quoted HOST:CONTAINER strings avoid YAML
  implicit-typing surprises. That line is therefore context now, and the
  diffstat below reflects 12 insertions / 20 deletions.
* The post-image blob id on the "index" line is carried over from the
  original patch and no longer matches the resulting file.
* The command is a folded block scalar; Compose splits a string command
  into arguments shell-style, so the quoted values and the single-quoted
  JSON for --chat-template-kwargs each arrive as one argument.

 docker-compose.yml | 32 ++++++++++++--------------------
 1 file changed, 12 insertions(+), 20 deletions(-)

diff --git a/docker-compose.yml b/docker-compose.yml
index 938f9fe..f84046d 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -4,26 +4,18 @@ services:
     container_name: llama-cpp-qwen
     restart: unless-stopped
     ports:
       - "8000:8000"
     volumes:
       - ./models:/models
-    command:
-      - -m
-      - /models/qwen-3.5-0.8b/Qwen3.5-0.8B-UD-Q2_K_XL.gguf
-      - --mmproj
-      - /models/qwen-3.5-0.8b/mmproj-F16.gguf
-      - --host
-      - 0.0.0.0
-      - --port
-      - "8000"
-      - --ctx-size
-      - "16384"
-      - --temp
-      - "0.7"
-      - --top-p
-      - "0.8"
-      - --top-k
-      - "20"
-      - --min-p
-      - "0.00"
-
+    command: >
+      -m /models/qwen/Qwen3.5-0.8B-UD-Q8_K_XL.gguf
+      --mmproj /models/qwen/mmproj-F16.gguf
+      --host 0.0.0.0 --port "8000"
+      --ctx-size "16384"
+      --temp "0.6"
+      --top-p "1.00"
+      --top-k "20"
+      --min-p "0.00"
+      --presence-penalty 2.0
+      --repeat-penalty 1.0
+      --chat-template-kwargs '{"enable_thinking":false}'