Adjust llama GPU params: lower --ctx-size to 24576 and --parallel to 2

This commit is contained in:
2026-06-09 20:28:21 +03:00
parent 866a2d19de
commit d160219c6b

View File

@@ -93,9 +93,9 @@ spec:
# performance tuning # performance tuning
- "--ctx-size" - "--ctx-size"
- "32768" - "24576"
- "--parallel" - "--parallel"
- "4" - "2"
# KV cache quantization # KV cache quantization
- "--cache-type-k" - "--cache-type-k"