adjust llama gpu params

This commit is contained in:
2026-06-09 20:28:21 +03:00
parent 866a2d19de
commit d160219c6b

View File

@@ -93,9 +93,9 @@ spec:
  # performance tuning
  - "--ctx-size"
- - "32768"
+ - "24576"
  - "--parallel"
- - "4"
+ - "2"
  # KV cache quantization
  - "--cache-type-k"