diff --git a/manifests/llama/main.yaml b/manifests/llama/main.yaml index 5b191af..aca6269 100644 --- a/manifests/llama/main.yaml +++ b/manifests/llama/main.yaml @@ -93,9 +93,9 @@ spec: # performance tuning - "--ctx-size" - - "32768" + - "24576" - "--parallel" - - "4" + - "2" # KV cache quantization - "--cache-type-k"