From d160219c6bf7fb3326a7411324750b02d6c891d5 Mon Sep 17 00:00:00 2001 From: Alexander Rogov Date: Tue, 9 Jun 2026 20:28:21 +0300 Subject: [PATCH] adjust llama gpu params --- manifests/llama/main.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/manifests/llama/main.yaml b/manifests/llama/main.yaml index 5b191af..aca6269 100644 --- a/manifests/llama/main.yaml +++ b/manifests/llama/main.yaml @@ -93,9 +93,9 @@ spec: # performance tuning - "--ctx-size" - - "32768" + - "24576" - "--parallel" - - "4" + - "2" # KV cache quantization - "--cache-type-k"