Change fast model from Devstral-Small-2-24B-Instruct-2512 (IQ4_XS) to Qwen3.6-35B-A3B (IQ3_S)

2026-06-09 17:34:31 +03:00
parent 8157d70e3f
commit 03986d7979
1 changed file with 3 additions and 3 deletions
--- a/manifests/llama/main.yaml
+++ b/manifests/llama/main.yaml
@@ -27,9 +27,9 @@ spec:
            - name: HF_HOME
              value: /models/.hf
            - name: MODEL_REPO
-              value: "byteshape/Devstral-Small-2-24B-Instruct-2512-GGUF"
+              value: "byteshape/Qwen3.6-35B-A3B-GGUF"
            - name: MODEL_FILE
-              value: "Devstral-Small-2-24B-Instruct-2512-IQ4_XS-4.04bpw.gguf"
+              value: "Qwen3.6-35B-A3B-IQ3_S-3.00bpw.gguf"
            # optional, only if you need gated/private models
            # - name: HUGGING_FACE_HUB_TOKEN
            #   valueFrom:
@@ -82,7 +82,7 @@ spec:
          image: ghcr.io/ggml-org/llama.cpp:server-vulkan
          args:
            - "--model"
-            - "/models/Devstral-Small-2-24B-Instruct-2512-IQ4_XS-4.04bpw.gguf"
+            - "/models/Qwen3.6-35B-A3B-IQ3_S-3.00bpw.gguf"
            - "--host"
            - "0.0.0.0"
            - "--port"