From 03986d79799b052753286d36449e2621e4bb2205 Mon Sep 17 00:00:00 2001 From: Alexander Rogov Date: Tue, 9 Jun 2026 17:34:31 +0300 Subject: [PATCH] change fast model --- manifests/llama/main.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/manifests/llama/main.yaml b/manifests/llama/main.yaml index 9a62ef3..5b191af 100644 --- a/manifests/llama/main.yaml +++ b/manifests/llama/main.yaml @@ -27,9 +27,9 @@ spec: - name: HF_HOME value: /models/.hf - name: MODEL_REPO - value: "byteshape/Devstral-Small-2-24B-Instruct-2512-GGUF" + value: "byteshape/Qwen3.6-35B-A3B-GGUF" - name: MODEL_FILE - value: "Devstral-Small-2-24B-Instruct-2512-IQ4_XS-4.04bpw.gguf" + value: "Qwen3.6-35B-A3B-IQ3_S-3.00bpw.gguf" # optional, only if you need gated/private models # - name: HUGGING_FACE_HUB_TOKEN # valueFrom: @@ -82,7 +82,7 @@ spec: image: ghcr.io/ggml-org/llama.cpp:server-vulkan args: - "--model" - - "/models/Devstral-Small-2-24B-Instruct-2512-IQ4_XS-4.04bpw.gguf" + - "/models/Qwen3.6-35B-A3B-IQ3_S-3.00bpw.gguf" - "--host" - "0.0.0.0" - "--port"