diff --git a/manifests/llama/main.yaml b/manifests/llama/main.yaml index 9a62ef3..5b191af 100644 --- a/manifests/llama/main.yaml +++ b/manifests/llama/main.yaml @@ -27,9 +27,9 @@ spec: - name: HF_HOME value: /models/.hf - name: MODEL_REPO - value: "byteshape/Devstral-Small-2-24B-Instruct-2512-GGUF" + value: "byteshape/Qwen3.6-35B-A3B-GGUF" - name: MODEL_FILE - value: "Devstral-Small-2-24B-Instruct-2512-IQ4_XS-4.04bpw.gguf" + value: "Qwen3.6-35B-A3B-IQ3_S-3.00bpw.gguf" # optional, only if you need gated/private models # - name: HUGGING_FACE_HUB_TOKEN # valueFrom: @@ -82,7 +82,7 @@ spec: image: ghcr.io/ggml-org/llama.cpp:server-vulkan args: - "--model" - - "/models/Devstral-Small-2-24B-Instruct-2512-IQ4_XS-4.04bpw.gguf" + - "/models/Qwen3.6-35B-A3B-IQ3_S-3.00bpw.gguf" - "--host" - "0.0.0.0" - "--port"