Change fast model from Devstral-Small-2-24B-Instruct-2512 (IQ4_XS) to Qwen3.6-35B-A3B (IQ3_S)

This commit is contained in:
2026-06-09 17:34:31 +03:00
parent 8157d70e3f
commit 03986d7979

View File

@@ -27,9 +27,9 @@ spec:
- name: HF_HOME - name: HF_HOME
value: /models/.hf value: /models/.hf
- name: MODEL_REPO - name: MODEL_REPO
value: "byteshape/Devstral-Small-2-24B-Instruct-2512-GGUF" value: "byteshape/Qwen3.6-35B-A3B-GGUF"
- name: MODEL_FILE - name: MODEL_FILE
value: "Devstral-Small-2-24B-Instruct-2512-IQ4_XS-4.04bpw.gguf" value: "Qwen3.6-35B-A3B-IQ3_S-3.00bpw.gguf"
# optional, only if you need gated/private models # optional, only if you need gated/private models
# - name: HUGGING_FACE_HUB_TOKEN # - name: HUGGING_FACE_HUB_TOKEN
# valueFrom: # valueFrom:
@@ -82,7 +82,7 @@ spec:
image: ghcr.io/ggml-org/llama.cpp:server-vulkan image: ghcr.io/ggml-org/llama.cpp:server-vulkan
args: args:
- "--model" - "--model"
- "/models/Devstral-Small-2-24B-Instruct-2512-IQ4_XS-4.04bpw.gguf" - "/models/Qwen3.6-35B-A3B-IQ3_S-3.00bpw.gguf"
- "--host" - "--host"
- "0.0.0.0" - "0.0.0.0"
- "--port" - "--port"