change fast model
This commit is contained in:
@@ -27,9 +27,9 @@ spec:
|
||||
- name: HF_HOME
|
||||
value: /models/.hf
|
||||
- name: MODEL_REPO
|
||||
value: "byteshape/Devstral-Small-2-24B-Instruct-2512-GGUF"
|
||||
value: "byteshape/Qwen3.6-35B-A3B-GGUF"
|
||||
- name: MODEL_FILE
|
||||
value: "Devstral-Small-2-24B-Instruct-2512-IQ4_XS-4.04bpw.gguf"
|
||||
value: "Qwen3.6-35B-A3B-IQ3_S-3.00bpw.gguf"
|
||||
# optional, only if you need gated/private models
|
||||
# - name: HUGGING_FACE_HUB_TOKEN
|
||||
# valueFrom:
|
||||
@@ -82,7 +82,7 @@ spec:
|
||||
image: ghcr.io/ggml-org/llama.cpp:server-vulkan
|
||||
args:
|
||||
- "--model"
|
||||
- "/models/Devstral-Small-2-24B-Instruct-2512-IQ4_XS-4.04bpw.gguf"
|
||||
- "/models/Qwen3.6-35B-A3B-IQ3_S-3.00bpw.gguf"
|
||||
- "--host"
|
||||
- "0.0.0.0"
|
||||
- "--port"
|
||||
|
||||
Reference in New Issue
Block a user