change fast model
This commit is contained in:
@@ -27,9 +27,9 @@ spec:
|
|||||||
- name: HF_HOME
|
- name: HF_HOME
|
||||||
value: /models/.hf
|
value: /models/.hf
|
||||||
- name: MODEL_REPO
|
- name: MODEL_REPO
|
||||||
value: "byteshape/Devstral-Small-2-24B-Instruct-2512-GGUF"
|
value: "byteshape/Qwen3.6-35B-A3B-GGUF"
|
||||||
- name: MODEL_FILE
|
- name: MODEL_FILE
|
||||||
value: "Devstral-Small-2-24B-Instruct-2512-IQ4_XS-4.04bpw.gguf"
|
value: "Qwen3.6-35B-A3B-IQ3_S-3.00bpw.gguf"
|
||||||
# optional, only if you need gated/private models
|
# optional, only if you need gated/private models
|
||||||
# - name: HUGGING_FACE_HUB_TOKEN
|
# - name: HUGGING_FACE_HUB_TOKEN
|
||||||
# valueFrom:
|
# valueFrom:
|
||||||
@@ -82,7 +82,7 @@ spec:
|
|||||||
image: ghcr.io/ggml-org/llama.cpp:server-vulkan
|
image: ghcr.io/ggml-org/llama.cpp:server-vulkan
|
||||||
args:
|
args:
|
||||||
- "--model"
|
- "--model"
|
||||||
- "/models/Devstral-Small-2-24B-Instruct-2512-IQ4_XS-4.04bpw.gguf"
|
- "/models/Qwen3.6-35B-A3B-IQ3_S-3.00bpw.gguf"
|
||||||
- "--host"
|
- "--host"
|
||||||
- "0.0.0.0"
|
- "0.0.0.0"
|
||||||
- "--port"
|
- "--port"
|
||||||
|
|||||||
Reference in New Issue
Block a user