diff --git a/manifests/llama/cpu.yaml b/manifests/llama/cpu.yaml
deleted file mode 100644
index aa04ff6..0000000
--- a/manifests/llama/cpu.yaml
+++ /dev/null
@@ -1,147 +0,0 @@
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: llama-server-cpu
-  namespace: llama
-spec:
-  replicas: 1
-  strategy:
-    type: Recreate
-  selector:
-    matchLabels:
-      app: llama-server-cpu
-  template:
-    metadata:
-      labels:
-        app: llama-server-cpu
-      annotations:
-        prometheus.io/scrape: "true"
-        prometheus.io/port: "8080"
-        prometheus.io/path: "/metrics"
-    spec:
-      nodeSelector:
-        gpu: amd
-
-      initContainers:
-        - name: download-model
-          image: python:3.11-slim
-          env:
-            - name: HF_HOME
-              value: /models/.hf
-            - name: MODEL_REPO
-              value: "byteshape/Qwen3-Coder-30B-A3B-Instruct-GGUF"
-            - name: MODEL_FILE
-              value: "Qwen3-Coder-30B-A3B-Instruct-IQ4_XS-4.20bpw.gguf"
-          command:
-            - /bin/sh
-            - -c
-            - |
-              set -eux
-
-              MODEL_PATH="/models/${MODEL_FILE}"
-
-              if [ -f "${MODEL_PATH}" ]; then
-                echo "Model already exists at ${MODEL_PATH}, skipping download"
-                exit 0
-              fi
-
-              echo "Installing Hugging Face Hub downloader"
-              pip install --no-cache-dir huggingface_hub
-
-              echo "Downloading ${MODEL_REPO}/${MODEL_FILE}"
-              python - <<'PY'
-              import os
-              from huggingface_hub import hf_hub_download
-
-              repo_id = os.environ["MODEL_REPO"]
-              filename = os.environ["MODEL_FILE"]
-
-              token = os.environ.get("HUGGING_FACE_HUB_TOKEN")
-
-              path = hf_hub_download(
-                  repo_id=repo_id,
-                  filename=filename,
-                  local_dir="/models",
-                  local_dir_use_symlinks=False,
-                  token=token,
-              )
-              print(f"Downloaded to: {path}")
-              PY
-
-              ls -lah /models
-          volumeMounts:
-            - name: models
-              mountPath: /models
-
-      containers:
-        - name: llama
-          image: ghcr.io/ggml-org/llama.cpp:server
-          args:
-            - "--model"
-            - "/models/Qwen3-Coder-30B-A3B-Instruct-IQ4_XS-4.20bpw.gguf"
-            - "--host"
-            - "0.0.0.0"
-            - "--port"
-            - "8080"
-            - "--metrics"
-            - "--ctx-size"
-            - "32768"
-            - "--parallel"
-            - "1"
-            - "--cache-type-k"
-            - "q8_0"
-            - "--cache-type-v"
-            - "q8_0"
-          ports:
-            - name: http
-              containerPort: 8080
-
-          volumeMounts:
-            - name: models
-              mountPath: /models
-
-          resources:
-            requests:
-              cpu: "8"
-              memory: "24Gi"
-            limits:
-              cpu: "12"
-              memory: "24Gi"
-
-      volumes:
-        - name: models
-          persistentVolumeClaim:
-            claimName: llama-cpu-models-pvc
----
-apiVersion: v1
-kind: Service
-metadata:
-  name: llama-server-cpu
-  namespace: llama
-spec:
-  selector:
-    app: llama-server-cpu
-  ports:
-    - name: http
-      port: 8080
-      targetPort: http
-  type: ClusterIP
----
-apiVersion: monitoring.coreos.com/v1
-kind: PodMonitor
-metadata:
-  name: llama-server-cpu
-  namespace: llama
-  labels:
-    app: llama-server-cpu
-spec:
-  namespaceSelector:
-    matchNames:
-      - llama
-  selector:
-    matchLabels:
-      app: llama-server-cpu
-  podMetricsEndpoints:
-    - port: http
-      path: /metrics
-      interval: 15s