Initial commit: k3s GitOps manifests with ArgoCD App-of-Apps

2026-05-05 13:18:51 +03:00
commit 5d9a80b976
65 changed files with 3445 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1 @@
+config
--- a/argocd/app-of-apps.yaml
+++ b/argocd/app-of-apps.yaml
@@ -0,0 +1,30 @@
+# Root "App-of-Apps" Application: Argo CD reads every *.yaml file under
+# argocd/apps (recursively) in the Gitea repository and applies it, so each
+# manifest found there becomes a child Application.
+apiVersion: argoproj.io/v1alpha1
+kind: Application
+metadata:
+  name: root-app
+  namespace: argocd
+  # Argo CD resources finalizer: deleting this Application cascades to the
+  # resources it manages.
+  finalizers:
+    - resources-finalizer.argocd.argoproj.io
+spec:
+  project: default
+  destination:
+    namespace: argocd
+    server: https://kubernetes.default.svc
+  source:
+    repoURL: http://gitea.gitea.svc.cluster.local:3000/gitea/k3s-manifests.git
+    path: argocd/apps
+    targetRevision: main
+    directory:
+      include: '*.yaml'
+      recurse: true
+  syncPolicy:
+    syncOptions:
+      - CreateNamespace=true
+    automated:
+      selfHeal: true
+      prune: true
--- a/argocd/apps/cert-manager.yaml
+++ b/argocd/apps/cert-manager.yaml
@@ -0,0 +1,30 @@
+# Argo CD Application that installs the cert-manager Helm chart from the
+# Jetstack chart repository into the cert-manager namespace.
+apiVersion: argoproj.io/v1alpha1
+kind: Application
+metadata:
+  name: cert-manager
+  namespace: argocd
+  finalizers:
+    - resources-finalizer.argocd.argoproj.io
+spec:
+  project: default
+  destination:
+    namespace: cert-manager
+    server: https://kubernetes.default.svc
+  source:
+    repoURL: https://charts.jetstack.io
+    chart: cert-manager
+    targetRevision: v1.20.1
+    helm:
+      # Inline Helm values handed to the chart; crds.enabled=true asks the
+      # chart to install its CRDs as well.
+      values: |
+        crds:
+          enabled: true
+  syncPolicy:
+    syncOptions:
+      - CreateNamespace=true
+    automated:
+      selfHeal: true
+      prune: true
--- a/argocd/apps/llama.yaml
+++ b/argocd/apps/llama.yaml
@@ -0,0 +1,24 @@
+# Argo CD Application that syncs the manifests stored under manifests/llama
+# in the Gitea repository into the llama namespace.
+apiVersion: argoproj.io/v1alpha1
+kind: Application
+metadata:
+  name: llama
+  namespace: argocd
+  finalizers:
+    - resources-finalizer.argocd.argoproj.io
+spec:
+  project: default
+  destination:
+    namespace: llama
+    server: https://kubernetes.default.svc
+  source:
+    repoURL: http://gitea.gitea.svc.cluster.local:3000/gitea/k3s-manifests.git
+    path: manifests/llama
+    targetRevision: main
+  syncPolicy:
+    syncOptions:
+      - CreateNamespace=true
+    automated:
+      selfHeal: true
+      prune: true
--- a/argocd/apps/longhorn.yaml
+++ b/argocd/apps/longhorn.yaml
@@ -0,0 +1,33 @@
+# Argo CD Application for Longhorn: the upstream Helm chart plus the extra
+# manifests kept under manifests/longhorn in the Gitea repository.
+apiVersion: argoproj.io/v1alpha1
+kind: Application
+metadata:
+  name: longhorn
+  namespace: argocd
+  finalizers:
+    - resources-finalizer.argocd.argoproj.io
+spec:
+  project: default
+  destination:
+    namespace: longhorn-system
+    server: https://kubernetes.default.svc
+  sources:
+    # Source 1: Longhorn chart with preUpgradeChecker.jobEnabled set to false.
+    - chart: longhorn
+      repoURL: https://charts.longhorn.io
+      targetRevision: 1.11.2
+      helm:
+        values: |
+          preUpgradeChecker:
+            jobEnabled: false
+    # Source 2: plain manifests from this repository.
+    - path: manifests/longhorn
+      repoURL: http://gitea.gitea.svc.cluster.local:3000/gitea/k3s-manifests.git
+      targetRevision: main
+  syncPolicy:
+    syncOptions:
+      - CreateNamespace=true
+    automated:
+      selfHeal: true
+      prune: true
--- a/argocd/apps/metallb.yaml
+++ b/argocd/apps/metallb.yaml
@@ -0,0 +1,33 @@
+# Argo CD Application for MetalLB: the upstream Helm chart plus the extra
+# manifests kept under manifests/metallb in the Gitea repository.
+apiVersion: argoproj.io/v1alpha1
+kind: Application
+metadata:
+  name: metallb
+  namespace: argocd
+  finalizers:
+    - resources-finalizer.argocd.argoproj.io
+spec:
+  project: default
+  destination:
+    namespace: metallb-system
+    server: https://kubernetes.default.svc
+  sources:
+    # Source 1: MetalLB chart with crds.enabled set to true.
+    - chart: metallb
+      repoURL: https://metallb.github.io/metallb
+      targetRevision: 0.14.5
+      helm:
+        values: |
+          crds:
+            enabled: true
+    # Source 2: plain manifests from this repository.
+    - path: manifests/metallb
+      repoURL: http://gitea.gitea.svc.cluster.local:3000/gitea/k3s-manifests.git
+      targetRevision: main
+  syncPolicy:
+    syncOptions:
+      - CreateNamespace=true
+    automated:
+      selfHeal: true
+      prune: true
--- a/argocd/apps/metrics.yaml
+++ b/argocd/apps/metrics.yaml
@@ -0,0 +1,45 @@
+# Argo CD Application for the monitoring stack: the kube-prometheus-stack and
+# victoria-metrics-single Helm charts (values files taken from the Gitea
+# repository through the "values" ref) plus the manifests in manifests/metrics.
+apiVersion: argoproj.io/v1alpha1
+kind: Application
+metadata:
+  name: metrics
+  namespace: argocd
+  finalizers:
+    - resources-finalizer.argocd.argoproj.io
+spec:
+  project: default
+  sources:
+    - repoURL: https://prometheus-community.github.io/helm-charts
+      chart: kube-prometheus-stack
+      targetRevision: 82.16.2
+      helm:
+        valueFiles:
+          - $values/manifests/metrics/kube-prometheus-stack-values.yaml
+    - repoURL: https://victoriametrics.github.io/helm-charts/
+      chart: victoria-metrics-single
+      targetRevision: 0.34.0
+      helm:
+        valueFiles:
+          - $values/manifests/metrics/victoria-metrics-single-values.yaml
+    # Ref-only source: provides the $values root used by the valueFiles above.
+    - repoURL: http://gitea.gitea.svc.cluster.local:3000/gitea/k3s-manifests.git
+      targetRevision: main
+      ref: values
+    - repoURL: http://gitea.gitea.svc.cluster.local:3000/gitea/k3s-manifests.git
+      targetRevision: main
+      path: manifests/metrics
+  destination:
+    server: https://kubernetes.default.svc
+    namespace: metrics
+  syncPolicy:
+    automated:
+      prune: true
+      selfHeal: true
+    syncOptions:
+      - CreateNamespace=true
+      # The Prometheus Operator CRDs shipped with kube-prometheus-stack are too
+      # large for the last-applied-configuration annotation that client-side
+      # apply writes (262144-byte limit), so sync with server-side apply.
+      - ServerSideApply=true
--- a/argocd/apps/sillytavern.yaml
+++ b/argocd/apps/sillytavern.yaml
@@ -0,0 +1,24 @@
+# Argo CD Application that syncs the manifests stored under
+# manifests/sillytavern in the Gitea repository into the sillytavern namespace.
+apiVersion: argoproj.io/v1alpha1
+kind: Application
+metadata:
+  name: sillytavern
+  namespace: argocd
+  finalizers:
+    - resources-finalizer.argocd.argoproj.io
+spec:
+  project: default
+  destination:
+    namespace: sillytavern
+    server: https://kubernetes.default.svc
+  source:
+    repoURL: http://gitea.gitea.svc.cluster.local:3000/gitea/k3s-manifests.git
+    path: manifests/sillytavern
+    targetRevision: main
+  syncPolicy:
+    syncOptions:
+      - CreateNamespace=true
+    automated:
+      selfHeal: true
+      prune: true
--- a/bootstrap.sh
+++ b/bootstrap.sh
@@ -0,0 +1,246 @@
+#!/bin/bash
+set -e
+
+# =============================================================================
+# k3s GitOps Bootstrap Script
+# =============================================================================
+# This script sets up Gitea + ArgoCD on the k3s cluster and configures
+# GitOps with the App-of-Apps pattern.
+#
+# Prerequisites:
+#   - kubectl + kubeconfig access to the cluster
+#   - helm installed
+#   - git installed
+#   - DNS for *.mrt0rtikize.ru pointing to cluster nodes
+#
+# NOTE(review): bootstrap/gitea/ requests the "longhorn" StorageClass and the
+# "letsencrypt-production" ClusterIssuer, while Longhorn and cert-manager are
+# only synced by ArgoCD after step 6. Confirm both already exist on the
+# cluster, otherwise the Gitea pod cannot become ready in step 1.
+# =============================================================================
+
+# bootstrap.sh sits at the repository root, so the script directory is the
+# repository directory that holds bootstrap/, argocd/ and the kubeconfig.
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+REPO_DIR="${SCRIPT_DIR}"
+# Exported so helm, which is not invoked through ${KCTL}, talks to the same
+# cluster as kubectl.
+export KUBECONFIG="${REPO_DIR}/config"
+KCTL="kubectl --kubeconfig ${KUBECONFIG}"
+
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+NC='\033[0m'
+
+echo -e "${GREEN}==============================================${NC}"
+echo -e "${GREEN}   k3s GitOps Bootstrap${NC}"
+echo -e "${GREEN}==============================================${NC}"
+echo ""
+
+# -----------------------------------------------------------------------------
+# Step 1: Deploy Gitea
+# -----------------------------------------------------------------------------
+echo -e "${YELLOW}[1/6] Deploying Gitea...${NC}"
+
+${KCTL} apply -f "${REPO_DIR}/bootstrap/gitea/"
+
+echo "  Waiting for Gitea pod to be ready..."
+${KCTL} wait --for=condition=ready pod -l app=gitea -n gitea --timeout=120s 2>/dev/null || {
+    echo -e "${RED}  Gitea pod not ready after 120s. Checking status...${NC}"
+    ${KCTL} get pod -n gitea
+    exit 1
+}
+
+echo -e "${GREEN}  Gitea deployed!${NC}"
+echo ""
+
+# -----------------------------------------------------------------------------
+# Step 2: Gitea initial setup (manual)
+# -----------------------------------------------------------------------------
+echo -e "${YELLOW}[2/6] Gitea setup${NC}"
+echo ""
+echo "  Gitea is running. Please open the install page in your browser:"
+echo ""
+echo -e "    ${GREEN}https://git.mrt0rtikize.ru/${NC}"
+echo ""
+echo "  Complete the install wizard with these settings:"
+echo "    - Database: SQLite3"
+echo "    - Admin Username: gitea"
+echo "    - Admin Password: <choose a strong password>"
+echo "    - Confirm Password: <same>"
+echo "    - Admin Email: admin@mrt0rtikize.ru"
+echo ""
+echo "  After install, create a repository named:"
+echo ""
+echo -e "    ${GREEN}k3s-manifests${NC}"
+echo ""
+echo "  Make it PUBLIC (so ArgoCD can read it without auth)."
+echo ""
+
+GITEA_PASSWORD=""
+# -s keeps the password off the screen, -r keeps backslashes literal.
+read -r -s -p "  Gitea admin password (from install wizard): " GITEA_PASSWORD
+echo ""
+
+if [ -z "$GITEA_PASSWORD" ]; then
+    echo -e "${RED}  Password is required. Exiting.${NC}"
+    exit 1
+fi
+
+# Connection details used by the following steps
+GITEA_EXTERNAL="https://git.mrt0rtikize.ru"
+GITEA_INTERNAL="http://gitea.gitea.svc.cluster.local:3000"
+GITEA_USER="gitea"
+GITEA_REPO="k3s-manifests"
+GITEA_REPO_URL="${GITEA_EXTERNAL}/${GITEA_USER}/${GITEA_REPO}.git"
+GITEA_INTERNAL_REPO="${GITEA_INTERNAL}/${GITEA_USER}/${GITEA_REPO}.git"
+
+echo ""
+
+# -----------------------------------------------------------------------------
+# Step 3: Initialize git repo and push manifests
+# -----------------------------------------------------------------------------
+echo -e "${YELLOW}[3/6] Initializing git repo...${NC}"
+
+# Create .gitignore
+cat > "${REPO_DIR}/.gitignore" << 'GITIGNORE'
+# Sensitive files
+config
+GITIGNORE
+
+cd "${REPO_DIR}"
+
+if [ ! -d ".git" ]; then
+    git init
+    git checkout -b main
+fi
+
+git add .
+git commit -m "Initial commit: k3s GitOps manifests" 2>/dev/null || {
+    echo "  Nothing to commit (already up to date)"
+}
+
+echo "  Pushing to Gitea..."
+# Pushing needs authentication even when the repository is public. Answer
+# git's credential request with the admin account collected in step 2 through
+# a one-shot inline helper, so the password ends up neither in the remote URL
+# nor in .git/config. stderr stays visible so a failure can be diagnosed.
+GIT_TERMINAL_PROMPT=0 GITEA_USER="${GITEA_USER}" GITEA_PASSWORD="${GITEA_PASSWORD}" \
+  git -c credential.helper='!f() { if test "$1" = get; then printf "username=%s\npassword=%s\n" "$GITEA_USER" "$GITEA_PASSWORD"; fi; }; f' \
+  push -u "${GITEA_REPO_URL}" main || {
+    echo ""
+    echo -e "  ${RED}Push failed.${NC} Did you create the '${GITEA_REPO}' repo in Gitea?"
+    echo "  You can retry manually:"
+    echo "    cd ${REPO_DIR}"
+    echo "    git push -u ${GITEA_REPO_URL} main"
+    echo ""
+    read -p "  Press Enter after pushing... " -r
+}
+
+echo -e "${GREEN}  Manifests pushed to Gitea!${NC}"
+echo ""
+
+# -----------------------------------------------------------------------------
+# Step 4: Install ArgoCD
+# -----------------------------------------------------------------------------
+echo -e "${YELLOW}[4/6] Installing ArgoCD...${NC}"
+
+helm repo add argo https://argoproj.github.io/argo-helm 2>/dev/null || true
+helm repo update
+
+# Dots that belong to a key name (server.insecure, timeout.reconciliation) are
+# escaped so that helm --set does not split them into nested maps.
+helm upgrade --install argocd argo/argo-cd \
+  --namespace argocd \
+  --create-namespace \
+  --set 'server.extraArgs[0]=--insecure' \
+  --set 'configs.params.server\.insecure=true' \
+  --set 'configs.cm.timeout\.reconciliation=180s' \
+  --wait \
+  --timeout 300s
+
+ARGOCD_PASSWORD=$(${KCTL} -n argocd get secret argocd-initial-admin-secret -o jsonpath="{.data.password}" 2>/dev/null | base64 -d)
+
+echo -e "${GREEN}  ArgoCD installed!${NC}"
+echo ""
+echo "  ArgoCD UI (port-forward):"
+echo "    kubectl --kubeconfig ${KUBECONFIG} port-forward svc/argocd-server -n argocd 8080:80"
+echo "  Username: admin"
+echo ""
+if [ -n "$ARGOCD_PASSWORD" ]; then
+    echo "  Password: ${ARGOCD_PASSWORD}"
+fi
+echo ""
+
+# -----------------------------------------------------------------------------
+# Step 5: Configure ArgoCD → Gitea connection
+# -----------------------------------------------------------------------------
+echo -e "${YELLOW}[5/6] Configuring ArgoCD → Gitea connection...${NC}"
+
+# Add Gitea as a repository in ArgoCD
+# Using argocd CLI if available, otherwise a labelled repository secret
+if command -v argocd &> /dev/null; then
+    echo "  Using argocd CLI..."
+    ARGOCD_SERVER="localhost:8080"
+    echo "  Please port-forward ArgoCD in another terminal:"
+    echo "    kubectl --kubeconfig ${KUBECONFIG} port-forward svc/argocd-server -n argocd 8080:80"
+    echo ""
+    read -p "  Press Enter when ready..." -r
+
+    argocd login "${ARGOCD_SERVER}" --username admin --password "${ARGOCD_PASSWORD}" --insecure
+    argocd repo add "${GITEA_INTERNAL_REPO}" --name gitea-k3s --type git
+else
+    # Fallback: declare the repository through a labelled Secret
+    echo "  Creating repository secret manually..."
+    # For a public repo, ArgoCD can access it without credentials.
+    # If the repo is private, also add under stringData:
+    #   username: "${GITEA_USER}"
+    #   password: "${GITEA_PASSWORD}"
+    ${KCTL} apply -f - <<EOF
+apiVersion: v1
+kind: Secret
+metadata:
+  name: gitea-k3s-repo
+  namespace: argocd
+  labels:
+    argocd.argoproj.io/secret-type: repository
+stringData:
+  url: "${GITEA_INTERNAL_REPO}"
+  type: git
+  name: gitea-k3s
+EOF
+fi
+
+echo -e "${GREEN}  Repository configured!${NC}"
+echo ""
+
+# -----------------------------------------------------------------------------
+# Step 6: Apply the root app
+# -----------------------------------------------------------------------------
+echo -e "${YELLOW}[6/6] Applying root App-of-Apps...${NC}"
+
+${KCTL} apply -f "${REPO_DIR}/argocd/app-of-apps.yaml"
+
+echo ""
+echo -e "${GREEN}==============================================${NC}"
+echo -e "${GREEN}   Bootstrap Complete!${NC}"
+echo -e "${GREEN}==============================================${NC}"
+echo ""
+echo "  Root app created. ArgoCD will now sync all child apps:"
+echo ""
+echo "    - cert-manager"
+echo "    - metallb"
+echo "    - longhorn"
+echo "    - metrics (prometheus + victoria-metrics)"
+echo "    - llama"
+echo "    - sillytavern"
+echo ""
+echo "  Monitor progress:"
+echo "    kubectl --kubeconfig ${KUBECONFIG} port-forward svc/argocd-server -n argocd 8080:80"
+echo "    Open http://localhost:8080"
+echo "    Login: admin / ${ARGOCD_PASSWORD}"
+echo ""
+echo "  Check sync status:"
+echo "    kubectl --kubeconfig ${KUBECONFIG} get applications -n argocd"
+echo ""
--- a/bootstrap/argocd/install.sh
+++ b/bootstrap/argocd/install.sh
@@ -0,0 +1,36 @@
+#!/bin/bash
+set -e
+
+# Bootstrap ArgoCD on the k3s cluster
+# This is a one-time manual step before GitOps takes over
+
+KUBECONFIG="/home/mrt0rtikize/infra/k3s/config"
+KCTL="kubectl --kubeconfig ${KUBECONFIG}"
+
+echo "=== Installing ArgoCD ==="
+
+# Add ArgoCD Helm repo
+helm repo add argo https://argoproj.github.io/argo-helm 2>/dev/null || true
+helm repo update
+
+# Install ArgoCD
+helm upgrade --install argocd argo/argo-cd \
+  --namespace argocd \
+  --create-namespace \
+  --set server.extraArgs[0]="--insecure" \
+  --set configs.params."server\.insecure"=true \
+  --set configs.cm.timeout.reconciliation=180s \
+  --wait \
+  --timeout 300s
+
+echo ""
+echo "=== ArgoCD installed ==="
+echo ""
+echo "To access ArgoCD UI:"
+echo "  kubectl --kubeconfig ${KUBECONFIG} port-forward svc/argocd-server -n argocd 8080:80"
+echo ""
+echo "Admin password:"
+kubectl --kubeconfig ${KUBECONFIG} -n argocd get secret argocd-initial-admin-secret -o jsonpath="{.data.password}" | base64 -d
+echo ""
+echo ""
+echo "Login with username: admin"
--- a/bootstrap/gitea/deployment.yaml
+++ b/bootstrap/gitea/deployment.yaml
@@ -0,0 +1,65 @@
+# Single-replica Gitea server configured for SQLite; the gitea-data claim is
+# mounted at /data and the pod is recreated (not rolled) on updates.
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: gitea
+  namespace: gitea
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: gitea
+  strategy:
+    type: Recreate
+  template:
+    metadata:
+      labels:
+        app: gitea
+    spec:
+      volumes:
+        - name: data
+          persistentVolumeClaim:
+            claimName: gitea-data
+      containers:
+        - name: gitea
+          image: gitea/gitea:1.24
+          env:
+            - name: GITEA__database__DB_TYPE
+              value: sqlite3
+            - name: GITEA__server__DOMAIN
+              value: git.mrt0rtikize.ru
+            - name: GITEA__server__ROOT_URL
+              value: https://git.mrt0rtikize.ru
+            - name: GITEA__server__HTTP_PORT
+              value: '3000'
+            - name: GITEA__server__SSH_PORT
+              value: '22'
+          ports:
+            - name: http
+              containerPort: 3000
+            - name: ssh
+              containerPort: 22
+          volumeMounts:
+            - name: data
+              mountPath: /data
+          resources:
+            limits:
+              cpu: 500m
+              memory: 512Mi
+            requests:
+              cpu: 100m
+              memory: 128Mi
+          # Both probes issue an HTTP GET for / on port 3000.
+          readinessProbe:
+            httpGet:
+              path: /
+              port: 3000
+            periodSeconds: 5
+            initialDelaySeconds: 5
+          livenessProbe:
+            httpGet:
+              path: /
+              port: 3000
+            periodSeconds: 10
+            initialDelaySeconds: 10
--- a/bootstrap/gitea/ingress.yaml
+++ b/bootstrap/gitea/ingress.yaml
@@ -0,0 +1,26 @@
+# Routes git.mrt0rtikize.ru to the gitea Service (port 3000) through Traefik;
+# the annotation names the cert-manager ClusterIssuer for the gitea-tls secret.
+apiVersion: networking.k8s.io/v1
+kind: Ingress
+metadata:
+  name: gitea
+  namespace: gitea
+  annotations:
+    cert-manager.io/cluster-issuer: letsencrypt-production
+spec:
+  ingressClassName: traefik
+  rules:
+    - host: git.mrt0rtikize.ru
+      http:
+        paths:
+          - pathType: Prefix
+            path: /
+            backend:
+              service:
+                name: gitea
+                port:
+                  number: 3000
+  tls:
+    - secretName: gitea-tls
+      hosts:
+        - git.mrt0rtikize.ru
--- a/bootstrap/gitea/namespace.yaml
+++ b/bootstrap/gitea/namespace.yaml
@@ -0,0 +1,5 @@
+# Namespace used by every manifest in bootstrap/gitea.
+kind: Namespace
+apiVersion: v1
+metadata:
+  name: gitea
--- a/bootstrap/gitea/pvc.yaml
+++ b/bootstrap/gitea/pvc.yaml
@@ -0,0 +1,14 @@
+# 20Gi ReadWriteOnce claim on the longhorn StorageClass; the Gitea Deployment
+# references it by the claim name gitea-data.
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: gitea-data
+  namespace: gitea
+spec:
+  storageClassName: longhorn
+  resources:
+    requests:
+      storage: 20Gi
+  accessModes:
+    - ReadWriteOnce
--- a/bootstrap/gitea/service.yaml
+++ b/bootstrap/gitea/service.yaml
@@ -0,0 +1,17 @@
+# Service in front of the app=gitea pods: HTTP on port 3000 and SSH on
+# port 22, each forwarded to the same container port.
+apiVersion: v1
+kind: Service
+metadata:
+  name: gitea
+  namespace: gitea
+spec:
+  selector:
+    app: gitea
+  ports:
+    - name: http
+      targetPort: 3000
+      port: 3000
+    - name: ssh
+      targetPort: 22
+      port: 22
--- a/infra/argocd/README.md
+++ b/infra/argocd/README.md
--- a/infra/longhorn/README.md
+++ b/infra/longhorn/README.md
--- a/infra/longhorn/longhorn-ingress.yaml
+++ b/infra/longhorn/longhorn-ingress.yaml
@@ -0,0 +1,30 @@
+# Publishes the Longhorn dashboard at longhorn.mrt0rtikize.ru.
+apiVersion: networking.k8s.io/v1
+kind: Ingress
+metadata:
+  name: longhorn-ingress
+  namespace: longhorn-system
+  annotations:
+    cert-manager.io/cluster-issuer: letsencrypt-production
+    # Traefik router settings: websecure entrypoint, TLS switched on.
+    traefik.ingress.kubernetes.io/router.entrypoints: websecure
+    traefik.ingress.kubernetes.io/router.tls: 'true'
+spec:
+  # Traefik is the ingress controller.
+  ingressClassName: traefik
+  rules:
+    - host: longhorn.mrt0rtikize.ru
+      http:
+        paths:
+          - pathType: Prefix
+            path: /
+            backend:
+              service:
+                # Service in front of the Longhorn dashboard; UI on port 80.
+                name: longhorn-frontend
+                port:
+                  number: 80
+  tls:
+    - secretName: longhorn-tls
+      hosts:
+        - longhorn.mrt0rtikize.ru
--- a/infra/longhorn/test-pvc.yaml
+++ b/infra/longhorn/test-pvc.yaml
@@ -0,0 +1,28 @@
+# StorageClass for the driver.longhorn.io provisioner (numberOfReplicas "2",
+# reclaimPolicy Retain) followed by a small claim that uses it.
+# NOTE(review): the name "longhorn" may collide with a StorageClass created
+# by the Longhorn chart itself - confirm before applying.
+apiVersion: storage.k8s.io/v1
+kind: StorageClass
+metadata:
+  name: longhorn
+provisioner: driver.longhorn.io
+reclaimPolicy: Retain
+volumeBindingMode: Immediate
+allowVolumeExpansion: true
+parameters:
+  numberOfReplicas: "2"
+  staleReplicaTimeout: "30"
+---
+# 2Gi test claim; this manifest sets no namespace for it.
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: longhorn-pvc
+spec:
+  storageClassName: longhorn
+  resources:
+    requests:
+      storage: 2Gi
+  accessModes:
+    - ReadWriteOnce
--- a/infra/metallb/README.md
+++ b/infra/metallb/README.md
--- a/infra/metallb/ip-address-pool.yaml
+++ b/infra/metallb/ip-address-pool.yaml
@@ -0,0 +1,12 @@
+# MetalLB address pool covering 10.0.0.120-10.0.0.200, with autoAssign and
+# avoidBuggyIPs both switched on.
+apiVersion: metallb.io/v1beta1
+kind: IPAddressPool
+metadata:
+  name: default-address-pool
+  namespace: metallb-system
+spec:
+  autoAssign: true
+  avoidBuggyIPs: true
+  addresses:
+    - 10.0.0.120-10.0.0.200
--- a/infra/metallb/l2advert.yaml
+++ b/infra/metallb/l2advert.yaml
@@ -0,0 +1,9 @@
+# L2Advertisement that references the default-address-pool IPAddressPool.
+kind: L2Advertisement
+apiVersion: metallb.io/v1beta1
+metadata:
+  namespace: metallb-system
+  name: default-advertisement
+spec:
+  ipAddressPools:
+    - default-address-pool
--- a/llama/cpu.yaml
+++ b/llama/cpu.yaml
@@ -0,0 +1,154 @@
+# llama.cpp HTTP server (llama-server-cpu) for the Qwen3-Coder GGUF model,
+# together with the Service and PodMonitor that expose and scrape it.
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: llama-server-cpu
+  namespace: llama
+spec:
+  replicas: 1
+  strategy:
+    type: Recreate
+  selector:
+    matchLabels:
+      app: llama-server-cpu
+  template:
+    metadata:
+      labels:
+        app: llama-server-cpu
+      annotations:
+        prometheus.io/scrape: "true"
+        prometheus.io/port: "8080"
+        prometheus.io/path: "/metrics"
+    spec:
+      nodeSelector:
+        gpu: amd
+
+      initContainers:
+        # Downloads MODEL_FILE from the MODEL_REPO Hugging Face repository into
+        # /models unless the file is already present. hf_hub_download is called
+        # without the deprecated local_dir_use_symlinks argument because
+        # huggingface_hub is installed unpinned (latest release) on every run.
+        - name: download-model
+          image: python:3.11-slim
+          env:
+            - name: HF_HOME
+              value: /models/.hf
+            - name: MODEL_REPO
+              value: "byteshape/Qwen3-Coder-30B-A3B-Instruct-GGUF"
+            - name: MODEL_FILE
+              value: "Qwen3-Coder-30B-A3B-Instruct-IQ4_XS-4.20bpw.gguf"
+          command:
+            - /bin/sh
+            - -c
+            - |
+              set -eux
+
+              MODEL_PATH="/models/${MODEL_FILE}"
+
+              if [ -f "${MODEL_PATH}" ]; then
+                echo "Model already exists at ${MODEL_PATH}, skipping download"
+                exit 0
+              fi
+
+              echo "Installing Hugging Face Hub downloader"
+              pip install --no-cache-dir huggingface_hub
+
+              echo "Downloading ${MODEL_REPO}/${MODEL_FILE}"
+              python - <<'PY'
+              import os
+              from huggingface_hub import hf_hub_download
+
+              repo_id = os.environ["MODEL_REPO"]
+              filename = os.environ["MODEL_FILE"]
+
+              token = os.environ.get("HUGGING_FACE_HUB_TOKEN")
+
+              path = hf_hub_download(
+                  repo_id=repo_id,
+                  filename=filename,
+                  local_dir="/models",
+                  token=token,
+              )
+              print(f"Downloaded to: {path}")
+              PY
+
+              ls -lah /models
+          volumeMounts:
+            - name: models
+              mountPath: /models
+
+      containers:
+        - name: llama
+          image: ghcr.io/ggml-org/llama.cpp:server
+          args:
+            - "--model"
+            - "/models/Qwen3-Coder-30B-A3B-Instruct-IQ4_XS-4.20bpw.gguf"
+            - "--host"
+            - "0.0.0.0"
+            - "--port"
+            - "8080"
+            - "--metrics"
+            - "--ctx-size"
+            - "32768"
+            - "--parallel"
+            - "1"
+            - "--cache-type-k"
+            - "q8_0"
+            - "--cache-type-v"
+            - "q8_0"
+          ports:
+            - name: http
+              containerPort: 8080
+
+          volumeMounts:
+            - name: models
+              mountPath: /models
+
+          resources:
+            requests:
+              cpu: "8"
+              memory: "24Gi"
+            limits:
+              cpu: "12"
+              memory: "24Gi"
+
+      volumes:
+        - name: models
+          persistentVolumeClaim:
+            claimName: llama-cpu-models-pvc
+---
+# ClusterIP Service on port 8080 in front of the llama-server-cpu pods.
+apiVersion: v1
+kind: Service
+metadata:
+  name: llama-server-cpu
+  namespace: llama
+spec:
+  selector:
+    app: llama-server-cpu
+  ports:
+    - name: http
+      port: 8080
+      targetPort: http
+  type: ClusterIP
+---
+# Scrapes /metrics on the pods' http port every 15 seconds.
+apiVersion: monitoring.coreos.com/v1
+kind: PodMonitor
+metadata:
+  name: llama-server-cpu
+  namespace: llama
+  labels:
+    app: llama-server-cpu
+spec:
+  namespaceSelector:
+    matchNames:
+      - llama
+  selector:
+    matchLabels:
+      app: llama-server-cpu
+  podMetricsEndpoints:
+    - port: http
+      path: /metrics
+      interval: 15s
--- a/llama/gpu-exporter.yaml
+++ b/llama/gpu-exporter.yaml
@@ -0,0 +1,66 @@
+# Radeon GPU metrics exporter: a DaemonSet restricted to nodes labelled
+# gpu=amd, plus the PodMonitor that scrapes its "metrics" port.
+apiVersion: apps/v1
+kind: DaemonSet
+metadata:
+  name: radeon-exporter
+  namespace: llama
+  labels:
+    app: radeon-exporter
+spec:
+  selector:
+    matchLabels:
+      app: radeon-exporter
+  template:
+    metadata:
+      labels:
+        app: radeon-exporter
+    spec:
+      nodeSelector:
+        gpu: amd
+      volumes:
+        - name: sys
+          hostPath:
+            type: Directory
+            path: /sys
+        - name: dri
+          hostPath:
+            type: Directory
+            path: /dev/dri
+      containers:
+        - name: radeon-exporter
+          image: kmulvey/radeon_exporter:latest
+          imagePullPolicy: IfNotPresent
+          # Runs privileged with the host's /sys and /dev/dri mounted read-only.
+          securityContext:
+            privileged: true
+          volumeMounts:
+            - name: sys
+              readOnly: true
+              mountPath: /sys
+            - name: dri
+              readOnly: true
+              mountPath: /dev/dri
+          ports:
+            - name: metrics
+              containerPort: 9200
+---
+# Scrapes /metrics on the exporter pods every 15 seconds.
+apiVersion: monitoring.coreos.com/v1
+kind: PodMonitor
+metadata:
+  name: radeon-exporter
+  namespace: llama
+  labels:
+    monitoring: primary
+spec:
+  selector:
+    matchLabels:
+      app: radeon-exporter
+  namespaceSelector:
+    matchNames:
+      - llama
+  podMetricsEndpoints:
+    - port: metrics
+      interval: 15s
+      path: /metrics
--- a/llama/litellm-db.yaml
+++ b/llama/litellm-db.yaml
@@ -0,0 +1,134 @@
+# PostgreSQL for LiteLLM: credentials Secret, data volume claim,
+# single-replica Deployment and ClusterIP Service.
+#
+# NOTE(review): the database password is committed in plain text below.
+# Rotate it and source it from a secret store before this repository is
+# shared.
+apiVersion: v1
+kind: Secret
+metadata:
+  name: litellm-postgres
+  namespace: llama
+type: Opaque
+stringData:
+  POSTGRES_DB: litellm
+  POSTGRES_USER: litellm
+  POSTGRES_PASSWORD: 7792e47efbc7348155f54a15ed34dc1d06716b2b1848711d0ee90e3461883c0d
+---
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: litellm-postgres
+  namespace: llama
+  labels:
+    app.kubernetes.io/name: litellm-postgres
+spec:
+  accessModes:
+    - ReadWriteOnce
+  resources:
+    requests:
+      storage: 10Gi
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: litellm-postgres
+  namespace: llama
+  labels:
+    app.kubernetes.io/name: litellm-postgres
+    app.kubernetes.io/component: database
+spec:
+  replicas: 1
+  # The data volume is ReadWriteOnce: stop the old pod before starting the new
+  # one instead of rolling, so two postgres instances never share the volume.
+  strategy:
+    type: Recreate
+  selector:
+    matchLabels:
+      app.kubernetes.io/name: litellm-postgres
+      app.kubernetes.io/component: database
+  template:
+    metadata:
+      labels:
+        app.kubernetes.io/name: litellm-postgres
+        app.kubernetes.io/component: database
+    spec:
+      containers:
+        - name: postgres
+          image: postgres:16
+          imagePullPolicy: IfNotPresent
+          ports:
+            - name: postgres
+              containerPort: 5432
+          env:
+            - name: POSTGRES_DB
+              valueFrom:
+                secretKeyRef:
+                  name: litellm-postgres
+                  key: POSTGRES_DB
+            - name: POSTGRES_USER
+              valueFrom:
+                secretKeyRef:
+                  name: litellm-postgres
+                  key: POSTGRES_USER
+            - name: POSTGRES_PASSWORD
+              valueFrom:
+                secretKeyRef:
+                  name: litellm-postgres
+                  key: POSTGRES_PASSWORD
+            # Keep the cluster in a sub-directory of the mount so initdb
+            # finds an empty directory even when the volume root is not
+            # empty (e.g. lost+found).
+            - name: PGDATA
+              value: /var/lib/postgresql/data/pgdata
+          volumeMounts:
+            # postgres:16 keeps its data under /var/lib/postgresql/data, which
+            # the image declares as a volume; mounting the claim one level up
+            # would leave the database outside the persistent volume.
+            - name: data
+              mountPath: /var/lib/postgresql/data
+          readinessProbe:
+            exec:
+              command:
+                - sh
+                - -c
+                - pg_isready -U "$POSTGRES_USER" -d "$POSTGRES_DB"
+            initialDelaySeconds: 5
+            periodSeconds: 10
+          livenessProbe:
+            exec:
+              command:
+                - sh
+                - -c
+                - pg_isready -U "$POSTGRES_USER" -d "$POSTGRES_DB"
+            initialDelaySeconds: 20
+            periodSeconds: 20
+          resources:
+            requests:
+              cpu: 100m
+              memory: 256Mi
+            limits:
+              cpu: 500m
+              memory: 256Mi
+      volumes:
+        - name: data
+          persistentVolumeClaim:
+            claimName: litellm-postgres
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: litellm-postgres
+  namespace: llama
+  labels:
+    app.kubernetes.io/name: litellm-postgres
+    app.kubernetes.io/component: database
+spec:
+  selector:
+    app.kubernetes.io/name: litellm-postgres
+    app.kubernetes.io/component: database
+  ports:
+    - name: postgres
+      port: 5432
+      targetPort: postgres
+  type: ClusterIP
--- a/llama/litellm.yaml
+++ b/llama/litellm.yaml
@@ -0,0 +1,213 @@
+# LiteLLM gateway: master-key Secret, routing ConfigMap, Deployment, Service,
+# Ingress (litellm.mrt0rtikize.ru) and PodMonitor.
+#
+# NOTE(review): the master key is committed in plain text below; rotate it
+# and source it from a secret store before this repository is shared.
+apiVersion: v1
+kind: Secret
+metadata:
+  name: litellm-secret
+  namespace: llama
+  labels:
+    app.kubernetes.io/name: litellm
+    app.kubernetes.io/component: gateway
+type: Opaque
+stringData:
+  LITELLM_MASTER_KEY: "6991c7c0f02b4bcf"
+---
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: litellm-config
+  namespace: llama
+  labels:
+    app.kubernetes.io/name: litellm
+    app.kubernetes.io/component: gateway
+data:
+  # Three model aliases (fast / smart / rp), each forwarded to the /v1
+  # endpoint of an in-cluster llama-server Service.
+  config.yaml: |
+    model_list:
+      - model_name: fast
+        litellm_params:
+          model: openai/fast
+          api_base: "http://llama-server-gpu.llama.svc.cluster.local:8080/v1"
+          api_key: none
+
+      - model_name: smart
+        litellm_params:
+          model: openai/smart
+          api_base: "http://llama-server-cpu.llama.svc.cluster.local:8080/v1"
+          api_key: none
+
+      - model_name: rp
+        litellm_params:
+          model: openai/rp
+          api_base: "http://llama-server-gpu-rp.llama.svc.cluster.local:8080/v1"
+          api_key: none
+    litellm_settings:
+      callbacks:
+        - prometheus
+    general_settings:
+      store_model_in_db: true
+      store_prompts_in_spend_logs: true
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: litellm
+  namespace: llama
+  labels:
+    app.kubernetes.io/name: litellm
+    app.kubernetes.io/component: gateway
+    app.kubernetes.io/part-of: llama-stack
+    monitoring: prometheus
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app.kubernetes.io/name: litellm
+      app.kubernetes.io/component: gateway
+  template:
+    metadata:
+      labels:
+        app.kubernetes.io/name: litellm
+        app.kubernetes.io/component: gateway
+        app.kubernetes.io/part-of: llama-stack
+        monitoring: prometheus
+      annotations:
+        prometheus.io/scrape: "true"
+        prometheus.io/port: "4000"
+        prometheus.io/path: "/metrics"
+    spec:
+      containers:
+        - name: litellm
+          image: ghcr.io/berriai/litellm:v1.82.6.rc.3
+          imagePullPolicy: IfNotPresent
+          args:
+            - "--config"
+            - "/app/config.yaml"
+          env:
+            - name: LITELLM_MASTER_KEY
+              valueFrom:
+                secretKeyRef:
+                  name: litellm-secret
+                  key: LITELLM_MASTER_KEY
+            - name: POSTGRES_USER
+              valueFrom:
+                secretKeyRef:
+                  name: litellm-postgres
+                  key: POSTGRES_USER
+
+            - name: POSTGRES_PASSWORD
+              valueFrom:
+                secretKeyRef:
+                  name: litellm-postgres
+                  key: POSTGRES_PASSWORD
+
+            - name: POSTGRES_DB
+              valueFrom:
+                secretKeyRef:
+                  name: litellm-postgres
+                  key: POSTGRES_DB
+
+            # Built from $(VAR) references to the three variables above, so it
+            # must stay after them in this list.
+            - name: DATABASE_URL
+              value: "postgresql://$(POSTGRES_USER):$(POSTGRES_PASSWORD)@litellm-postgres.llama.svc.cluster.local:5432/$(POSTGRES_DB)"
+          ports:
+            - name: http
+              containerPort: 4000
+              protocol: TCP
+          volumeMounts:
+            - name: litellm-config
+              mountPath: /app/config.yaml
+              subPath: config.yaml
+          resources:
+            requests:
+              cpu: "500m"
+              memory: "1Gi"
+            limits:
+              cpu: "1000m"
+              memory: "2Gi"
+      volumes:
+        - name: litellm-config
+          configMap:
+            name: litellm-config
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: litellm
+  namespace: llama
+  labels:
+    app.kubernetes.io/name: litellm
+    app.kubernetes.io/component: gateway
+    app.kubernetes.io/part-of: llama-stack
+    monitoring: prometheus
+spec:
+  selector:
+    app.kubernetes.io/name: litellm
+    app.kubernetes.io/component: gateway
+  ports:
+    - name: http
+      port: 4000
+      targetPort: http
+      protocol: TCP
+  type: ClusterIP
+---
+apiVersion: networking.k8s.io/v1
+kind: Ingress
+metadata:
+  name: litellm
+  namespace: llama
+  labels:
+    app.kubernetes.io/name: litellm
+    app.kubernetes.io/component: gateway
+    app.kubernetes.io/part-of: llama-stack
+  annotations:
+    cert-manager.io/cluster-issuer: letsencrypt-production
+    traefik.ingress.kubernetes.io/router.entrypoints: websecure
+    traefik.ingress.kubernetes.io/router.tls: "true"
+spec:
+  ingressClassName: traefik
+  tls:
+    - hosts:
+        - litellm.mrt0rtikize.ru
+      # Certificate secret named after this app, like the other ingresses.
+      secretName: litellm-tls
+  rules:
+    - host: litellm.mrt0rtikize.ru
+      http:
+        paths:
+          - path: /
+            pathType: Prefix
+            backend:
+              service:
+                name: litellm
+                port:
+                  number: 4000
+---
+# Scrapes /metrics on the gateway pods' http port every 30 seconds.
+apiVersion: monitoring.coreos.com/v1
+kind: PodMonitor
+metadata:
+  name: litellm
+  namespace: llama
+  labels:
+    app.kubernetes.io/name: litellm
+    app.kubernetes.io/component: gateway
+    app.kubernetes.io/part-of: llama-stack
+    release: kube-prometheus-stack
+spec:
+  namespaceSelector:
+    matchNames:
+      - llama
+  selector:
+    matchLabels:
+      app.kubernetes.io/name: litellm
+      app.kubernetes.io/component: gateway
+  podMetricsEndpoints:
+    - port: http
+      path: /metrics
+      interval: 30s
--- a/llama/main.yaml
+++ b/llama/main.yaml
@@ -0,0 +1,166 @@
+# llama.cpp server (Vulkan image) for the Devstral model, listening on 8080.
+# An init container downloads the GGUF file into the models PVC when it is not
+# already there; the server container then loads it from /models.
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: llama-server-gpu
+  namespace: llama
+spec:
+  replicas: 1
+  # Stop the old pod before starting the new one. The default RollingUpdate
+  # would surge a second server that loads the model while the first is still
+  # running. Same strategy as the llama-server-cpu Deployment.
+  strategy:
+    type: Recreate
+  selector:
+    matchLabels:
+      app: llama-server-gpu
+  template:
+    metadata:
+      labels:
+        app: llama-server-gpu
+      annotations:
+        prometheus.io/scrape: "true"
+        prometheus.io/port: "8080"
+        prometheus.io/path: "/metrics"
+    spec:
+      # Only schedule onto nodes carrying the gpu=amd label.
+      nodeSelector:
+        gpu: amd
+
+      initContainers:
+        - name: download-model
+          image: python:3.11-slim
+          env:
+            # Keeps the Hugging Face cache on the models volume.
+            - name: HF_HOME
+              value: /models/.hf
+            - name: MODEL_REPO
+              value: "byteshape/Devstral-Small-2-24B-Instruct-2512-GGUF"
+            - name: MODEL_FILE
+              value: "Devstral-Small-2-24B-Instruct-2512-IQ4_XS-4.04bpw.gguf"
+            # optional, only if you need gated/private models
+            # - name: HUGGING_FACE_HUB_TOKEN
+            #   valueFrom:
+            #     secretKeyRef:
+            #       name: hf-token
+            #       key: token
+          # The script exits early when the model file already exists.
+          # hf_hub_download is called without local_dir_use_symlinks: that
+          # argument is deprecated and huggingface_hub is installed unpinned,
+          # so a newer release may reject it.
+          command:
+            - /bin/sh
+            - -c
+            - |
+              set -eux
+
+              MODEL_PATH="/models/${MODEL_FILE}"
+
+              if [ -f "${MODEL_PATH}" ]; then
+                echo "Model already exists at ${MODEL_PATH}, skipping download"
+                exit 0
+              fi
+
+              echo "Installing Hugging Face Hub downloader"
+              pip install --no-cache-dir huggingface_hub
+
+              echo "Downloading ${MODEL_REPO}/${MODEL_FILE}"
+              python - <<'PY'
+              import os
+              from huggingface_hub import hf_hub_download
+
+              repo_id = os.environ["MODEL_REPO"]
+              filename = os.environ["MODEL_FILE"]
+
+              token = os.environ.get("HUGGING_FACE_HUB_TOKEN")
+
+              path = hf_hub_download(
+                  repo_id=repo_id,
+                  filename=filename,
+                  local_dir="/models",
+                  token=token,
+              )
+              print(f"Downloaded to: {path}")
+              PY
+
+              ls -lah /models
+          volumeMounts:
+            - name: models
+              mountPath: /models
+
+      containers:
+        - name: llama
+          # NOTE(review): "server-vulkan" is a moving tag; pin a release tag or
+          # digest if reproducible rollouts matter.
+          image: ghcr.io/ggml-org/llama.cpp:server-vulkan
+          args:
+            - "--model"
+            - "/models/Devstral-Small-2-24B-Instruct-2512-IQ4_XS-4.04bpw.gguf"
+            - "--host"
+            - "0.0.0.0"
+            - "--port"
+            - "8080"
+            - "--n-gpu-layers"
+            - "999"
+            - "--metrics"
+
+            # performance tuning
+            - "--ctx-size"
+            - "32768"
+            - "--parallel"
+            - "4"
+
+            # KV cache quantization
+            - "--cache-type-k"
+            - "q8_0"
+            - "--cache-type-v"
+            - "q8_0"
+          ports:
+            - name: http
+              containerPort: 8080
+
+          # NOTE(review): presumably privileged so the container can open the
+          # host /dev/dri device nodes mounted below — confirm a narrower
+          # setting is not sufficient.
+          securityContext:
+            privileged: true
+
+          volumeMounts:
+            - name: models
+              mountPath: /models
+            - name: dri
+              mountPath: /dev/dri
+
+          resources:
+            requests:
+              cpu: "2"
+              memory: "4Gi"
+            limits:
+              cpu: "2"
+              memory: "4Gi"
+
+      volumes:
+        # Claim shared with the llama-server-gpu-rp Deployment.
+        - name: models
+          persistentVolumeClaim:
+            claimName: llama-gpu-models-pvc
+        - name: dri
+          hostPath:
+            path: /dev/dri
+            type: Directory
+---
+# ClusterIP Service for the llama-server-gpu pods (port 8080).
+apiVersion: v1
+kind: Service
+metadata:
+  namespace: llama
+  name: llama-server-gpu
+spec:
+  type: ClusterIP
+  selector:
+    app: llama-server-gpu
+  ports:
+    - name: http
+      # Forwards to the container port named "http".
+      targetPort: http
+      port: 8080
+---
+# Scrapes /metrics on the "http" port of the llama-server-gpu pods every 15s.
+apiVersion: monitoring.coreos.com/v1
+kind: PodMonitor
+metadata:
+  namespace: llama
+  name: llama-server-gpu
+  labels:
+    app: llama-server-gpu
+spec:
+  selector:
+    matchLabels:
+      app: llama-server-gpu
+  # Only pods in the llama namespace are considered.
+  namespaceSelector:
+    matchNames:
+      - llama
+  podMetricsEndpoints:
+    - interval: 15s
+      path: /metrics
+      port: http
--- a/llama/namespace.yaml
+++ b/llama/namespace.yaml
@@ -0,0 +1,42 @@
+# Namespace that holds the llama workloads.
+kind: Namespace
+apiVersion: v1
+metadata:
+  name: llama
+---
+# apiVersion: storage.k8s.io/v1
+# kind: StorageClass
+# metadata:
+#   name: longhorn-llama
+# provisioner: driver.longhorn.io
+# parameters:
+#   numberOfReplicas: "2"
+#   staleReplicaTimeout: "30"
+# allowVolumeExpansion: true
+# reclaimPolicy: Retain
+# volumeBindingMode: Immediate
+# ---
+# 50Gi model storage claimed by the GPU llama-server Deployments.
+kind: PersistentVolumeClaim
+apiVersion: v1
+metadata:
+  namespace: llama
+  name: llama-gpu-models-pvc
+spec:
+  # storageClassName: longhorn-llama
+  resources:
+    requests:
+      storage: 50Gi
+  accessModes:
+    - ReadWriteOnce
+---
+# 100Gi model storage claimed by the CPU llama-server Deployment.
+kind: PersistentVolumeClaim
+apiVersion: v1
+metadata:
+  namespace: llama
+  name: llama-cpu-models-pvc
+spec:
+  # storageClassName: longhorn-llama
+  resources:
+    requests:
+      storage: 100Gi
+  accessModes:
+    - ReadWriteOnce
--- a/llama/rp.yaml
+++ b/llama/rp.yaml
@@ -0,0 +1,166 @@
+# Second llama.cpp server (Vulkan image), serving the Omega-Darker-Gaslight
+# model on port 8080. An init container downloads the GGUF file into the models
+# PVC when it is not already there; the server container loads it from /models.
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: llama-server-gpu-rp
+  namespace: llama
+spec:
+  replicas: 1
+  # Stop the old pod before starting the new one. The default RollingUpdate
+  # would surge a second server that loads the model while the first is still
+  # running. Same strategy as the llama-server-cpu Deployment.
+  strategy:
+    type: Recreate
+  selector:
+    matchLabels:
+      app: llama-server-gpu-rp
+  template:
+    metadata:
+      labels:
+        app: llama-server-gpu-rp
+      annotations:
+        prometheus.io/scrape: "true"
+        prometheus.io/port: "8080"
+        prometheus.io/path: "/metrics"
+    spec:
+      # Only schedule onto nodes carrying the gpu=amd label.
+      nodeSelector:
+        gpu: amd
+
+      initContainers:
+        - name: download-model
+          image: python:3.11-slim
+          env:
+            # Keeps the Hugging Face cache on the models volume.
+            - name: HF_HOME
+              value: /models/.hf
+            - name: MODEL_REPO
+              value: "mradermacher/Omega-Darker-Gaslight_The-Final-Forgotten-Fever-Dream-24B-GGUF"
+            - name: MODEL_FILE
+              value: "Omega-Darker-Gaslight_The-Final-Forgotten-Fever-Dream-24B.Q4_K_S.gguf"
+            # optional, only if you need gated/private models
+            # - name: HUGGING_FACE_HUB_TOKEN
+            #   valueFrom:
+            #     secretKeyRef:
+            #       name: hf-token
+            #       key: token
+          # The script exits early when the model file already exists.
+          # hf_hub_download is called without local_dir_use_symlinks: that
+          # argument is deprecated and huggingface_hub is installed unpinned,
+          # so a newer release may reject it.
+          command:
+            - /bin/sh
+            - -c
+            - |
+              set -eux
+
+              MODEL_PATH="/models/${MODEL_FILE}"
+
+              if [ -f "${MODEL_PATH}" ]; then
+                echo "Model already exists at ${MODEL_PATH}, skipping download"
+                exit 0
+              fi
+
+              echo "Installing Hugging Face Hub downloader"
+              pip install --no-cache-dir huggingface_hub
+
+              echo "Downloading ${MODEL_REPO}/${MODEL_FILE}"
+              python - <<'PY'
+              import os
+              from huggingface_hub import hf_hub_download
+
+              repo_id = os.environ["MODEL_REPO"]
+              filename = os.environ["MODEL_FILE"]
+
+              token = os.environ.get("HUGGING_FACE_HUB_TOKEN")
+
+              path = hf_hub_download(
+                  repo_id=repo_id,
+                  filename=filename,
+                  local_dir="/models",
+                  token=token,
+              )
+              print(f"Downloaded to: {path}")
+              PY
+
+              ls -lah /models
+          volumeMounts:
+            - name: models
+              mountPath: /models
+
+      containers:
+        - name: llama
+          # NOTE(review): "server-vulkan" is a moving tag; pin a release tag or
+          # digest if reproducible rollouts matter.
+          image: ghcr.io/ggml-org/llama.cpp:server-vulkan
+          args:
+            - "--model"
+            - "/models/Omega-Darker-Gaslight_The-Final-Forgotten-Fever-Dream-24B.Q4_K_S.gguf"
+            - "--host"
+            - "0.0.0.0"
+            - "--port"
+            - "8080"
+            - "--n-gpu-layers"
+            - "999"
+            - "--metrics"
+
+            # performance tuning
+            - "--ctx-size"
+            - "32768"
+            - "--parallel"
+            - "1"
+
+            # KV cache quantization
+            - "--cache-type-k"
+            - "q8_0"
+            - "--cache-type-v"
+            - "q8_0"
+          ports:
+            - name: http
+              containerPort: 8080
+
+          # NOTE(review): presumably privileged so the container can open the
+          # host /dev/dri device nodes mounted below — confirm a narrower
+          # setting is not sufficient.
+          securityContext:
+            privileged: true
+
+          volumeMounts:
+            - name: models
+              mountPath: /models
+            - name: dri
+              mountPath: /dev/dri
+
+          resources:
+            requests:
+              cpu: "2"
+              memory: "4Gi"
+            limits:
+              cpu: "2"
+              memory: "4Gi"
+
+      volumes:
+        # Claim shared with the llama-server-gpu Deployment.
+        - name: models
+          persistentVolumeClaim:
+            claimName: llama-gpu-models-pvc
+        - name: dri
+          hostPath:
+            path: /dev/dri
+            type: Directory
+---
+# ClusterIP Service for the llama-server-gpu-rp pods (port 8080).
+apiVersion: v1
+kind: Service
+metadata:
+  namespace: llama
+  name: llama-server-gpu-rp
+spec:
+  type: ClusterIP
+  selector:
+    app: llama-server-gpu-rp
+  ports:
+    - name: http
+      # Forwards to the container port named "http".
+      targetPort: http
+      port: 8080
+---
+# Scrapes /metrics on the "http" port of the llama-server-gpu-rp pods every 15s.
+apiVersion: monitoring.coreos.com/v1
+kind: PodMonitor
+metadata:
+  namespace: llama
+  name: llama-server-gpu-rp
+  labels:
+    app: llama-server-gpu-rp
+spec:
+  selector:
+    matchLabels:
+      app: llama-server-gpu-rp
+  # Only pods in the llama namespace are considered.
+  namespaceSelector:
+    matchNames:
+      - llama
+  podMetricsEndpoints:
+    - interval: 15s
+      path: /metrics
+      port: http
--- a/manifests/llama/cpu.yaml
+++ b/manifests/llama/cpu.yaml
@@ -0,0 +1,147 @@
+# llama.cpp server (plain "server" image, no GPU flags) for the Qwen3-Coder
+# model on port 8080. An init container downloads the GGUF file into the CPU
+# models PVC when it is not already there.
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: llama-server-cpu
+  namespace: llama
+spec:
+  replicas: 1
+  # The old pod is stopped before its replacement is created.
+  strategy:
+    type: Recreate
+  selector:
+    matchLabels:
+      app: llama-server-cpu
+  template:
+    metadata:
+      labels:
+        app: llama-server-cpu
+      annotations:
+        prometheus.io/scrape: "true"
+        prometheus.io/port: "8080"
+        prometheus.io/path: "/metrics"
+    spec:
+      # NOTE(review): the CPU server is also pinned to gpu=amd nodes — confirm
+      # this is intentional.
+      nodeSelector:
+        gpu: amd
+
+      initContainers:
+        - name: download-model
+          image: python:3.11-slim
+          env:
+            # Keeps the Hugging Face cache on the models volume.
+            - name: HF_HOME
+              value: /models/.hf
+            - name: MODEL_REPO
+              value: "byteshape/Qwen3-Coder-30B-A3B-Instruct-GGUF"
+            - name: MODEL_FILE
+              value: "Qwen3-Coder-30B-A3B-Instruct-IQ4_XS-4.20bpw.gguf"
+          # The script exits early when the model file already exists.
+          # hf_hub_download is called without local_dir_use_symlinks: that
+          # argument is deprecated and huggingface_hub is installed unpinned,
+          # so a newer release may reject it.
+          command:
+            - /bin/sh
+            - -c
+            - |
+              set -eux
+
+              MODEL_PATH="/models/${MODEL_FILE}"
+
+              if [ -f "${MODEL_PATH}" ]; then
+                echo "Model already exists at ${MODEL_PATH}, skipping download"
+                exit 0
+              fi
+
+              echo "Installing Hugging Face Hub downloader"
+              pip install --no-cache-dir huggingface_hub
+
+              echo "Downloading ${MODEL_REPO}/${MODEL_FILE}"
+              python - <<'PY'
+              import os
+              from huggingface_hub import hf_hub_download
+
+              repo_id = os.environ["MODEL_REPO"]
+              filename = os.environ["MODEL_FILE"]
+
+              token = os.environ.get("HUGGING_FACE_HUB_TOKEN")
+
+              path = hf_hub_download(
+                  repo_id=repo_id,
+                  filename=filename,
+                  local_dir="/models",
+                  token=token,
+              )
+              print(f"Downloaded to: {path}")
+              PY
+
+              ls -lah /models
+          volumeMounts:
+            - name: models
+              mountPath: /models
+
+      containers:
+        - name: llama
+          # NOTE(review): "server" is a moving tag; pin a release tag or digest
+          # if reproducible rollouts matter.
+          image: ghcr.io/ggml-org/llama.cpp:server
+          args:
+            - "--model"
+            - "/models/Qwen3-Coder-30B-A3B-Instruct-IQ4_XS-4.20bpw.gguf"
+            - "--host"
+            - "0.0.0.0"
+            - "--port"
+            - "8080"
+            - "--metrics"
+            - "--ctx-size"
+            - "32768"
+            - "--parallel"
+            - "1"
+            - "--cache-type-k"
+            - "q8_0"
+            - "--cache-type-v"
+            - "q8_0"
+          ports:
+            - name: http
+              containerPort: 8080
+
+          volumeMounts:
+            - name: models
+              mountPath: /models
+
+          resources:
+            requests:
+              cpu: "8"
+              memory: "24Gi"
+            limits:
+              cpu: "12"
+              memory: "24Gi"
+
+      volumes:
+        - name: models
+          persistentVolumeClaim:
+            claimName: llama-cpu-models-pvc
+---
+# ClusterIP Service for the llama-server-cpu pods (port 8080).
+apiVersion: v1
+kind: Service
+metadata:
+  namespace: llama
+  name: llama-server-cpu
+spec:
+  type: ClusterIP
+  selector:
+    app: llama-server-cpu
+  ports:
+    - name: http
+      # Forwards to the container port named "http".
+      targetPort: http
+      port: 8080
+---
+# Scrapes /metrics on the "http" port of the llama-server-cpu pods every 15s.
+apiVersion: monitoring.coreos.com/v1
+kind: PodMonitor
+metadata:
+  namespace: llama
+  name: llama-server-cpu
+  labels:
+    app: llama-server-cpu
+spec:
+  selector:
+    matchLabels:
+      app: llama-server-cpu
+  # Only pods in the llama namespace are considered.
+  namespaceSelector:
+    matchNames:
+      - llama
+  podMetricsEndpoints:
+    - interval: 15s
+      path: /metrics
+      port: http
--- a/manifests/llama/gpu-exporter.yaml
+++ b/manifests/llama/gpu-exporter.yaml
@@ -0,0 +1,62 @@
+# Runs the radeon_exporter on every node labelled gpu=amd, with the host's /sys
+# and /dev/dri mounted read-only and a "metrics" port on 9200.
+apiVersion: apps/v1
+kind: DaemonSet
+metadata:
+  namespace: llama
+  name: radeon-exporter
+  labels:
+    app: radeon-exporter
+spec:
+  selector:
+    matchLabels:
+      app: radeon-exporter
+  template:
+    metadata:
+      labels:
+        app: radeon-exporter
+    spec:
+      nodeSelector:
+        gpu: amd
+      # Host paths handed to the exporter container.
+      volumes:
+        - name: sys
+          hostPath:
+            type: Directory
+            path: /sys
+        - name: dri
+          hostPath:
+            type: Directory
+            path: /dev/dri
+      containers:
+        - name: radeon-exporter
+          # NOTE(review): ":latest" combined with IfNotPresent means a node
+          # keeps whichever image it pulled first.
+          image: kmulvey/radeon_exporter:latest
+          imagePullPolicy: IfNotPresent
+          securityContext:
+            privileged: true
+          ports:
+            - containerPort: 9200
+              name: metrics
+          volumeMounts:
+            - name: sys
+              readOnly: true
+              mountPath: /sys
+            - name: dri
+              readOnly: true
+              mountPath: /dev/dri
+---
+# Scrapes /metrics on the "metrics" port of the radeon-exporter pods every 15s.
+apiVersion: monitoring.coreos.com/v1
+kind: PodMonitor
+metadata:
+  namespace: llama
+  name: radeon-exporter
+  labels:
+    monitoring: primary
+spec:
+  selector:
+    matchLabels:
+      app: radeon-exporter
+  # Only pods in the llama namespace are considered.
+  namespaceSelector:
+    matchNames:
+      - llama
+  podMetricsEndpoints:
+    - interval: 15s
+      path: /metrics
+      port: metrics
--- a/manifests/llama/litellm-db.yaml
+++ b/manifests/llama/litellm-db.yaml
@@ -0,0 +1,116 @@
+# Database name, user and password for the LiteLLM PostgreSQL instance; read
+# through secretKeyRef by the litellm-postgres and litellm Deployments.
+# NOTE(review): the password is committed to Git in plain text — rotate it and
+# supply it from a sealed or external secret instead.
+apiVersion: v1
+kind: Secret
+metadata:
+  name: litellm-postgres
+  namespace: llama
+type: Opaque
+stringData:
+  POSTGRES_DB: litellm
+  POSTGRES_USER: litellm
+  POSTGRES_PASSWORD: 7792e47efbc7348155f54a15ed34dc1d06716b2b1848711d0ee90e3461883c0d
+---
+# 10Gi ReadWriteOnce claim used by the litellm-postgres Deployment.
+kind: PersistentVolumeClaim
+apiVersion: v1
+metadata:
+  namespace: llama
+  name: litellm-postgres
+  labels:
+    app.kubernetes.io/name: litellm-postgres
+spec:
+  resources:
+    requests:
+      storage: 10Gi
+  accessModes:
+    - ReadWriteOnce
+---
+# Single-replica PostgreSQL 16 instance backing LiteLLM. Credentials come from
+# the litellm-postgres Secret; data is stored on the litellm-postgres PVC.
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: litellm-postgres
+  namespace: llama
+  labels:
+    app.kubernetes.io/name: litellm-postgres
+    app.kubernetes.io/component: database
+spec:
+  replicas: 1
+  # Never run two postgres pods against the same data directory: the default
+  # RollingUpdate starts the replacement before the old pod is stopped.
+  strategy:
+    type: Recreate
+  selector:
+    matchLabels:
+      app.kubernetes.io/name: litellm-postgres
+      app.kubernetes.io/component: database
+  template:
+    metadata:
+      labels:
+        app.kubernetes.io/name: litellm-postgres
+        app.kubernetes.io/component: database
+    spec:
+      containers:
+        - name: postgres
+          image: postgres:16
+          imagePullPolicy: IfNotPresent
+          ports:
+            - name: postgres
+              containerPort: 5432
+          env:
+            - name: POSTGRES_DB
+              valueFrom:
+                secretKeyRef:
+                  name: litellm-postgres
+                  key: POSTGRES_DB
+            - name: POSTGRES_USER
+              valueFrom:
+                secretKeyRef:
+                  name: litellm-postgres
+                  key: POSTGRES_USER
+            - name: POSTGRES_PASSWORD
+              valueFrom:
+                secretKeyRef:
+                  name: litellm-postgres
+                  key: POSTGRES_PASSWORD
+          volumeMounts:
+            # postgres:16 keeps its cluster in /var/lib/postgresql/data, which
+            # the image also declares as a VOLUME. Mounting the claim one level
+            # up can leave that path on a runtime-provided ephemeral volume, so
+            # the claim's "data" sub-directory is mounted exactly there. The
+            # sub-directory matches where data would have been written under
+            # the previous /var/lib/postgresql mount.
+            - name: data
+              mountPath: /var/lib/postgresql/data
+              subPath: data
+          # Both probes run pg_isready with the configured user and database.
+          readinessProbe:
+            exec:
+              command:
+                - sh
+                - -c
+                - pg_isready -U "$POSTGRES_USER" -d "$POSTGRES_DB"
+            initialDelaySeconds: 5
+            periodSeconds: 10
+          livenessProbe:
+            exec:
+              command:
+                - sh
+                - -c
+                - pg_isready -U "$POSTGRES_USER" -d "$POSTGRES_DB"
+            initialDelaySeconds: 20
+            periodSeconds: 20
+          resources:
+            requests:
+              cpu: 100m
+              memory: 256Mi
+            limits:
+              cpu: 500m
+              memory: 256Mi
+      volumes:
+        - name: data
+          persistentVolumeClaim:
+            claimName: litellm-postgres
+---
+# ClusterIP Service for the LiteLLM PostgreSQL pod (port 5432).
+apiVersion: v1
+kind: Service
+metadata:
+  namespace: llama
+  name: litellm-postgres
+  labels:
+    app.kubernetes.io/component: database
+    app.kubernetes.io/name: litellm-postgres
+spec:
+  type: ClusterIP
+  selector:
+    app.kubernetes.io/component: database
+    app.kubernetes.io/name: litellm-postgres
+  ports:
+    - name: postgres
+      # Forwards to the container port named "postgres".
+      targetPort: postgres
+      port: 5432
--- a/manifests/llama/litellm.yaml
+++ b/manifests/llama/litellm.yaml
@@ -0,0 +1,202 @@
+# Master key for the LiteLLM gateway; the litellm Deployment reads it into the
+# LITELLM_MASTER_KEY environment variable.
+# NOTE(review): committed to Git in plain text — rotate it and supply it from a
+# sealed or external secret. Also confirm the value format against the LiteLLM
+# documentation, which describes master keys as starting with "sk-".
+apiVersion: v1
+kind: Secret
+metadata:
+  name: litellm-secret
+  namespace: llama
+  labels:
+    app.kubernetes.io/name: litellm
+    app.kubernetes.io/component: gateway
+type: Opaque
+stringData:
+  LITELLM_MASTER_KEY: "6991c7c0f02b4bcf"
+---
+# LiteLLM proxy configuration. Three model aliases (fast, smart, rp) point at
+# the in-cluster llama-server Services; the prometheus callback is enabled.
+# The litellm Deployment mounts config.yaml at /app/config.yaml.
+kind: ConfigMap
+apiVersion: v1
+metadata:
+  namespace: llama
+  name: litellm-config
+  labels:
+    app.kubernetes.io/component: gateway
+    app.kubernetes.io/name: litellm
+data:
+  config.yaml: |
+    model_list:
+      - model_name: fast
+        litellm_params:
+          model: openai/fast
+          api_base: "http://llama-server-gpu.llama.svc.cluster.local:8080/v1"
+          api_key: none
+
+      - model_name: smart
+        litellm_params:
+          model: openai/smart
+          api_base: "http://llama-server-cpu.llama.svc.cluster.local:8080/v1"
+          api_key: none
+
+      - model_name: rp
+        litellm_params:
+          model: openai/rp
+          api_base: "http://llama-server-gpu-rp.llama.svc.cluster.local:8080/v1"
+          api_key: none
+    litellm_settings:
+      callbacks:
+        - prometheus
+    general_settings:
+      store_model_in_db: true
+      store_prompts_in_spend_logs: true
+---
+# LiteLLM gateway: one replica on container port 4000, configured from the
+# litellm-config ConfigMap and connected to the litellm-postgres database.
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  namespace: llama
+  name: litellm
+  labels:
+    monitoring: prometheus
+    app.kubernetes.io/part-of: llama-stack
+    app.kubernetes.io/component: gateway
+    app.kubernetes.io/name: litellm
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app.kubernetes.io/component: gateway
+      app.kubernetes.io/name: litellm
+  template:
+    metadata:
+      annotations:
+        prometheus.io/path: '/metrics'
+        prometheus.io/port: '4000'
+        prometheus.io/scrape: 'true'
+      labels:
+        monitoring: prometheus
+        app.kubernetes.io/part-of: llama-stack
+        app.kubernetes.io/component: gateway
+        app.kubernetes.io/name: litellm
+    spec:
+      volumes:
+        - name: litellm-config
+          configMap:
+            name: litellm-config
+      containers:
+        - name: litellm
+          image: ghcr.io/berriai/litellm:v1.82.6.rc.3
+          imagePullPolicy: IfNotPresent
+          args:
+            - '--config'
+            - '/app/config.yaml'
+          ports:
+            - name: http
+              protocol: TCP
+              containerPort: 4000
+          env:
+            - name: LITELLM_MASTER_KEY
+              valueFrom:
+                secretKeyRef:
+                  key: LITELLM_MASTER_KEY
+                  name: litellm-secret
+            # The POSTGRES_* variables are declared ahead of DATABASE_URL,
+            # which references them through $(VAR) expansion.
+            - name: POSTGRES_USER
+              valueFrom:
+                secretKeyRef:
+                  key: POSTGRES_USER
+                  name: litellm-postgres
+            - name: POSTGRES_PASSWORD
+              valueFrom:
+                secretKeyRef:
+                  key: POSTGRES_PASSWORD
+                  name: litellm-postgres
+            - name: POSTGRES_DB
+              valueFrom:
+                secretKeyRef:
+                  key: POSTGRES_DB
+                  name: litellm-postgres
+            - name: DATABASE_URL
+              value: 'postgresql://$(POSTGRES_USER):$(POSTGRES_PASSWORD)@litellm-postgres.llama.svc.cluster.local:5432/$(POSTGRES_DB)'
+          volumeMounts:
+            # Single-file mount of the ConfigMap's config.yaml key.
+            - name: litellm-config
+              subPath: config.yaml
+              mountPath: /app/config.yaml
+          resources:
+            limits:
+              cpu: 1000m
+              memory: 2Gi
+            requests:
+              cpu: 500m
+              memory: 1Gi
+---
+# ClusterIP Service in front of the LiteLLM gateway pods (port 4000).
+apiVersion: v1
+kind: Service
+metadata:
+  namespace: llama
+  name: litellm
+  labels:
+    monitoring: prometheus
+    app.kubernetes.io/part-of: llama-stack
+    app.kubernetes.io/component: gateway
+    app.kubernetes.io/name: litellm
+spec:
+  type: ClusterIP
+  # Pods are matched on the name/component label pair.
+  selector:
+    app.kubernetes.io/component: gateway
+    app.kubernetes.io/name: litellm
+  ports:
+    - name: http
+      protocol: TCP
+      port: 4000
+      # Refers to the container port named "http".
+      targetPort: http
+---
+# Traefik Ingress publishing the LiteLLM gateway at litellm.mrt0rtikize.ru over
+# HTTPS; the certificate is requested through the letsencrypt-production
+# ClusterIssuer annotation.
+apiVersion: networking.k8s.io/v1
+kind: Ingress
+metadata:
+  name: litellm
+  namespace: llama
+  labels:
+    app.kubernetes.io/name: litellm
+    app.kubernetes.io/component: gateway
+    app.kubernetes.io/part-of: llama-stack
+  annotations:
+    cert-manager.io/cluster-issuer: letsencrypt-production
+    traefik.ingress.kubernetes.io/router.entrypoints: websecure
+    traefik.ingress.kubernetes.io/router.tls: "true"
+spec:
+  ingressClassName: traefik
+  tls:
+    - hosts:
+        - litellm.mrt0rtikize.ru
+      # Secret named after this app (was "web-echo-tls", a name copied from a
+      # different application), following the "<app>-tls" pattern used by the
+      # Longhorn ingress.
+      secretName: litellm-tls
+  rules:
+    - host: litellm.mrt0rtikize.ru
+      http:
+        paths:
+          - path: /
+            pathType: Prefix
+            backend:
+              service:
+                name: litellm
+                port:
+                  number: 4000
+---
+# Scrapes /metrics on the "http" port of the LiteLLM gateway pods every 30s.
+apiVersion: monitoring.coreos.com/v1
+kind: PodMonitor
+metadata:
+  namespace: llama
+  name: litellm
+  labels:
+    release: kube-prometheus-stack
+    app.kubernetes.io/part-of: llama-stack
+    app.kubernetes.io/component: gateway
+    app.kubernetes.io/name: litellm
+spec:
+  selector:
+    matchLabels:
+      app.kubernetes.io/component: gateway
+      app.kubernetes.io/name: litellm
+  # Only pods in the llama namespace are considered.
+  namespaceSelector:
+    matchNames:
+      - llama
+  podMetricsEndpoints:
+    - path: /metrics
+      port: http
+      interval: 30s
--- a/manifests/llama/main.yaml
+++ b/manifests/llama/main.yaml
@@ -0,0 +1,166 @@
+# llama.cpp server (Vulkan image) for the Devstral model, listening on 8080.
+# An init container downloads the GGUF file into the models PVC when it is not
+# already there; the server container then loads it from /models.
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: llama-server-gpu
+  namespace: llama
+spec:
+  replicas: 1
+  # Stop the old pod before starting the new one. The default RollingUpdate
+  # would surge a second server that loads the model while the first is still
+  # running. Same strategy as the llama-server-cpu Deployment.
+  strategy:
+    type: Recreate
+  selector:
+    matchLabels:
+      app: llama-server-gpu
+  template:
+    metadata:
+      labels:
+        app: llama-server-gpu
+      annotations:
+        prometheus.io/scrape: "true"
+        prometheus.io/port: "8080"
+        prometheus.io/path: "/metrics"
+    spec:
+      # Only schedule onto nodes carrying the gpu=amd label.
+      nodeSelector:
+        gpu: amd
+
+      initContainers:
+        - name: download-model
+          image: python:3.11-slim
+          env:
+            # Keeps the Hugging Face cache on the models volume.
+            - name: HF_HOME
+              value: /models/.hf
+            - name: MODEL_REPO
+              value: "byteshape/Devstral-Small-2-24B-Instruct-2512-GGUF"
+            - name: MODEL_FILE
+              value: "Devstral-Small-2-24B-Instruct-2512-IQ4_XS-4.04bpw.gguf"
+            # optional, only if you need gated/private models
+            # - name: HUGGING_FACE_HUB_TOKEN
+            #   valueFrom:
+            #     secretKeyRef:
+            #       name: hf-token
+            #       key: token
+          # The script exits early when the model file already exists.
+          # hf_hub_download is called without local_dir_use_symlinks: that
+          # argument is deprecated and huggingface_hub is installed unpinned,
+          # so a newer release may reject it.
+          command:
+            - /bin/sh
+            - -c
+            - |
+              set -eux
+
+              MODEL_PATH="/models/${MODEL_FILE}"
+
+              if [ -f "${MODEL_PATH}" ]; then
+                echo "Model already exists at ${MODEL_PATH}, skipping download"
+                exit 0
+              fi
+
+              echo "Installing Hugging Face Hub downloader"
+              pip install --no-cache-dir huggingface_hub
+
+              echo "Downloading ${MODEL_REPO}/${MODEL_FILE}"
+              python - <<'PY'
+              import os
+              from huggingface_hub import hf_hub_download
+
+              repo_id = os.environ["MODEL_REPO"]
+              filename = os.environ["MODEL_FILE"]
+
+              token = os.environ.get("HUGGING_FACE_HUB_TOKEN")
+
+              path = hf_hub_download(
+                  repo_id=repo_id,
+                  filename=filename,
+                  local_dir="/models",
+                  token=token,
+              )
+              print(f"Downloaded to: {path}")
+              PY
+
+              ls -lah /models
+          volumeMounts:
+            - name: models
+              mountPath: /models
+
+      containers:
+        - name: llama
+          # NOTE(review): "server-vulkan" is a moving tag; pin a release tag or
+          # digest if reproducible rollouts matter.
+          image: ghcr.io/ggml-org/llama.cpp:server-vulkan
+          args:
+            - "--model"
+            - "/models/Devstral-Small-2-24B-Instruct-2512-IQ4_XS-4.04bpw.gguf"
+            - "--host"
+            - "0.0.0.0"
+            - "--port"
+            - "8080"
+            - "--n-gpu-layers"
+            - "999"
+            - "--metrics"
+
+            # performance tuning
+            - "--ctx-size"
+            - "32768"
+            - "--parallel"
+            - "4"
+
+            # KV cache quantization
+            - "--cache-type-k"
+            - "q8_0"
+            - "--cache-type-v"
+            - "q8_0"
+          ports:
+            - name: http
+              containerPort: 8080
+
+          # NOTE(review): presumably privileged so the container can open the
+          # host /dev/dri device nodes mounted below — confirm a narrower
+          # setting is not sufficient.
+          securityContext:
+            privileged: true
+
+          volumeMounts:
+            - name: models
+              mountPath: /models
+            - name: dri
+              mountPath: /dev/dri
+
+          resources:
+            requests:
+              cpu: "2"
+              memory: "4Gi"
+            limits:
+              cpu: "2"
+              memory: "4Gi"
+
+      volumes:
+        # Claim shared with the llama-server-gpu-rp Deployment.
+        - name: models
+          persistentVolumeClaim:
+            claimName: llama-gpu-models-pvc
+        - name: dri
+          hostPath:
+            path: /dev/dri
+            type: Directory
+---
+# ClusterIP Service for the llama-server-gpu pods (port 8080).
+apiVersion: v1
+kind: Service
+metadata:
+  namespace: llama
+  name: llama-server-gpu
+spec:
+  type: ClusterIP
+  selector:
+    app: llama-server-gpu
+  ports:
+    - name: http
+      # Forwards to the container port named "http".
+      targetPort: http
+      port: 8080
+---
+# Scrapes /metrics on the "http" port of the llama-server-gpu pods every 15s.
+apiVersion: monitoring.coreos.com/v1
+kind: PodMonitor
+metadata:
+  namespace: llama
+  name: llama-server-gpu
+  labels:
+    app: llama-server-gpu
+spec:
+  selector:
+    matchLabels:
+      app: llama-server-gpu
+  # Only pods in the llama namespace are considered.
+  namespaceSelector:
+    matchNames:
+      - llama
+  podMetricsEndpoints:
+    - interval: 15s
+      path: /metrics
+      port: http
--- a/manifests/llama/namespace.yaml
+++ b/manifests/llama/namespace.yaml
@@ -0,0 +1,42 @@
+# Namespace that holds the llama workloads.
+kind: Namespace
+apiVersion: v1
+metadata:
+  name: llama
+---
+# apiVersion: storage.k8s.io/v1
+# kind: StorageClass
+# metadata:
+#   name: longhorn-llama
+# provisioner: driver.longhorn.io
+# parameters:
+#   numberOfReplicas: "2"
+#   staleReplicaTimeout: "30"
+# allowVolumeExpansion: true
+# reclaimPolicy: Retain
+# volumeBindingMode: Immediate
+# ---
+# 50Gi model storage claimed by the GPU llama-server Deployments.
+kind: PersistentVolumeClaim
+apiVersion: v1
+metadata:
+  namespace: llama
+  name: llama-gpu-models-pvc
+spec:
+  # storageClassName: longhorn-llama
+  resources:
+    requests:
+      storage: 50Gi
+  accessModes:
+    - ReadWriteOnce
+---
+# 100Gi model storage claimed by the CPU llama-server Deployment.
+kind: PersistentVolumeClaim
+apiVersion: v1
+metadata:
+  namespace: llama
+  name: llama-cpu-models-pvc
+spec:
+  # storageClassName: longhorn-llama
+  resources:
+    requests:
+      storage: 100Gi
+  accessModes:
+    - ReadWriteOnce
--- a/manifests/llama/rp.yaml
+++ b/manifests/llama/rp.yaml
@@ -0,0 +1,166 @@
+# Second llama.cpp server (Vulkan image), serving the Omega-Darker-Gaslight
+# model on port 8080. An init container downloads the GGUF file into the models
+# PVC when it is not already there; the server container loads it from /models.
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: llama-server-gpu-rp
+  namespace: llama
+spec:
+  replicas: 1
+  # Stop the old pod before starting the new one. The default RollingUpdate
+  # would surge a second server that loads the model while the first is still
+  # running. Same strategy as the llama-server-cpu Deployment.
+  strategy:
+    type: Recreate
+  selector:
+    matchLabels:
+      app: llama-server-gpu-rp
+  template:
+    metadata:
+      labels:
+        app: llama-server-gpu-rp
+      annotations:
+        prometheus.io/scrape: "true"
+        prometheus.io/port: "8080"
+        prometheus.io/path: "/metrics"
+    spec:
+      # Only schedule onto nodes carrying the gpu=amd label.
+      nodeSelector:
+        gpu: amd
+
+      initContainers:
+        - name: download-model
+          image: python:3.11-slim
+          env:
+            # Keeps the Hugging Face cache on the models volume.
+            - name: HF_HOME
+              value: /models/.hf
+            - name: MODEL_REPO
+              value: "mradermacher/Omega-Darker-Gaslight_The-Final-Forgotten-Fever-Dream-24B-GGUF"
+            - name: MODEL_FILE
+              value: "Omega-Darker-Gaslight_The-Final-Forgotten-Fever-Dream-24B.Q4_K_S.gguf"
+            # optional, only if you need gated/private models
+            # - name: HUGGING_FACE_HUB_TOKEN
+            #   valueFrom:
+            #     secretKeyRef:
+            #       name: hf-token
+            #       key: token
+          # The script exits early when the model file already exists.
+          # hf_hub_download is called without local_dir_use_symlinks: that
+          # argument is deprecated and huggingface_hub is installed unpinned,
+          # so a newer release may reject it.
+          command:
+            - /bin/sh
+            - -c
+            - |
+              set -eux
+
+              MODEL_PATH="/models/${MODEL_FILE}"
+
+              if [ -f "${MODEL_PATH}" ]; then
+                echo "Model already exists at ${MODEL_PATH}, skipping download"
+                exit 0
+              fi
+
+              echo "Installing Hugging Face Hub downloader"
+              pip install --no-cache-dir huggingface_hub
+
+              echo "Downloading ${MODEL_REPO}/${MODEL_FILE}"
+              python - <<'PY'
+              import os
+              from huggingface_hub import hf_hub_download
+
+              repo_id = os.environ["MODEL_REPO"]
+              filename = os.environ["MODEL_FILE"]
+
+              token = os.environ.get("HUGGING_FACE_HUB_TOKEN")
+
+              path = hf_hub_download(
+                  repo_id=repo_id,
+                  filename=filename,
+                  local_dir="/models",
+                  token=token,
+              )
+              print(f"Downloaded to: {path}")
+              PY
+
+              ls -lah /models
+          volumeMounts:
+            - name: models
+              mountPath: /models
+
+      containers:
+        - name: llama
+          # NOTE(review): "server-vulkan" is a moving tag; pin a release tag or
+          # digest if reproducible rollouts matter.
+          image: ghcr.io/ggml-org/llama.cpp:server-vulkan
+          args:
+            - "--model"
+            - "/models/Omega-Darker-Gaslight_The-Final-Forgotten-Fever-Dream-24B.Q4_K_S.gguf"
+            - "--host"
+            - "0.0.0.0"
+            - "--port"
+            - "8080"
+            - "--n-gpu-layers"
+            - "999"
+            - "--metrics"
+
+            # performance tuning
+            - "--ctx-size"
+            - "32768"
+            - "--parallel"
+            - "1"
+
+            # KV cache quantization
+            - "--cache-type-k"
+            - "q8_0"
+            - "--cache-type-v"
+            - "q8_0"
+          ports:
+            - name: http
+              containerPort: 8080
+
+          # NOTE(review): presumably privileged so the container can open the
+          # host /dev/dri device nodes mounted below — confirm a narrower
+          # setting is not sufficient.
+          securityContext:
+            privileged: true
+
+          volumeMounts:
+            - name: models
+              mountPath: /models
+            - name: dri
+              mountPath: /dev/dri
+
+          resources:
+            requests:
+              cpu: "2"
+              memory: "4Gi"
+            limits:
+              cpu: "2"
+              memory: "4Gi"
+
+      volumes:
+        # Claim shared with the llama-server-gpu Deployment.
+        - name: models
+          persistentVolumeClaim:
+            claimName: llama-gpu-models-pvc
+        - name: dri
+          hostPath:
+            path: /dev/dri
+            type: Directory
+---
+# ClusterIP Service for the llama-server-gpu-rp pods (port 8080).
+apiVersion: v1
+kind: Service
+metadata:
+  namespace: llama
+  name: llama-server-gpu-rp
+spec:
+  type: ClusterIP
+  selector:
+    app: llama-server-gpu-rp
+  ports:
+    - name: http
+      # Forwards to the container port named "http".
+      targetPort: http
+      port: 8080
+---
+# Scrapes /metrics on the "http" port of the llama-server-gpu-rp pods every 15s.
+apiVersion: monitoring.coreos.com/v1
+kind: PodMonitor
+metadata:
+  namespace: llama
+  name: llama-server-gpu-rp
+  labels:
+    app: llama-server-gpu-rp
+spec:
+  selector:
+    matchLabels:
+      app: llama-server-gpu-rp
+  # Only pods in the llama namespace are considered.
+  namespaceSelector:
+    matchNames:
+      - llama
+  podMetricsEndpoints:
+    - interval: 15s
+      path: /metrics
+      port: http
--- a/manifests/longhorn/longhorn-ingress.yaml
+++ b/manifests/longhorn/longhorn-ingress.yaml
@@ -0,0 +1,26 @@
+apiVersion: networking.k8s.io/v1
+kind: Ingress  # routes longhorn.mrt0rtikize.ru to the longhorn-frontend Service on port 80
+metadata:
+  namespace: longhorn-system
+  name: longhorn-ingress
+  annotations:  # NOTE(review): no auth middleware is referenced here; confirm the UI is protected elsewhere
+    traefik.ingress.kubernetes.io/router.tls: 'true'
+    traefik.ingress.kubernetes.io/router.entrypoints: websecure
+    cert-manager.io/cluster-issuer: letsencrypt-production
+spec:
+  ingressClassName: traefik
+  rules:
+    - host: longhorn.mrt0rtikize.ru
+      http:
+        paths:
+          - pathType: Prefix
+            path: /
+            backend:
+              service:
+                port:
+                  number: 80
+                name: longhorn-frontend
+  tls:
+    - secretName: longhorn-tls  # TLS secret name for the host listed below
+      hosts:
+        - longhorn.mrt0rtikize.ru
--- a/manifests/longhorn/test-pvc.yaml
+++ b/manifests/longhorn/test-pvc.yaml
@@ -0,0 +1,23 @@
+apiVersion: storage.k8s.io/v1
+kind: StorageClass  # class provisioned by driver.longhorn.io; used by longhorn-pvc in this file
+metadata:
+  name: longhorn
+provisioner: driver.longhorn.io
+volumeBindingMode: Immediate
+reclaimPolicy: Retain
+allowVolumeExpansion: true
+parameters:  # values stay quoted so they are passed as strings
+  staleReplicaTimeout: "30"
+  numberOfReplicas: "2"
+---
+apiVersion: v1
+kind: PersistentVolumeClaim  # 2Gi ReadWriteOnce claim against the longhorn class
+metadata:
+  name: longhorn-pvc  # NOTE(review): no namespace set; confirm which namespace this should land in
+spec:
+  storageClassName: longhorn
+  resources:
+    requests:
+      storage: 2Gi
+  accessModes:
+    - ReadWriteOnce
--- a/manifests/metallb/ip-address-pool.yaml
+++ b/manifests/metallb/ip-address-pool.yaml
@@ -0,0 +1,10 @@
+apiVersion: metallb.io/v1beta1
+kind: IPAddressPool  # address range referenced by the default-advertisement L2Advertisement
+metadata:
+  namespace: metallb-system
+  name: default-address-pool
+spec:
+  autoAssign: true
+  avoidBuggyIPs: true
+  addresses:
+    - 10.0.0.120-10.0.0.200
--- a/manifests/metallb/l2advert.yaml
+++ b/manifests/metallb/l2advert.yaml
@@ -0,0 +1,8 @@
+apiVersion: metallb.io/v1beta1
+kind: L2Advertisement  # advertises the addresses of default-address-pool
+metadata:
+  namespace: metallb-system
+  name: default-advertisement
+spec:
+  ipAddressPools:  # pool names, matched against IPAddressPool metadata.name
+    - default-address-pool
--- a/manifests/metrics/grafana-ingress.yaml
+++ b/manifests/metrics/grafana-ingress.yaml
@@ -0,0 +1,26 @@
+apiVersion: networking.k8s.io/v1
+kind: Ingress  # routes grafana.mrt0rtikize.ru to the kube-prometheus-stack-grafana Service
+metadata:
+  namespace: metrics
+  name: grafana
+  annotations:
+    traefik.ingress.kubernetes.io/router.tls: 'true'
+    traefik.ingress.kubernetes.io/router.entrypoints: websecure
+    cert-manager.io/cluster-issuer: letsencrypt-production
+spec:
+  ingressClassName: traefik
+  rules:
+    - host: grafana.mrt0rtikize.ru
+      http:
+        paths:
+          - pathType: Prefix
+            path: /
+            backend:
+              service:
+                port:
+                  number: 80
+                name: kube-prometheus-stack-grafana  # NOTE(review): must match the Grafana Service name of the Helm release; verify
+  tls:
+    - secretName: grafana-tls
+      hosts:
+        - grafana.mrt0rtikize.ru
--- a/manifests/metrics/kube-prometheus-stack-values.yaml
+++ b/manifests/metrics/kube-prometheus-stack-values.yaml
@@ -0,0 +1,90 @@
+fullnameOverride: kube-prometheus  # Helm values for the kube-prometheus-stack chart
+namespaceOverride: metrics
+
+prometheusOperator:
+  namespace: metrics
+  admissionWebhooks:
+    failurePolicy: Ignore
+
+alertmanager:
+  enabled: true
+  alertmanagerSpec:
+    resources:
+      requests:
+        cpu: 50m
+        memory: 128Mi
+      limits:
+        cpu: 200m
+        memory: 512Mi
+    storage:  # no storageClassName given, so the claim uses the cluster default class
+      volumeClaimTemplate:
+        spec:
+          accessModes:
+            - ReadWriteOnce
+          resources:
+            requests:
+              storage: 10Gi
+
+prometheus:
+  enabled: true
+  prometheusSpec:
+    replicas: 1
+    retention: 15d  # local TSDB retention; samples are also forwarded through remoteWrite below
+    walCompression: true
+    serviceMonitorSelectorNilUsesHelmValues: false  # do not restrict ServiceMonitors to this Helm release
+    podMonitorSelectorNilUsesHelmValues: false  # same for PodMonitors (e.g. the llama PodMonitor)
+    resources:
+      requests:
+        cpu: 100m
+        memory: 512Mi
+      limits:
+        cpu: 1000m
+        memory: 1Gi
+    storageSpec:  # no storageClassName given, so the claim uses the cluster default class
+      volumeClaimTemplate:
+        spec:
+          accessModes:
+            - ReadWriteOnce
+          resources:
+            requests:
+              storage: 50Gi
+    remoteWrite:
+      - url: http://victoria-metrics.metrics.svc.cluster.local:8428/api/v1/write
+        queueConfig:
+          maxSamplesPerSend: 10000
+          capacity: 20000  # per-shard buffer; must exceed maxSamplesPerSend so a full batch can queue
+          maxShards: 30
+
+kubeEtcd:  # control-plane scrape targets below are switched off
+  enabled: false
+
+kubeControllerManager:
+  enabled: false
+
+kubeScheduler:
+  enabled: false
+
+kubeProxy:
+  enabled: false
+
+grafana:
+  enabled: true
+  adminUser: admin
+  adminPassword: change-me  # NOTE(review): placeholder committed to Git; replace via grafana `admin.existingSecret` before exposing
+  defaultDashboardsEnabled: true
+  resources:
+    requests:
+      cpu: 50m
+      memory: 256Mi
+    limits:
+      cpu: 200m
+      memory: 512Mi
+  persistence:
+    enabled: true
+    size: 10Gi
+  additionalDataSources:  # secondary data source querying VictoriaMetrics through its Prometheus-compatible API
+    - name: victoria-metrics
+      type: prometheus
+      access: proxy
+      url: http://victoria-metrics.metrics.svc.cluster.local:8428
+      isDefault: false
--- a/manifests/metrics/namespace.yaml
+++ b/manifests/metrics/namespace.yaml
@@ -0,0 +1,4 @@
+apiVersion: v1
+kind: Namespace  # namespace referenced by the Grafana ingress and VictoriaMetrics Service manifests
+metadata:
+  name: metrics
--- a/manifests/metrics/victoria-metrics-service.yaml
+++ b/manifests/metrics/victoria-metrics-service.yaml
@@ -0,0 +1,19 @@
+apiVersion: v1
+kind: Service  # stable in-cluster name `victoria-metrics` on port 8428
+metadata:
+  namespace: metrics
+  name: victoria-metrics
+  labels:
+    app: server
+    app.kubernetes.io/instance: victoria-metrics-single
+    app.kubernetes.io/name: victoria-metrics-single
+spec:
+  type: ClusterIP
+  ports:
+    - port: 8428
+      targetPort: 8428
+      name: http
+  selector:  # presumably the labels the victoria-metrics-single chart sets on its server pod; verify
+    app: server
+    app.kubernetes.io/instance: victoria-metrics-single
+    app.kubernetes.io/name: victoria-metrics-single
--- a/manifests/metrics/victoria-metrics-single-values.yaml
+++ b/manifests/metrics/victoria-metrics-single-values.yaml
@@ -0,0 +1,23 @@
+namespaceOverride: metrics  # Helm values for the victoria-metrics-single chart
+fullnameOverride: victoria-metrics-single
+
+serviceAccount:
+  create: true
+
+server:
+  replicaCount: 1
+  retentionPeriod: 30d
+  scrapeInterval: 30s  # NOTE(review): confirm the chart actually reads this key
+  service:
+    port: 8428
+    type: ClusterIP
+  persistentVolume:
+    size: 200Gi
+    enabled: true
+  resources:
+    limits:
+      memory: 1Gi
+      cpu: 500m
+    requests:
+      memory: 256Mi
+      cpu: 100m
--- a/manifests/sillytavern/configmap.yaml
+++ b/manifests/sillytavern/configmap.yaml
@@ -0,0 +1,122 @@
+apiVersion: v1
+kind: ConfigMap  # holds the SillyTavern config.yaml that the sillytavern Deployment mounts via subPath
+metadata:
+  name: sillytavern-config
+  namespace: sillytavern
+data:  # the literal block below is file content; basicAuth* here is presumably overridden by the SILLYTAVERN_* env vars from the Secret - verify
+  config.yaml: |
+    dataRoot: ./data
+    listen: true
+    listenAddress:
+      ipv4: 0.0.0.0
+      ipv6: '[::]'
+    protocol:
+        ipv4: true
+        ipv6: false
+    dnsPreferIPv6: false
+    browserLaunch:
+      enabled: false
+      browser: 'default'
+      hostname: 'auto'
+      port: -1
+      avoidLocalhost: false
+    port: 8000
+    ssl:
+      enabled: false
+      certPath: "./certs/cert.pem"
+      keyPath: "./certs/privkey.pem"
+      keyPassphrase: ""
+    whitelistMode: false
+    enableForwardedWhitelist: false
+    whitelist:
+      - ::1
+      - 127.0.0.1
+    whitelistDockerHosts: false
+    basicAuthMode: false
+    basicAuthUser:
+      username: "user"
+      password: "password"
+    enableCorsProxy: false
+    requestProxy:
+      enabled: false
+      url: "socks5://username:password@example.com:1080"
+      bypass:
+        - localhost
+        - 127.0.0.1
+    enableUserAccounts: false
+    enableDiscreetLogin: false
+    perUserBasicAuth: false
+    sso:
+      autheliaAuth: false
+      authentikAuth: false
+    hostWhitelist:
+      enabled: false
+      scan: true
+      hosts: []
+    sessionTimeout: -1
+    disableCsrfProtection: false
+    securityOverride: false
+    logging:
+      enableAccessLog: true
+      minLogLevel: 0
+    rateLimiting:
+      preferRealIpHeader: false
+    backups:
+      common:
+        numberOfBackups: 50
+      chat:
+        enabled: true
+        checkIntegrity: true
+        maxTotalBackups: -1
+        throttleInterval: 10000
+    thumbnails:
+      enabled: true
+      format: "jpg"
+      quality: 95
+      dimensions: { 'bg': [160, 90], 'avatar': [96, 144], 'persona': [96, 144] }
+    performance:
+      lazyLoadCharacters: false
+      memoryCacheCapacity: '100mb'
+      useDiskCache: true
+    cacheBuster:
+      enabled: false
+      userAgentPattern: ''
+    allowKeysExposure: false
+    skipContentCheck: false
+    whitelistImportDomains:
+      - localhost
+      - cdn.discordapp.com
+      - files.catbox.moe
+      - raw.githubusercontent.com
+      - char-archive.evulid.cc
+    requestOverrides: []
+    extensions:
+      enabled: true
+      autoUpdate: true
+      models:
+        autoDownload: true
+        classification: Cohee/distilbert-base-uncased-go-emotions-onnx
+        captioning: Xenova/vit-gpt2-image-captioning
+        embedding: Cohee/jina-embeddings-v2-base-en
+        speechToText: Xenova/whisper-small
+        textToSpeech: Xenova/speecht5_tts
+    enableDownloadableTokenizers: true
+    promptPlaceholder: "[Start a new chat]"
+    openai:
+      randomizeUserId: false
+      captionSystemPrompt: ""
+    deepl:
+      formality: default
+    mistral:
+      enablePrefix: false
+    ollama:
+      keepAlive: -1
+      batchSize: -1
+    claude:
+      enableSystemPromptCache: false
+      cachingAtDepth: -1
+      extendedTTL: false
+    gemini:
+      apiVersion: 'v1beta'
+    enableServerPlugins: false
+    enableServerPluginsAutoUpdate: true
--- a/manifests/sillytavern/deployment.yaml
+++ b/manifests/sillytavern/deployment.yaml
@@ -0,0 +1,61 @@
+apiVersion: apps/v1
+kind: Deployment  # single-replica SillyTavern with config from a ConfigMap and three PVC-backed directories
+metadata:
+  namespace: sillytavern
+  name: sillytavern
+spec:
+  strategy:
+    type: Recreate  # old pod is removed before the replacement starts (the claims are ReadWriteOnce)
+  replicas: 1
+  selector:
+    matchLabels:
+      app: sillytavern
+  template:
+    metadata:
+      labels:
+        app: sillytavern
+    spec:
+      volumes:
+        - name: config
+          configMap:
+            name: sillytavern-config
+        - name: data
+          persistentVolumeClaim:
+            claimName: sillytavern-data
+        - name: plugins
+          persistentVolumeClaim:
+            claimName: sillytavern-plugins
+        - name: extensions
+          persistentVolumeClaim:
+            claimName: sillytavern-extensions
+      containers:
+        - name: sillytavern
+          image: ghcr.io/sillytavern/sillytavern:latest  # NOTE(review): floating tag; consider pinning a release
+          resources:
+            limits:
+              memory: 4Gi
+              cpu: '4'
+            requests:
+              memory: 1Gi
+              cpu: '1'
+          ports:
+            - protocol: TCP
+              containerPort: 8000
+          envFrom:  # imports every key of the sillytavern-auth Secret as an environment variable
+            - secretRef:
+                name: sillytavern-auth
+          env:
+            - name: NODE_ENV
+              value: production
+            - name: FORCE_COLOR
+              value: '1'
+          volumeMounts:
+            - name: config
+              subPath: config.yaml  # mounts only this key as a single file
+              mountPath: /home/node/app/config/config.yaml
+            - name: data
+              mountPath: /home/node/app/data
+            - name: plugins
+              mountPath: /home/node/app/plugins
+            - name: extensions
+              mountPath: /home/node/app/public/scripts/extensions/third-party
--- a/manifests/sillytavern/ingress.yaml
+++ b/manifests/sillytavern/ingress.yaml
@@ -0,0 +1,30 @@
+apiVersion: networking.k8s.io/v1
+kind: Ingress  # routes sillytavern.mrt0rtikize.ru to the sillytavern Service on port 8000
+metadata:
+  namespace: sillytavern
+  name: sillytavern
+  annotations:
+    traefik.ingress.kubernetes.io/router.tls: 'true'
+    traefik.ingress.kubernetes.io/router.entrypoints: websecure
+    cert-manager.io/cluster-issuer: letsencrypt-production
+  labels:
+    app.kubernetes.io/part-of: sillytavern
+    app.kubernetes.io/component: frontend
+    app.kubernetes.io/name: sillytavern
+spec:
+  ingressClassName: traefik
+  rules:
+    - host: sillytavern.mrt0rtikize.ru
+      http:
+        paths:
+          - pathType: Prefix
+            path: /
+            backend:
+              service:
+                port:
+                  number: 8000
+                name: sillytavern
+  tls:
+    - secretName: sillytavern-tls
+      hosts:
+        - sillytavern.mrt0rtikize.ru
--- a/manifests/sillytavern/namespace.yaml
+++ b/manifests/sillytavern/namespace.yaml
@@ -0,0 +1,4 @@
+apiVersion: v1
+kind: Namespace  # namespace referenced by every sillytavern manifest in this directory
+metadata:
+  name: sillytavern
--- a/manifests/sillytavern/pvc.yaml
+++ b/manifests/sillytavern/pvc.yaml
@@ -0,0 +1,35 @@
+apiVersion: v1
+kind: PersistentVolumeClaim  # 5Gi claim; the Deployment mounts it at /home/node/app/data
+metadata:
+  namespace: sillytavern
+  name: sillytavern-data
+spec:
+  resources:
+    requests:
+      storage: 5Gi
+  accessModes:
+    - ReadWriteOnce
+---
+apiVersion: v1
+kind: PersistentVolumeClaim  # 1Gi claim; the Deployment mounts it at /home/node/app/plugins
+metadata:
+  namespace: sillytavern
+  name: sillytavern-plugins
+spec:
+  resources:
+    requests:
+      storage: 1Gi
+  accessModes:
+    - ReadWriteOnce
+---
+apiVersion: v1
+kind: PersistentVolumeClaim  # 1Gi claim; the Deployment mounts it at the third-party extensions directory
+metadata:
+  namespace: sillytavern
+  name: sillytavern-extensions
+spec:
+  resources:
+    requests:
+      storage: 1Gi
+  accessModes:
+    - ReadWriteOnce
--- a/manifests/sillytavern/secret.yaml
+++ b/manifests/sillytavern/secret.yaml
@@ -0,0 +1,10 @@
+apiVersion: v1
+kind: Secret  # basic-auth settings; the sillytavern Deployment imports every key through envFrom
+metadata:
+  name: sillytavern-auth
+  namespace: sillytavern
+type: Opaque
+stringData:  # stringData only accepts strings, so every value is quoted to survive rotation to number-like values
+  SILLYTAVERN_BASICAUTHMODE: "true"
+  SILLYTAVERN_BASICAUTHUSER_USERNAME: "admin"
+  SILLYTAVERN_BASICAUTHUSER_PASSWORD: "0cdaa30c396dae77"  # NOTE(review): plaintext credential in Git; rotate it and move to a sealed/external secret
--- a/manifests/sillytavern/service.yaml
+++ b/manifests/sillytavern/service.yaml
@@ -0,0 +1,13 @@
+apiVersion: v1
+kind: Service  # in-cluster endpoint that the sillytavern Ingress points at
+metadata:
+  namespace: sillytavern
+  name: sillytavern
+spec:
+  type: ClusterIP
+  ports:
+    - protocol: TCP
+      targetPort: 8000
+      port: 8000
+  selector:
+    app: sillytavern
--- a/metrics/README.md
+++ b/metrics/README.md
@@ -0,0 +1,62 @@
+# metrics stack
+
+Opinionated manifests for deploying kube-prometheus-stack (Prometheus Operator + Grafana) together with a VictoriaMetrics single-node database in the `metrics` namespace.
+
+## Install / upgrade
+
+```sh
+kubectl apply -f metrics/namespace.yaml
+
+# kube-prometheus-stack
+# register the prometheus-community chart repository, then install/upgrade the release
+helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
+helm repo update
+helm upgrade --install kube-prometheus-stack prometheus-community/kube-prometheus-stack \
+  --namespace metrics \
+  --values metrics/kube-prometheus-stack-values.yaml
+
+kubectl --namespace metrics get secret kube-prometheus-stack-grafana \
+  -o jsonpath="{.data.admin-password}" | base64 -d
+echo
+
+# expose grafana via Traefik
+kubectl apply -f metrics/grafana-ingress.yaml
+kubectl -n metrics get ingress grafana
+
+# victoria metrics for long-term storage
+helm repo add victoria-metrics https://victoriametrics.github.io/helm-charts
+helm upgrade --install victoria-metrics-single victoria-metrics/victoria-metrics-single \
+  --namespace metrics \
+  --values metrics/victoria-metrics-single-values.yaml
+
+# expose victoria metrics via ClusterIP for Prometheus/Grafana
+kubectl apply -f metrics/victoria-metrics-service.yaml
+```
+
+The manifests do not set a storage class, so the PVCs fall back to the cluster's default StorageClass; add `storageClassName`/`storageClass` fields and adjust the capacities if you prefer something else.
+Before applying `metrics/grafana-ingress.yaml`, update the host (`grafana.mrt0rtikize.ru`) and, if needed, change the `cert-manager.io/cluster-issuer` annotation to match your staging/production workflow. The ingress uses the `traefik` ingress class.
+
+## Components
+
+- **Prometheus Operator** provisions Prometheus, Alertmanager and related CRDs. Remote write targets VictoriaMetrics for durable retention.
+- **Grafana** is pre-provisioned with persistence enabled and a secondary data source pointing at VictoriaMetrics.
+- **VictoriaMetrics** stores metrics for long-term retention while also serving query traffic for Grafana. A dedicated ClusterIP service (`metrics/victoria-metrics-service.yaml`) exposes port 8428 for Prometheus remote write and Grafana queries.
+
+## Database choices
+
+Prometheus ships with an embedded TSDB. For longer retention, clustering or multi-tenant needs you can offload data to:
+
+- **VictoriaMetrics** (single, clustered, or managed) – cost-efficient, Prometheus-compatible, supports multi-year retention.
+- **Thanos / Cortex / Grafana Mimir** – horizontally scalable object-storage backed TSDBs with multi-cluster federation.
+- **ClickHouse / TimescaleDB / PostgreSQL** – SQL stores for advanced analytics (requires Promscale or similar adapter).
+- **Graphite / InfluxDB** – legacy or streaming-friendly stores; integrate via remote write adapters.
+
+Pick the backend that matches your retention and query latency requirements. Remote write configuration lives under `prometheus.prometheusSpec.remoteWrite` in `kube-prometheus-stack-values.yaml`.
+
+## Post-install checks
+
+```sh
+kubectl -n metrics get pods
+kubectl -n metrics get svc
+kubectl -n metrics get prometheus,prometheusrules,servicemonitors
+```
--- a/metrics/grafana-ingress.yaml
+++ b/metrics/grafana-ingress.yaml
@@ -0,0 +1,26 @@
+apiVersion: networking.k8s.io/v1
+kind: Ingress  # routes grafana.mrt0rtikize.ru to the kube-prometheus-stack-grafana Service
+metadata:
+  namespace: metrics
+  name: grafana
+  annotations:
+    traefik.ingress.kubernetes.io/router.tls: 'true'
+    traefik.ingress.kubernetes.io/router.entrypoints: websecure
+    cert-manager.io/cluster-issuer: letsencrypt-production
+spec:
+  ingressClassName: traefik
+  rules:
+    - host: grafana.mrt0rtikize.ru
+      http:
+        paths:
+          - pathType: Prefix
+            path: /
+            backend:
+              service:
+                port:
+                  number: 80
+                name: kube-prometheus-stack-grafana  # NOTE(review): must match the Grafana Service name of the Helm release; verify
+  tls:
+    - secretName: grafana-tls
+      hosts:
+        - grafana.mrt0rtikize.ru
--- a/metrics/kube-prometheus-stack-values.yaml
+++ b/metrics/kube-prometheus-stack-values.yaml
@@ -0,0 +1,90 @@
+fullnameOverride: kube-prometheus  # Helm values for the kube-prometheus-stack chart
+namespaceOverride: metrics
+
+prometheusOperator:
+  namespace: metrics
+  admissionWebhooks:
+    failurePolicy: Ignore
+
+alertmanager:
+  enabled: true
+  alertmanagerSpec:
+    resources:
+      requests:
+        cpu: 50m
+        memory: 128Mi
+      limits:
+        cpu: 200m
+        memory: 512Mi
+    storage:  # no storageClassName given, so the claim uses the cluster default class
+      volumeClaimTemplate:
+        spec:
+          accessModes:
+            - ReadWriteOnce
+          resources:
+            requests:
+              storage: 10Gi
+
+prometheus:
+  enabled: true
+  prometheusSpec:
+    replicas: 1
+    retention: 15d  # local TSDB retention; samples are also forwarded through remoteWrite below
+    walCompression: true
+    serviceMonitorSelectorNilUsesHelmValues: false  # do not restrict ServiceMonitors to this Helm release
+    podMonitorSelectorNilUsesHelmValues: false  # same for PodMonitors
+    resources:
+      requests:
+        cpu: 100m
+        memory: 512Mi
+      limits:
+        cpu: 1000m
+        memory: 1Gi
+    storageSpec:  # no storageClassName given, so the claim uses the cluster default class
+      volumeClaimTemplate:
+        spec:
+          accessModes:
+            - ReadWriteOnce
+          resources:
+            requests:
+              storage: 50Gi
+    remoteWrite:
+      - url: http://victoria-metrics.metrics.svc.cluster.local:8428/api/v1/write
+        queueConfig:
+          maxSamplesPerSend: 10000
+          capacity: 20000  # per-shard buffer; must exceed maxSamplesPerSend so a full batch can queue
+          maxShards: 30
+
+kubeEtcd:  # control-plane scrape targets below are switched off
+  enabled: false
+
+kubeControllerManager:
+  enabled: false
+
+kubeScheduler:
+  enabled: false
+
+kubeProxy:
+  enabled: false
+
+grafana:
+  enabled: true
+  adminUser: admin
+  adminPassword: change-me  # NOTE(review): placeholder committed to Git; replace via grafana `admin.existingSecret` before exposing
+  defaultDashboardsEnabled: true
+  resources:
+    requests:
+      cpu: 50m
+      memory: 256Mi
+    limits:
+      cpu: 200m
+      memory: 512Mi
+  persistence:
+    enabled: true
+    size: 10Gi
+  additionalDataSources:  # secondary data source querying VictoriaMetrics through its Prometheus-compatible API
+    - name: victoria-metrics
+      type: prometheus
+      access: proxy
+      url: http://victoria-metrics.metrics.svc.cluster.local:8428
+      isDefault: false
--- a/metrics/namespace.yaml
+++ b/metrics/namespace.yaml
@@ -0,0 +1,4 @@
+apiVersion: v1
+kind: Namespace  # namespace referenced by the Grafana ingress and VictoriaMetrics Service manifests
+metadata:
+  name: metrics
--- a/metrics/victoria-metrics-service.yaml
+++ b/metrics/victoria-metrics-service.yaml
@@ -0,0 +1,19 @@
+apiVersion: v1
+kind: Service  # stable in-cluster name `victoria-metrics` on port 8428
+metadata:
+  namespace: metrics
+  name: victoria-metrics
+  labels:
+    app: server
+    app.kubernetes.io/instance: victoria-metrics-single
+    app.kubernetes.io/name: victoria-metrics-single
+spec:
+  type: ClusterIP
+  ports:
+    - port: 8428
+      targetPort: 8428
+      name: http
+  selector:  # presumably the labels the victoria-metrics-single chart sets on its server pod; verify
+    app: server
+    app.kubernetes.io/instance: victoria-metrics-single
+    app.kubernetes.io/name: victoria-metrics-single
--- a/metrics/victoria-metrics-single-values.yaml
+++ b/metrics/victoria-metrics-single-values.yaml
@@ -0,0 +1,23 @@
+namespaceOverride: metrics  # Helm values for the victoria-metrics-single chart
+fullnameOverride: victoria-metrics-single
+
+serviceAccount:
+  create: true
+
+server:
+  replicaCount: 1
+  retentionPeriod: 30d
+  scrapeInterval: 30s  # NOTE(review): confirm the chart actually reads this key
+  service:
+    port: 8428
+    type: ClusterIP
+  persistentVolume:
+    size: 200Gi
+    enabled: true
+  resources:
+    limits:
+      memory: 1Gi
+      cpu: 500m
+    requests:
+      memory: 256Mi
+      cpu: 100m
--- a/sillytavern/configmap.yaml
+++ b/sillytavern/configmap.yaml
@@ -0,0 +1,122 @@
+apiVersion: v1
+kind: ConfigMap  # holds the SillyTavern config.yaml that the sillytavern Deployment mounts via subPath
+metadata:
+  name: sillytavern-config
+  namespace: sillytavern
+data:  # the literal block below is file content; basicAuth* here is presumably overridden by the SILLYTAVERN_* env vars from the Secret - verify
+  config.yaml: |
+    dataRoot: ./data
+    listen: true
+    listenAddress:
+      ipv4: 0.0.0.0
+      ipv6: '[::]'
+    protocol:
+        ipv4: true
+        ipv6: false
+    dnsPreferIPv6: false
+    browserLaunch:
+      enabled: false
+      browser: 'default'
+      hostname: 'auto'
+      port: -1
+      avoidLocalhost: false
+    port: 8000
+    ssl:
+      enabled: false
+      certPath: "./certs/cert.pem"
+      keyPath: "./certs/privkey.pem"
+      keyPassphrase: ""
+    whitelistMode: false
+    enableForwardedWhitelist: false
+    whitelist:
+      - ::1
+      - 127.0.0.1
+    whitelistDockerHosts: false
+    basicAuthMode: false
+    basicAuthUser:
+      username: "user"
+      password: "password"
+    enableCorsProxy: false
+    requestProxy:
+      enabled: false
+      url: "socks5://username:password@example.com:1080"
+      bypass:
+        - localhost
+        - 127.0.0.1
+    enableUserAccounts: false
+    enableDiscreetLogin: false
+    perUserBasicAuth: false
+    sso:
+      autheliaAuth: false
+      authentikAuth: false
+    hostWhitelist:
+      enabled: false
+      scan: true
+      hosts: []
+    sessionTimeout: -1
+    disableCsrfProtection: false
+    securityOverride: false
+    logging:
+      enableAccessLog: true
+      minLogLevel: 0
+    rateLimiting:
+      preferRealIpHeader: false
+    backups:
+      common:
+        numberOfBackups: 50
+      chat:
+        enabled: true
+        checkIntegrity: true
+        maxTotalBackups: -1
+        throttleInterval: 10000
+    thumbnails:
+      enabled: true
+      format: "jpg"
+      quality: 95
+      dimensions: { 'bg': [160, 90], 'avatar': [96, 144], 'persona': [96, 144] }
+    performance:
+      lazyLoadCharacters: false
+      memoryCacheCapacity: '100mb'
+      useDiskCache: true
+    cacheBuster:
+      enabled: false
+      userAgentPattern: ''
+    allowKeysExposure: false
+    skipContentCheck: false
+    whitelistImportDomains:
+      - localhost
+      - cdn.discordapp.com
+      - files.catbox.moe
+      - raw.githubusercontent.com
+      - char-archive.evulid.cc
+    requestOverrides: []
+    extensions:
+      enabled: true
+      autoUpdate: true
+      models:
+        autoDownload: true
+        classification: Cohee/distilbert-base-uncased-go-emotions-onnx
+        captioning: Xenova/vit-gpt2-image-captioning
+        embedding: Cohee/jina-embeddings-v2-base-en
+        speechToText: Xenova/whisper-small
+        textToSpeech: Xenova/speecht5_tts
+    enableDownloadableTokenizers: true
+    promptPlaceholder: "[Start a new chat]"
+    openai:
+      randomizeUserId: false
+      captionSystemPrompt: ""
+    deepl:
+      formality: default
+    mistral:
+      enablePrefix: false
+    ollama:
+      keepAlive: -1
+      batchSize: -1
+    claude:
+      enableSystemPromptCache: false
+      cachingAtDepth: -1
+      extendedTTL: false
+    gemini:
+      apiVersion: 'v1beta'
+    enableServerPlugins: false
+    enableServerPluginsAutoUpdate: true
--- a/sillytavern/deployment.yaml
+++ b/sillytavern/deployment.yaml
@@ -0,0 +1,61 @@
+apiVersion: apps/v1
+kind: Deployment  # single-replica SillyTavern with config from a ConfigMap and three PVC-backed directories
+metadata:
+  namespace: sillytavern
+  name: sillytavern
+spec:
+  strategy:
+    type: Recreate  # old pod is removed before the replacement starts (the claims are ReadWriteOnce)
+  replicas: 1
+  selector:
+    matchLabels:
+      app: sillytavern
+  template:
+    metadata:
+      labels:
+        app: sillytavern
+    spec:
+      volumes:
+        - name: config
+          configMap:
+            name: sillytavern-config
+        - name: data
+          persistentVolumeClaim:
+            claimName: sillytavern-data
+        - name: plugins
+          persistentVolumeClaim:
+            claimName: sillytavern-plugins
+        - name: extensions
+          persistentVolumeClaim:
+            claimName: sillytavern-extensions
+      containers:
+        - name: sillytavern
+          image: ghcr.io/sillytavern/sillytavern:latest  # NOTE(review): floating tag; consider pinning a release
+          resources:
+            limits:
+              memory: 4Gi
+              cpu: '4'
+            requests:
+              memory: 1Gi
+              cpu: '1'
+          ports:
+            - protocol: TCP
+              containerPort: 8000
+          envFrom:  # imports every key of the sillytavern-auth Secret as an environment variable
+            - secretRef:
+                name: sillytavern-auth
+          env:
+            - name: NODE_ENV
+              value: production
+            - name: FORCE_COLOR
+              value: '1'
+          volumeMounts:
+            - name: config
+              subPath: config.yaml  # mounts only this key as a single file
+              mountPath: /home/node/app/config/config.yaml
+            - name: data
+              mountPath: /home/node/app/data
+            - name: plugins
+              mountPath: /home/node/app/plugins
+            - name: extensions
+              mountPath: /home/node/app/public/scripts/extensions/third-party
--- a/sillytavern/ingress.yaml
+++ b/sillytavern/ingress.yaml
@@ -0,0 +1,30 @@
+apiVersion: networking.k8s.io/v1
+kind: Ingress  # routes sillytavern.mrt0rtikize.ru to the sillytavern Service on port 8000
+metadata:
+  namespace: sillytavern
+  name: sillytavern
+  annotations:
+    traefik.ingress.kubernetes.io/router.tls: 'true'
+    traefik.ingress.kubernetes.io/router.entrypoints: websecure
+    cert-manager.io/cluster-issuer: letsencrypt-production
+  labels:
+    app.kubernetes.io/part-of: sillytavern
+    app.kubernetes.io/component: frontend
+    app.kubernetes.io/name: sillytavern
+spec:
+  ingressClassName: traefik
+  rules:
+    - host: sillytavern.mrt0rtikize.ru
+      http:
+        paths:
+          - pathType: Prefix
+            path: /
+            backend:
+              service:
+                port:
+                  number: 8000
+                name: sillytavern
+  tls:
+    - secretName: sillytavern-tls
+      hosts:
+        - sillytavern.mrt0rtikize.ru
--- a/sillytavern/namespace.yaml
+++ b/sillytavern/namespace.yaml
@@ -0,0 +1,4 @@
+apiVersion: v1
+kind: Namespace  # namespace referenced by every sillytavern manifest in this directory
+metadata:
+  name: sillytavern
--- a/sillytavern/pvc.yaml
+++ b/sillytavern/pvc.yaml
@@ -0,0 +1,35 @@
+apiVersion: v1
+kind: PersistentVolumeClaim  # 5Gi claim; the Deployment mounts it at /home/node/app/data
+metadata:
+  namespace: sillytavern
+  name: sillytavern-data
+spec:
+  resources:
+    requests:
+      storage: 5Gi
+  accessModes:
+    - ReadWriteOnce
+---
+apiVersion: v1
+kind: PersistentVolumeClaim  # 1Gi claim; the Deployment mounts it at /home/node/app/plugins
+metadata:
+  namespace: sillytavern
+  name: sillytavern-plugins
+spec:
+  resources:
+    requests:
+      storage: 1Gi
+  accessModes:
+    - ReadWriteOnce
+---
+apiVersion: v1
+kind: PersistentVolumeClaim  # 1Gi claim; the Deployment mounts it at the third-party extensions directory
+metadata:
+  namespace: sillytavern
+  name: sillytavern-extensions
+spec:
+  resources:
+    requests:
+      storage: 1Gi
+  accessModes:
+    - ReadWriteOnce
--- a/sillytavern/secret.yaml
+++ b/sillytavern/secret.yaml
@@ -0,0 +1,10 @@
+apiVersion: v1
+kind: Secret  # basic-auth settings; the sillytavern Deployment imports every key through envFrom
+metadata:
+  name: sillytavern-auth
+  namespace: sillytavern
+type: Opaque
+stringData:  # stringData only accepts strings, so every value is quoted to survive rotation to number-like values
+  SILLYTAVERN_BASICAUTHMODE: "true"
+  SILLYTAVERN_BASICAUTHUSER_USERNAME: "admin"
+  SILLYTAVERN_BASICAUTHUSER_PASSWORD: "0cdaa30c396dae77"  # NOTE(review): plaintext credential in Git; rotate it and move to a sealed/external secret
--- a/sillytavern/service.yaml
+++ b/sillytavern/service.yaml
@@ -0,0 +1,13 @@
+apiVersion: v1
+kind: Service  # in-cluster endpoint that the sillytavern Ingress points at
+metadata:
+  namespace: sillytavern
+  name: sillytavern
+spec:
+  type: ClusterIP
+  ports:
+    - protocol: TCP
+      targetPort: 8000
+      port: 8000
+  selector:
+    app: sillytavern