Initial commit: k3s GitOps manifests with ArgoCD App-of-Apps

2026-05-05 13:18:51 +03:00
commit 5d9a80b976
65 changed files with 3445 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1 @@
 config
--- a/argocd/app-of-apps.yaml
+++ b/argocd/app-of-apps.yaml
@@ -0,0 +1,25 @@
 # Root "App-of-Apps": syncs every *.yaml under argocd/apps of the Gitea repo,
 # which in turn defines the child Argo CD Applications.
 kind: Application
 apiVersion: argoproj.io/v1alpha1
 metadata:
  namespace: argocd
  name: root-app
  finalizers:
    # Cascade: deleting this Application also deletes the resources it manages.
    - resources-finalizer.argocd.argoproj.io
 spec:
  project: default
  destination:
    namespace: argocd
    server: https://kubernetes.default.svc
  source:
    path: argocd/apps
    targetRevision: main
    repoURL: http://gitea.gitea.svc.cluster.local:3000/gitea/k3s-manifests.git
    directory:
      include: '*.yaml'
      recurse: true
  syncPolicy:
    syncOptions:
      - CreateNamespace=true
    automated:
      # selfHeal reverts manual drift; prune removes resources deleted from git.
      selfHeal: true
      prune: true
--- a/argocd/apps/cert-manager.yaml
+++ b/argocd/apps/cert-manager.yaml
@@ -0,0 +1,26 @@
 # Argo CD Application: installs cert-manager from the Jetstack Helm repository.
 kind: Application
 apiVersion: argoproj.io/v1alpha1
 metadata:
  namespace: argocd
  name: cert-manager
  finalizers:
    # Cascade: deleting this Application also deletes the resources it manages.
    - resources-finalizer.argocd.argoproj.io
 spec:
  project: default
  destination:
    namespace: cert-manager
    server: https://kubernetes.default.svc
  source:
    chart: cert-manager
    repoURL: https://charts.jetstack.io
    targetRevision: v1.20.1
    helm:
      # Inline Helm values: have the chart install its own CRDs.
      values: |
        crds:
          enabled: true
  syncPolicy:
    syncOptions:
      - CreateNamespace=true
    automated:
      selfHeal: true
      prune: true
--- a/argocd/apps/llama.yaml
+++ b/argocd/apps/llama.yaml
@@ -0,0 +1,22 @@
 # Argo CD Application: plain manifests from manifests/llama of the Gitea repo,
 # deployed into the llama namespace.
 kind: Application
 apiVersion: argoproj.io/v1alpha1
 metadata:
  namespace: argocd
  name: llama
  finalizers:
    # Cascade: deleting this Application also deletes the resources it manages.
    - resources-finalizer.argocd.argoproj.io
 spec:
  project: default
  destination:
    namespace: llama
    server: https://kubernetes.default.svc
  source:
    path: manifests/llama
    targetRevision: main
    repoURL: http://gitea.gitea.svc.cluster.local:3000/gitea/k3s-manifests.git
  syncPolicy:
    syncOptions:
      - CreateNamespace=true
    automated:
      selfHeal: true
      prune: true
--- a/argocd/apps/longhorn.yaml
+++ b/argocd/apps/longhorn.yaml
@@ -0,0 +1,29 @@
 # Argo CD Application (multi-source): the Longhorn Helm chart plus extra
 # manifests from manifests/longhorn of the Gitea repo.
 kind: Application
 apiVersion: argoproj.io/v1alpha1
 metadata:
  namespace: argocd
  name: longhorn
  finalizers:
    # Cascade: deleting this Application also deletes the resources it manages.
    - resources-finalizer.argocd.argoproj.io
 spec:
  project: default
  destination:
    namespace: longhorn-system
    server: https://kubernetes.default.svc
  sources:
    # Source 1: upstream chart.
    - chart: longhorn
      repoURL: https://charts.longhorn.io
      targetRevision: '1.11.2'
      helm:
        # Turns the chart's pre-upgrade checker job off.
        values: |
          preUpgradeChecker:
            jobEnabled: false
    # Source 2: repo-local manifests applied alongside the chart.
    - path: manifests/longhorn
      targetRevision: main
      repoURL: http://gitea.gitea.svc.cluster.local:3000/gitea/k3s-manifests.git
  syncPolicy:
    syncOptions:
      - CreateNamespace=true
    automated:
      selfHeal: true
      prune: true
--- a/argocd/apps/metallb.yaml
+++ b/argocd/apps/metallb.yaml
@@ -0,0 +1,29 @@
 # Argo CD Application (multi-source): the MetalLB Helm chart plus extra
 # manifests from manifests/metallb of the Gitea repo.
 kind: Application
 apiVersion: argoproj.io/v1alpha1
 metadata:
  namespace: argocd
  name: metallb
  finalizers:
    # Cascade: deleting this Application also deletes the resources it manages.
    - resources-finalizer.argocd.argoproj.io
 spec:
  project: default
  destination:
    namespace: metallb-system
    server: https://kubernetes.default.svc
  sources:
    # Source 1: upstream chart, CRDs enabled.
    - chart: metallb
      repoURL: https://metallb.github.io/metallb
      targetRevision: '0.14.5'
      helm:
        values: |
          crds:
            enabled: true
    # Source 2: repo-local manifests applied alongside the chart.
    - path: manifests/metallb
      targetRevision: main
      repoURL: http://gitea.gitea.svc.cluster.local:3000/gitea/k3s-manifests.git
  syncPolicy:
    syncOptions:
      - CreateNamespace=true
    automated:
      selfHeal: true
      prune: true
--- a/argocd/apps/metrics.yaml
+++ b/argocd/apps/metrics.yaml
@@ -0,0 +1,37 @@
 # Argo CD Application (multi-source): kube-prometheus-stack and
 # victoria-metrics-single Helm charts, their values files taken from the Gitea
 # repo via the `values` ref, plus the plain manifests under manifests/metrics.
 apiVersion: argoproj.io/v1alpha1
 kind: Application
 metadata:
  name: metrics
  namespace: argocd
  finalizers:
    # Cascade: deleting this Application also deletes the resources it manages.
    - resources-finalizer.argocd.argoproj.io
 spec:
  project: default
  sources:
    - repoURL: https://prometheus-community.github.io/helm-charts
      chart: kube-prometheus-stack
      targetRevision: 82.16.2
      helm:
        valueFiles:
          # $values resolves to the root of the source declared with `ref: values`.
          - $values/manifests/metrics/kube-prometheus-stack-values.yaml
    - repoURL: https://victoriametrics.github.io/helm-charts/
      chart: victoria-metrics-single
      targetRevision: 0.34.0
      helm:
        valueFiles:
          - $values/manifests/metrics/victoria-metrics-single-values.yaml
    # Values-only source: provides $values, deploys nothing by itself.
    - repoURL: http://gitea.gitea.svc.cluster.local:3000/gitea/k3s-manifests.git
      targetRevision: main
      ref: values
    # Plain manifests deployed next to the two charts.
    - repoURL: http://gitea.gitea.svc.cluster.local:3000/gitea/k3s-manifests.git
      targetRevision: main
      path: manifests/metrics
  destination:
    server: https://kubernetes.default.svc
    namespace: metrics
  syncPolicy:
    automated:
      prune: true
      selfHeal: true
    syncOptions:
      - CreateNamespace=true
      # The Prometheus Operator CRDs shipped by kube-prometheus-stack are larger
      # than the 262144-byte limit of the last-applied-configuration annotation
      # that client-side apply writes, so the sync fails without server-side apply.
      - ServerSideApply=true
--- a/argocd/apps/sillytavern.yaml
+++ b/argocd/apps/sillytavern.yaml
@@ -0,0 +1,22 @@
 apiVersion: argoproj.io/v1alpha1
 kind: Application
 metadata:
  name: sillytavern
  namespace: argocd
  finalizers:
    - resources-finalizer.argocd.argoproj.io
 spec:
  project: default
  source:
    repoURL: http://gitea.gitea.svc.cluster.local:3000/gitea/k3s-manifests.git
    targetRevision: main
    path: manifests/sillytavern
  destination:
    server: https://kubernetes.default.svc
    namespace: sillytavern
  syncPolicy:
    automated:
      prune: true
      selfHeal: true
    syncOptions:
      - CreateNamespace=true
--- a/bootstrap.sh
+++ b/bootstrap.sh
@@ -0,0 +1,229 @@
 #!/bin/bash
 set -e
 # =============================================================================
 # k3s GitOps Bootstrap Script
 # =============================================================================
 # This script sets up Gitea + ArgoCD on the k3s cluster and configures
 # GitOps with the App-of-Apps pattern.
 #
 # Prerequisites:
 #   - kubectl + kubeconfig access to the cluster
 #   - helm installed
 #   - git installed
 #   - DNS for *.mrt0rtikize.ru pointing to cluster nodes
 # =============================================================================
 # Absolute directory containing this script.
 SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
 # The script lives at the repository root (next to argocd/ and bootstrap/),
 # so the repository directory is the script directory itself. Taking its
 # parent made every ${REPO_DIR}/... path point outside the repository.
 REPO_DIR="${SCRIPT_DIR}"
 # Exported so that helm and the argocd CLI, which are not invoked through
 # ${KCTL}, talk to the same cluster as kubectl does.
 export KUBECONFIG="${REPO_DIR}/config"
 KCTL="kubectl --kubeconfig ${KUBECONFIG}"
 # ANSI colour codes used with `echo -e`.
 RED='\033[0;31m'
 GREEN='\033[0;32m'
 YELLOW='\033[1;33m'
 NC='\033[0m'
 echo -e "${GREEN}==============================================${NC}"
 echo -e "${GREEN}   k3s GitOps Bootstrap${NC}"
 echo -e "${GREEN}==============================================${NC}"
 echo ""
 # -----------------------------------------------------------------------------
 # Step 1: Deploy Gitea
 # -----------------------------------------------------------------------------
 echo -e "${YELLOW}[1/6] Deploying Gitea...${NC}"
 # Create the namespace first: `kubectl apply -f <dir>` processes files in
 # alphabetical order, so deployment.yaml would be applied before
 # namespace.yaml and fail on a fresh cluster (aborting the script via set -e).
 ${KCTL} apply -f "${REPO_DIR}/bootstrap/gitea/namespace.yaml"
 ${KCTL} apply -f "${REPO_DIR}/bootstrap/gitea/"
 echo "  Waiting for Gitea pod to be ready..."
 # `rollout status` waits on the Deployment itself, so it does not fail
 # immediately when the pod has not been created yet (which `kubectl wait pod -l`
 # does when the selector matches nothing). Errors are left visible.
 ${KCTL} rollout status deployment/gitea -n gitea --timeout=120s || {
    echo -e "${RED}  Gitea pod not ready after 120s. Checking status...${NC}"
    ${KCTL} get pod -n gitea
    exit 1
 }
 echo -e "${GREEN}  Gitea deployed!${NC}"
 echo ""
 # -----------------------------------------------------------------------------
 # Step 2: Gitea initial setup (manual)
 # -----------------------------------------------------------------------------
 # Prints the install-wizard instructions, then asks for the admin password the
 # operator chose and defines the Gitea URLs used by the later steps.
 echo -e "${YELLOW}[2/6] Gitea setup${NC}"
 echo ""
 echo "  Gitea is running. Please open the install page in your browser:"
 echo ""
 echo -e "    ${GREEN}https://git.mrt0rtikize.ru/${NC}"
 echo ""
 echo "  Complete the install wizard with these settings:"
 echo "    - Database: SQLite3"
 echo "    - Admin Username: gitea"
 echo "    - Admin Password: <choose a strong password>"
 echo "    - Confirm Password: <same>"
 echo "    - Admin Email: admin@mrt0rtikize.ru"
 echo ""
 echo "  After install, create a repository named:"
 echo ""
 echo -e "    ${GREEN}k3s-manifests${NC}"
 echo ""
 echo "  Make it PUBLIC (so ArgoCD can read it without auth)."
 echo ""
 GITEA_PASSWORD=""
 # -s keeps the password off the terminal (and out of scrollback);
 # -r stops backslashes in the password from being treated as escapes.
 read -r -s -p "  Gitea admin password (from install wizard): " GITEA_PASSWORD
 # With -s the Enter keypress is not echoed, so end the prompt line explicitly.
 echo ""
 if [ -z "$GITEA_PASSWORD" ]; then
    echo -e "${RED}  Password is required. Exiting.${NC}"
    exit 1
 fi
 # Gitea endpoints: the external URL is used for `git push` from this machine,
 # the in-cluster URL is what ArgoCD is configured with.
 GITEA_EXTERNAL="https://git.mrt0rtikize.ru"
 GITEA_INTERNAL="http://gitea.gitea.svc.cluster.local:3000"
 GITEA_USER="gitea"
 GITEA_REPO="k3s-manifests"
 GITEA_REPO_URL="${GITEA_EXTERNAL}/${GITEA_USER}/${GITEA_REPO}.git"
 GITEA_INTERNAL_REPO="${GITEA_INTERNAL}/${GITEA_USER}/${GITEA_REPO}.git"
 echo ""
 # -----------------------------------------------------------------------------
 # Step 3: Initialize git repo and push manifests
 # -----------------------------------------------------------------------------
 echo -e "${YELLOW}[3/6] Initializing git repo...${NC}"
 # Create .gitignore so the kubeconfig file ("config") is never committed.
 printf '%s\n' '# Sensitive files' 'config' > "${REPO_DIR}/.gitignore"
 cd "${REPO_DIR}"
 if [ ! -d ".git" ]; then
    git init
    git checkout -b main
 fi
 git add .
 git commit -m "Initial commit: k3s GitOps manifests" 2>/dev/null || {
    echo "  Nothing to commit (already up to date)"
 }
 echo "  Pushing to Gitea..."
 # GIT_TERMINAL_PROMPT=0 makes git fail instead of prompting for credentials.
 GIT_TERMINAL_PROMPT=0 git push -u "${GITEA_REPO_URL}" main 2>/dev/null || {
    echo ""
    echo -e "  ${RED}Push failed.${NC} Did you create the '${GITEA_REPO}' repo in Gitea?"
    echo "  You can retry manually:"
    echo "    cd ${REPO_DIR}"
    echo "    git push -u ${GITEA_REPO_URL} main"
    echo ""
    read -p "  Press Enter after pushing... " -r
    # Verify the manual push instead of trusting the keypress: ArgoCD would
    # otherwise be pointed at a repository that has no 'main' branch.
    # --exit-code makes ls-remote fail when the ref does not exist.
    GIT_TERMINAL_PROMPT=0 git ls-remote --exit-code "${GITEA_REPO_URL}" refs/heads/main > /dev/null 2>&1 || {
        echo -e "  ${RED}Branch 'main' not found at ${GITEA_REPO_URL}. Exiting.${NC}"
        exit 1
    }
 }
 echo -e "${GREEN}  Manifests pushed to Gitea!${NC}"
 echo ""
 # -----------------------------------------------------------------------------
 # Step 4: Install ArgoCD
 # -----------------------------------------------------------------------------
 echo -e "${YELLOW}[4/6] Installing ArgoCD...${NC}"
 helm repo add argo https://argoproj.github.io/argo-helm 2>/dev/null || true
 helm repo update
 # --kubeconfig: install into the same cluster ${KCTL} targets; without it helm
 #   falls back to the caller's default kubeconfig.
 # --set values are single-quoted so the shell never globs "[0]", and dots that
 #   belong to a key name are escaped. Unescaped, "configs.cm.timeout.reconciliation"
 #   creates a nested map "timeout: {reconciliation: ...}" instead of the
 #   "timeout.reconciliation" key.
 helm upgrade --install argocd argo/argo-cd \
  --kubeconfig "${KUBECONFIG}" \
  --namespace argocd \
  --create-namespace \
  --set 'server.extraArgs[0]=--insecure' \
  --set 'configs.params.server\.insecure=true' \
  --set 'configs.cm.timeout\.reconciliation=180s' \
  --wait \
  --timeout 300s
 # Initial admin password generated by the install; left empty when the secret
 # cannot be read.
 ARGOCD_PASSWORD=$(${KCTL} -n argocd get secret argocd-initial-admin-secret -o jsonpath="{.data.password}" 2>/dev/null | base64 -d)
 echo -e "${GREEN}  ArgoCD installed!${NC}"
 echo ""
 echo "  ArgoCD UI (port-forward):"
 echo "    kubectl --kubeconfig ${KUBECONFIG} port-forward svc/argocd-server -n argocd 8080:80"
 echo "  Username: admin"
 echo ""
 if [ -n "$ARGOCD_PASSWORD" ]; then
    echo "  Password: ${ARGOCD_PASSWORD}"
 fi
 echo ""
 # -----------------------------------------------------------------------------
 # Step 5: Configure ArgoCD → Gitea connection
 # -----------------------------------------------------------------------------
 echo -e "${YELLOW}[5/6] Configuring ArgoCD → Gitea connection...${NC}"
 # Add Gitea as a repository in ArgoCD
 # Using argocd CLI if available, otherwise creating a labelled repository secret
 if command -v argocd &> /dev/null; then
    echo "  Using argocd CLI..."
    ARGOCD_SERVER="localhost:8080"
    echo "  Please port-forward ArgoCD in another terminal:"
    echo "    kubectl --kubeconfig ${KUBECONFIG} port-forward svc/argocd-server -n argocd 8080:80"
    echo ""
    read -p "  Press Enter when ready..." -r
    argocd login "${ARGOCD_SERVER}" --username admin --password "${ARGOCD_PASSWORD}" --insecure
    argocd repo add "${GITEA_INTERNAL_REPO}" --name gitea-k3s --type git
 else
    # Fallback: create repository secret manually
    echo "  Creating repository secret manually..."
    # The secret-type label is added with `kubectl label --local` (purely
    # client-side) rather than a sed replacement containing "\n", which only
    # GNU sed expands. Errors from apply are no longer discarded, so a failure
    # here is visible before set -e stops the script.
    ${KCTL} -n argocd create secret generic gitea-k3s-repo \
      --from-literal=url="${GITEA_INTERNAL_REPO}" \
      --from-literal=type=git \
      --from-literal=name=gitea-k3s \
      --dry-run=client -o yaml | \
    ${KCTL} label --local -f - argocd.argoproj.io/secret-type=repository -o yaml | \
    ${KCTL} apply -f -
    # For a public repo, ArgoCD can access it without credentials
    # If the repo is private, add these two literals to the create command above:
    #   --from-literal=username="${GITEA_USER}" \
    #   --from-literal=password="${GITEA_PASSWORD}" \
 fi
 echo -e "${GREEN}  Repository configured!${NC}"
 echo ""
 # -----------------------------------------------------------------------------
 # Step 6: Apply the root app
 # -----------------------------------------------------------------------------
 echo -e "${YELLOW}[6/6] Applying root App-of-Apps...${NC}"
 ${KCTL} apply -f "${REPO_DIR}/argocd/app-of-apps.yaml"
 # Closing banner.
 BANNER_RULE="=============================================="
 echo ""
 echo -e "${GREEN}${BANNER_RULE}${NC}"
 echo -e "${GREEN}   Bootstrap Complete!${NC}"
 echo -e "${GREEN}${BANNER_RULE}${NC}"
 # Summary of what was created and how to watch it; one printf argument per
 # output line, printed verbatim.
 printf '%s\n' \
  "" \
  "  Root app created. ArgoCD will now sync all child apps:" \
  "" \
  "    - cert-manager" \
  "    - metallb" \
  "    - longhorn" \
  "    - metrics (prometheus + victoria-metrics)" \
  "    - llama" \
  "    - sillytavern" \
  "" \
  "  Monitor progress:" \
  "    kubectl --kubeconfig ${KUBECONFIG} port-forward svc/argocd-server -n argocd 8080:80" \
  "    Open http://localhost:8080" \
  "    Login: admin / ${ARGOCD_PASSWORD}" \
  "" \
  "  Check sync status:" \
  "    kubectl --kubeconfig ${KUBECONFIG} get applications -n argocd" \
  ""
--- a/bootstrap/argocd/install.sh
+++ b/bootstrap/argocd/install.sh
@@ -0,0 +1,36 @@
 #!/bin/bash
 set -e
 # Bootstrap ArgoCD on the k3s cluster
 # This is a one-time manual step before GitOps takes over
 # Exported so helm uses this kubeconfig as well; a plain assignment is only
 # visible to the explicit `kubectl --kubeconfig` calls below.
 export KUBECONFIG="/home/mrt0rtikize/infra/k3s/config"
 KCTL="kubectl --kubeconfig ${KUBECONFIG}"
 echo "=== Installing ArgoCD ==="
 # Add ArgoCD Helm repo
 helm repo add argo https://argoproj.github.io/argo-helm 2>/dev/null || true
 helm repo update
 # Install ArgoCD
 # --set values are single-quoted so the shell never globs "[0]", and dots that
 # belong to a key name are escaped. Unescaped, "configs.cm.timeout.reconciliation"
 # creates a nested map instead of the "timeout.reconciliation" key.
 helm upgrade --install argocd argo/argo-cd \
  --kubeconfig "${KUBECONFIG}" \
  --namespace argocd \
  --create-namespace \
  --set 'server.extraArgs[0]=--insecure' \
  --set 'configs.params.server\.insecure=true' \
  --set 'configs.cm.timeout\.reconciliation=180s' \
  --wait \
  --timeout 300s
 echo ""
 echo "=== ArgoCD installed ==="
 echo ""
 echo "To access ArgoCD UI:"
 echo "  kubectl --kubeconfig ${KUBECONFIG} port-forward svc/argocd-server -n argocd 8080:80"
 echo ""
 echo "Admin password:"
 # The kubeconfig path is quoted so a path with spaces stays one argument.
 kubectl --kubeconfig "${KUBECONFIG}" -n argocd get secret argocd-initial-admin-secret -o jsonpath="{.data.password}" | base64 -d
 echo ""
 echo ""
 echo "Login with username: admin"
--- a/bootstrap/gitea/deployment.yaml
+++ b/bootstrap/gitea/deployment.yaml
@@ -0,0 +1,62 @@
 # Gitea server: one replica, SQLite database, all state on the gitea-data PVC.
 kind: Deployment
 apiVersion: apps/v1
 metadata:
  namespace: gitea
  name: gitea
 spec:
  # Recreate: the old pod is stopped before the replacement starts.
  strategy:
    type: Recreate
  replicas: 1
  selector:
    matchLabels:
      app: gitea
  template:
    metadata:
      labels:
        app: gitea
    spec:
      volumes:
        - name: data
          persistentVolumeClaim:
            claimName: gitea-data
      containers:
        - name: gitea
          image: gitea/gitea:1.24
          resources:
            limits:
              memory: 512Mi
              cpu: 500m
            requests:
              memory: 128Mi
              cpu: 100m
          ports:
            - name: http
              containerPort: 3000
            - name: ssh
              containerPort: 22
          # GITEA__<section>__<KEY> variables configure the matching app.ini entry.
          env:
            - name: GITEA__database__DB_TYPE
              value: 'sqlite3'
            - name: GITEA__server__DOMAIN
              value: 'git.mrt0rtikize.ru'
            - name: GITEA__server__ROOT_URL
              value: 'https://git.mrt0rtikize.ru'
            - name: GITEA__server__HTTP_PORT
              value: '3000'
            - name: GITEA__server__SSH_PORT
              value: '22'
          volumeMounts:
            - mountPath: /data
              name: data
          # Both probes request "/" on the HTTP port.
          readinessProbe:
            periodSeconds: 5
            initialDelaySeconds: 5
            httpGet:
              port: 3000
              path: /
          livenessProbe:
            periodSeconds: 10
            initialDelaySeconds: 10
            httpGet:
              port: 3000
              path: /
--- a/bootstrap/gitea/ingress.yaml
+++ b/bootstrap/gitea/ingress.yaml
@@ -0,0 +1,24 @@
 # Exposes Gitea at git.mrt0rtikize.ru through Traefik; the certificate is
 # requested from the letsencrypt-production ClusterIssuer into gitea-tls.
 kind: Ingress
 apiVersion: networking.k8s.io/v1
 metadata:
  namespace: gitea
  name: gitea
  annotations:
    cert-manager.io/cluster-issuer: letsencrypt-production
 spec:
  ingressClassName: traefik
  rules:
    - host: git.mrt0rtikize.ru
      http:
        paths:
          - pathType: Prefix
            path: /
            backend:
              service:
                port:
                  number: 3000
                name: gitea
  tls:
    - secretName: gitea-tls
      hosts:
        - git.mrt0rtikize.ru
--- a/bootstrap/gitea/namespace.yaml
+++ b/bootstrap/gitea/namespace.yaml
@@ -0,0 +1,4 @@
 # Namespace used by the Gitea bootstrap manifests in this directory.
 kind: Namespace
 apiVersion: v1
 metadata:
  name: gitea
--- a/bootstrap/gitea/pvc.yaml
+++ b/bootstrap/gitea/pvc.yaml
@@ -0,0 +1,12 @@
 # 20Gi Longhorn-backed volume claimed by the gitea Deployment (mounted at /data).
 kind: PersistentVolumeClaim
 apiVersion: v1
 metadata:
  namespace: gitea
  name: gitea-data
 spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 20Gi
  storageClassName: longhorn
--- a/bootstrap/gitea/service.yaml
+++ b/bootstrap/gitea/service.yaml
@@ -0,0 +1,15 @@
 # In-cluster Service for Gitea: HTTP on 3000 and SSH on 22, selecting app=gitea.
 kind: Service
 apiVersion: v1
 metadata:
  namespace: gitea
  name: gitea
 spec:
  ports:
    - name: http
      targetPort: 3000
      port: 3000
    - name: ssh
      targetPort: 22
      port: 22
  selector:
    app: gitea
--- a/infra/argocd/README.md
+++ b/infra/argocd/README.md
--- a/infra/longhorn/README.md
+++ b/infra/longhorn/README.md
--- a/infra/longhorn/longhorn-ingress.yaml
+++ b/infra/longhorn/longhorn-ingress.yaml
@@ -0,0 +1,26 @@
 # Exposes the Longhorn dashboard at longhorn.mrt0rtikize.ru via Traefik, with a
 # certificate from the letsencrypt-production ClusterIssuer.
 kind: Ingress
 apiVersion: networking.k8s.io/v1
 metadata:
  namespace: longhorn-system
  name: longhorn-ingress
  annotations:
    traefik.ingress.kubernetes.io/router.tls: 'true'
    traefik.ingress.kubernetes.io/router.entrypoints: websecure
    cert-manager.io/cluster-issuer: letsencrypt-production
 spec:
  # Traefik is the ingress controller.
  ingressClassName: traefik
  rules:
    - host: longhorn.mrt0rtikize.ru
      http:
        paths:
          - pathType: Prefix
            path: /
            backend:
              service:
                # Port on which the Longhorn UI Service listens.
                port:
                  number: 80
                # Service in front of the Longhorn dashboard.
                name: longhorn-frontend
  tls:
    - secretName: longhorn-tls
      hosts:
        - longhorn.mrt0rtikize.ru
--- a/infra/longhorn/test-pvc.yaml
+++ b/infra/longhorn/test-pvc.yaml
@@ -0,0 +1,23 @@
 # StorageClass provisioned by the Longhorn CSI driver: two replicas per volume,
 # volumes retained after their claim is deleted, expansion allowed.
 kind: StorageClass
 apiVersion: storage.k8s.io/v1
 metadata:
  name: longhorn
 provisioner: driver.longhorn.io
 volumeBindingMode: Immediate
 reclaimPolicy: Retain
 allowVolumeExpansion: true
 # StorageClass parameters are strings, hence the quotes.
 parameters:
  staleReplicaTimeout: "30"
  numberOfReplicas: "2"
 ---
 # 2Gi test claim against the longhorn StorageClass. No namespace is set, so it
 # lands in whichever namespace it is applied to.
 kind: PersistentVolumeClaim
 apiVersion: v1
 metadata:
  name: longhorn-pvc
 spec:
  storageClassName: longhorn
  resources:
    requests:
      storage: 2Gi
  accessModes:
    - ReadWriteOnce
--- a/infra/metallb/README.md
+++ b/infra/metallb/README.md
--- a/infra/metallb/ip-address-pool.yaml
+++ b/infra/metallb/ip-address-pool.yaml
@@ -0,0 +1,10 @@
 # MetalLB address pool: LoadBalancer IPs are handed out from
 # 10.0.0.120-10.0.0.200.
 kind: IPAddressPool
 apiVersion: metallb.io/v1beta1
 metadata:
  namespace: metallb-system
  name: default-address-pool
 spec:
  avoidBuggyIPs: true
  autoAssign: true
  addresses:
    - 10.0.0.120-10.0.0.200
--- a/infra/metallb/l2advert.yaml
+++ b/infra/metallb/l2advert.yaml
@@ -0,0 +1,8 @@
 # Announces the addresses of default-address-pool in layer-2 mode.
 kind: L2Advertisement
 apiVersion: metallb.io/v1beta1
 metadata:
  namespace: metallb-system
  name: default-advertisement
 spec:
  ipAddressPools:
    - default-address-pool
--- a/llama/cpu.yaml
+++ b/llama/cpu.yaml
@@ -0,0 +1,147 @@
 # llama.cpp server for the Qwen3-Coder model. An init container downloads the
 # GGUF file to the shared models PVC once; the server container then loads it.
 apiVersion: apps/v1
 kind: Deployment
 metadata:
  name: llama-server-cpu
  namespace: llama
 spec:
  replicas: 1
  # Recreate: the old pod is stopped before the replacement starts.
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app: llama-server-cpu
  template:
    metadata:
      labels:
        app: llama-server-cpu
      annotations:
        prometheus.io/scrape: "true"
        prometheus.io/port: "8080"
        prometheus.io/path: "/metrics"
    spec:
      # Scheduled only on nodes labelled gpu=amd.
      nodeSelector:
        gpu: amd
      initContainers:
        - name: download-model
          image: python:3.11-slim
          env:
            - name: HF_HOME
              value: /models/.hf
            - name: MODEL_REPO
              value: "byteshape/Qwen3-Coder-30B-A3B-Instruct-GGUF"
            - name: MODEL_FILE
              value: "Qwen3-Coder-30B-A3B-Instruct-IQ4_XS-4.20bpw.gguf"
          # Skips the download when /models/${MODEL_FILE} already exists.
          # hf_hub_download is called without `local_dir_use_symlinks`: that
          # argument is deprecated and ignored by current huggingface_hub, and
          # because the package is installed unpinned, a release that drops the
          # argument would make the init container fail with a TypeError.
          command:
            - /bin/sh
            - -c
            - |
              set -eux
              MODEL_PATH="/models/${MODEL_FILE}"
              if [ -f "${MODEL_PATH}" ]; then
                echo "Model already exists at ${MODEL_PATH}, skipping download"
                exit 0
              fi
              echo "Installing Hugging Face Hub downloader"
              pip install --no-cache-dir huggingface_hub
              echo "Downloading ${MODEL_REPO}/${MODEL_FILE}"
              python - <<'PY'
              import os
              from huggingface_hub import hf_hub_download
              repo_id = os.environ["MODEL_REPO"]
              filename = os.environ["MODEL_FILE"]
              token = os.environ.get("HUGGING_FACE_HUB_TOKEN")
              path = hf_hub_download(
                  repo_id=repo_id,
                  filename=filename,
                  local_dir="/models",
                  token=token,
              )
              print(f"Downloaded to: {path}")
              PY
              ls -lah /models
          volumeMounts:
            - name: models
              mountPath: /models
      containers:
        - name: llama
          image: ghcr.io/ggml-org/llama.cpp:server
          # --model must name the same file as MODEL_FILE of the init container.
          args:
            - "--model"
            - "/models/Qwen3-Coder-30B-A3B-Instruct-IQ4_XS-4.20bpw.gguf"
            - "--host"
            - "0.0.0.0"
            - "--port"
            - "8080"
            - "--metrics"
            - "--ctx-size"
            - "32768"
            - "--parallel"
            - "1"
            - "--cache-type-k"
            - "q8_0"
            - "--cache-type-v"
            - "q8_0"
          ports:
            - name: http
              containerPort: 8080
          volumeMounts:
            - name: models
              mountPath: /models
          resources:
            requests:
              cpu: "8"
              memory: "24Gi"
            limits:
              cpu: "12"
              memory: "24Gi"
      volumes:
        - name: models
          persistentVolumeClaim:
            claimName: llama-cpu-models-pvc
 ---
 # ClusterIP Service in front of the llama-server-cpu pods (port 8080 -> the
 # container port named "http").
 kind: Service
 apiVersion: v1
 metadata:
  namespace: llama
  name: llama-server-cpu
 spec:
  type: ClusterIP
  ports:
    - name: http
      targetPort: http
      port: 8080
  selector:
    app: llama-server-cpu
 ---
 # PodMonitor: scrapes /metrics on the "http" port of the llama-server-cpu pods
 # every 15 seconds.
 kind: PodMonitor
 apiVersion: monitoring.coreos.com/v1
 metadata:
  namespace: llama
  name: llama-server-cpu
  labels:
    app: llama-server-cpu
 spec:
  podMetricsEndpoints:
    - interval: 15s
      path: /metrics
      port: http
  selector:
    matchLabels:
      app: llama-server-cpu
  namespaceSelector:
    matchNames:
      - llama
--- a/llama/gpu-exporter.yaml
+++ b/llama/gpu-exporter.yaml
@@ -0,0 +1,62 @@
 # Radeon GPU metrics exporter, one pod per node labelled gpu=amd. Runs
 # privileged with the host's /sys and /dev/dri mounted read-only.
 kind: DaemonSet
 apiVersion: apps/v1
 metadata:
  namespace: llama
  name: radeon-exporter
  labels:
    app: radeon-exporter
 spec:
  selector:
    matchLabels:
      app: radeon-exporter
  template:
    metadata:
      labels:
        app: radeon-exporter
    spec:
      nodeSelector:
        gpu: amd
      volumes:
        - name: sys
          hostPath:
            type: Directory
            path: /sys
        - name: dri
          hostPath:
            type: Directory
            path: /dev/dri
      containers:
        - name: radeon-exporter
          image: kmulvey/radeon_exporter:latest
          imagePullPolicy: IfNotPresent
          securityContext:
            privileged: true
          ports:
            - containerPort: 9200
              name: metrics
          volumeMounts:
            - name: sys
              readOnly: true
              mountPath: /sys
            - name: dri
              readOnly: true
              mountPath: /dev/dri
 ---
 # PodMonitor: scrapes /metrics on the "metrics" port of the radeon-exporter
 # pods every 15 seconds.
 kind: PodMonitor
 apiVersion: monitoring.coreos.com/v1
 metadata:
  namespace: llama
  name: radeon-exporter
  labels:
    monitoring: primary
 spec:
  podMetricsEndpoints:
    - interval: 15s
      path: /metrics
      port: metrics
  selector:
    matchLabels:
      app: radeon-exporter
  namespaceSelector:
    matchNames:
      - llama
--- a/llama/litellm-db.yaml
+++ b/llama/litellm-db.yaml
@@ -0,0 +1,116 @@
 # Database name and credentials shared by the litellm-postgres and litellm
 # Deployments.
 # NOTE(review): the password is stored in plain text in git; consider rotating
 # it and supplying it from a secret store instead.
 kind: Secret
 apiVersion: v1
 type: Opaque
 metadata:
  namespace: llama
  name: litellm-postgres
 stringData:
  POSTGRES_USER: 'litellm'
  POSTGRES_DB: 'litellm'
  POSTGRES_PASSWORD: '7792e47efbc7348155f54a15ed34dc1d06716b2b1848711d0ee90e3461883c0d'
 ---
 # 10Gi data volume for the LiteLLM PostgreSQL instance. No storageClassName is
 # set, so the cluster's default StorageClass is used.
 kind: PersistentVolumeClaim
 apiVersion: v1
 metadata:
  namespace: llama
  name: litellm-postgres
  labels:
    app.kubernetes.io/name: litellm-postgres
 spec:
  resources:
    requests:
      storage: 10Gi
  accessModes:
    - ReadWriteOnce
 ---
 # PostgreSQL 16 instance backing LiteLLM; credentials come from the
 # litellm-postgres Secret, data lives on the litellm-postgres PVC.
 apiVersion: apps/v1
 kind: Deployment
 metadata:
  name: litellm-postgres
  namespace: llama
  labels:
    app.kubernetes.io/name: litellm-postgres
    app.kubernetes.io/component: database
 spec:
  replicas: 1
  # The data volume is ReadWriteOnce and a PostgreSQL data directory must never
  # be opened by two servers: stop the old pod before starting the new one.
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app.kubernetes.io/name: litellm-postgres
      app.kubernetes.io/component: database
  template:
    metadata:
      labels:
        app.kubernetes.io/name: litellm-postgres
        app.kubernetes.io/component: database
    spec:
      containers:
        - name: postgres
          image: postgres:16
          imagePullPolicy: IfNotPresent
          ports:
            - name: postgres
              containerPort: 5432
          env:
            # Keep the cluster in a subdirectory of the mount so initdb does not
            # trip over files at the volume root (e.g. lost+found).
            - name: PGDATA
              value: /var/lib/postgresql/data/pgdata
            - name: POSTGRES_DB
              valueFrom:
                secretKeyRef:
                  name: litellm-postgres
                  key: POSTGRES_DB
            - name: POSTGRES_USER
              valueFrom:
                secretKeyRef:
                  name: litellm-postgres
                  key: POSTGRES_USER
            - name: POSTGRES_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: litellm-postgres
                  key: POSTGRES_PASSWORD
          volumeMounts:
            # postgres:16 stores its data in /var/lib/postgresql/data, which the
            # image declares as a volume. Mounting the PVC one level higher
            # (/var/lib/postgresql) leaves that path on a per-container volume,
            # so the database would not survive a container restart.
            - name: data
              mountPath: /var/lib/postgresql/data
          readinessProbe:
            exec:
              command:
                - sh
                - -c
                - pg_isready -U "$POSTGRES_USER" -d "$POSTGRES_DB"
            initialDelaySeconds: 5
            periodSeconds: 10
          livenessProbe:
            exec:
              command:
                - sh
                - -c
                - pg_isready -U "$POSTGRES_USER" -d "$POSTGRES_DB"
            initialDelaySeconds: 20
            periodSeconds: 20
          resources:
            requests:
              cpu: 100m
              memory: 256Mi
            limits:
              cpu: 500m
              memory: 256Mi
      volumes:
        - name: data
          persistentVolumeClaim:
            claimName: litellm-postgres
 ---
 # ClusterIP Service for the LiteLLM PostgreSQL pod (5432 -> the container port
 # named "postgres").
 kind: Service
 apiVersion: v1
 metadata:
  namespace: llama
  name: litellm-postgres
  labels:
    app.kubernetes.io/component: database
    app.kubernetes.io/name: litellm-postgres
 spec:
  type: ClusterIP
  ports:
    - name: postgres
      targetPort: postgres
      port: 5432
  selector:
    app.kubernetes.io/component: database
    app.kubernetes.io/name: litellm-postgres
--- a/llama/litellm.yaml
+++ b/llama/litellm.yaml
@@ -0,0 +1,202 @@
 # Master key injected into the litellm container as LITELLM_MASTER_KEY.
 # NOTE(review): the key is stored in plain text in git; consider rotating it
 # and supplying it from a secret store instead.
 kind: Secret
 apiVersion: v1
 type: Opaque
 metadata:
  namespace: llama
  name: litellm-secret
  labels:
    app.kubernetes.io/component: gateway
    app.kubernetes.io/name: litellm
 stringData:
  LITELLM_MASTER_KEY: '6991c7c0f02b4bcf'
 ---
 # LiteLLM proxy configuration, mounted into the litellm container as
 # /app/config.yaml. Maps the model names fast / smart / rp to the in-cluster
 # llama-server Services and enables the prometheus callback.
 kind: ConfigMap
 apiVersion: v1
 metadata:
  namespace: llama
  name: litellm-config
  labels:
    app.kubernetes.io/component: gateway
    app.kubernetes.io/name: litellm
 data:
  config.yaml: |
    model_list:
      - model_name: fast
        litellm_params:
          model: openai/fast
          api_base: "http://llama-server-gpu.llama.svc.cluster.local:8080/v1"
          api_key: none
      - model_name: smart
        litellm_params:
          model: openai/smart
          api_base: "http://llama-server-cpu.llama.svc.cluster.local:8080/v1"
          api_key: none
      - model_name: rp
        litellm_params:
          model: openai/rp
          api_base: "http://llama-server-gpu-rp.llama.svc.cluster.local:8080/v1"
          api_key: none
    litellm_settings:
      callbacks:
        - prometheus
    general_settings:
      store_model_in_db: true
      store_prompts_in_spend_logs: true
 ---
 # LiteLLM gateway: reads /app/config.yaml from the litellm-config ConfigMap and
 # connects to the litellm-postgres database.
 kind: Deployment
 apiVersion: apps/v1
 metadata:
  namespace: llama
  name: litellm
  labels:
    monitoring: prometheus
    app.kubernetes.io/part-of: llama-stack
    app.kubernetes.io/component: gateway
    app.kubernetes.io/name: litellm
 spec:
  replicas: 1
  selector:
    matchLabels:
      app.kubernetes.io/component: gateway
      app.kubernetes.io/name: litellm
  template:
    metadata:
      annotations:
        prometheus.io/path: '/metrics'
        prometheus.io/port: '4000'
        prometheus.io/scrape: 'true'
      labels:
        monitoring: prometheus
        app.kubernetes.io/part-of: llama-stack
        app.kubernetes.io/component: gateway
        app.kubernetes.io/name: litellm
    spec:
      volumes:
        - name: litellm-config
          configMap:
            name: litellm-config
      containers:
        - name: litellm
          image: ghcr.io/berriai/litellm:v1.82.6.rc.3
          imagePullPolicy: IfNotPresent
          args:
            - '--config'
            - '/app/config.yaml'
          ports:
            - name: http
              protocol: TCP
              containerPort: 4000
          resources:
            limits:
              memory: '2Gi'
              cpu: '1000m'
            requests:
              memory: '1Gi'
              cpu: '500m'
          volumeMounts:
            # subPath mounts only the config.yaml key as a single file.
            - name: litellm-config
              subPath: config.yaml
              mountPath: /app/config.yaml
          # Order matters: DATABASE_URL references the POSTGRES_* variables with
          # $(VAR) syntax, so they have to be declared before it.
          env:
            - name: LITELLM_MASTER_KEY
              valueFrom:
                secretKeyRef:
                  key: LITELLM_MASTER_KEY
                  name: litellm-secret
            - name: POSTGRES_USER
              valueFrom:
                secretKeyRef:
                  key: POSTGRES_USER
                  name: litellm-postgres
            - name: POSTGRES_PASSWORD
              valueFrom:
                secretKeyRef:
                  key: POSTGRES_PASSWORD
                  name: litellm-postgres
            - name: POSTGRES_DB
              valueFrom:
                secretKeyRef:
                  key: POSTGRES_DB
                  name: litellm-postgres
            - name: DATABASE_URL
              value: 'postgresql://$(POSTGRES_USER):$(POSTGRES_PASSWORD)@litellm-postgres.llama.svc.cluster.local:5432/$(POSTGRES_DB)'
 ---
 # ClusterIP Service for the LiteLLM gateway (4000 -> the container port named
 # "http").
 kind: Service
 apiVersion: v1
 metadata:
  namespace: llama
  name: litellm
  labels:
    monitoring: prometheus
    app.kubernetes.io/part-of: llama-stack
    app.kubernetes.io/component: gateway
    app.kubernetes.io/name: litellm
 spec:
  type: ClusterIP
  ports:
    - name: http
      protocol: TCP
      targetPort: http
      port: 4000
  selector:
    app.kubernetes.io/component: gateway
    app.kubernetes.io/name: litellm
 ---
 # Exposes the LiteLLM gateway at litellm.mrt0rtikize.ru via Traefik, with a
 # certificate from the letsencrypt-production ClusterIssuer.
 apiVersion: networking.k8s.io/v1
 kind: Ingress
 metadata:
  name: litellm
  namespace: llama
  labels:
    app.kubernetes.io/name: litellm
    app.kubernetes.io/component: gateway
    app.kubernetes.io/part-of: llama-stack
  annotations:
    cert-manager.io/cluster-issuer: letsencrypt-production
    traefik.ingress.kubernetes.io/router.entrypoints: websecure
    traefik.ingress.kubernetes.io/router.tls: "true"
 spec:
  ingressClassName: traefik
  tls:
    - hosts:
        - litellm.mrt0rtikize.ru
      # Named after this host, like gitea-tls and longhorn-tls. The previous
      # name (web-echo-tls) was a leftover from another app and would collide
      # with any other Ingress in this namespace using that secret name.
      secretName: litellm-tls
  rules:
    - host: litellm.mrt0rtikize.ru
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: litellm
                port:
                  number: 4000
 ---
 # PodMonitor: scrapes /metrics on the "http" port of the litellm gateway pods
 # every 30 seconds.
 kind: PodMonitor
 apiVersion: monitoring.coreos.com/v1
 metadata:
  namespace: llama
  name: litellm
  labels:
    # NOTE(review): presumably the label the Prometheus podMonitorSelector
    # matches on; confirm against the kube-prometheus-stack values.
    release: kube-prometheus-stack
    app.kubernetes.io/part-of: llama-stack
    app.kubernetes.io/component: gateway
    app.kubernetes.io/name: litellm
 spec:
  podMetricsEndpoints:
    - interval: 30s
      path: /metrics
      port: http
  selector:
    matchLabels:
      app.kubernetes.io/component: gateway
      app.kubernetes.io/name: litellm
  namespaceSelector:
    matchNames:
      - llama
--- a/llama/main.yaml
+++ b/llama/main.yaml
@@ -0,0 +1,166 @@
 # llama.cpp server (Vulkan image) serving the Devstral GGUF model from the
 # llama-gpu-models-pvc volume.
 apiVersion: apps/v1
 kind: Deployment
 metadata:
  name: llama-server-gpu
  namespace: llama
 spec:
  replicas: 1
  # Recreate instead of the default RollingUpdate: a surge pod would be pinned
  # to the same gpu=amd node and start loading a second copy of the model while
  # the old pod is still running. Same strategy as llama-server-cpu.
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app: llama-server-gpu
  template:
    metadata:
      labels:
        app: llama-server-gpu
      annotations:
        prometheus.io/scrape: "true"
        prometheus.io/port: "8080"
        prometheus.io/path: "/metrics"
    spec:
      # Schedule only onto nodes labelled gpu=amd.
      nodeSelector:
        gpu: amd
      initContainers:
        # Downloads MODEL_FILE from MODEL_REPO into /models unless the file is
        # already there, so pod restarts do not re-download the model.
        - name: download-model
          image: python:3.11-slim
          env:
            - name: HF_HOME
              value: /models/.hf
            - name: MODEL_REPO
              value: "byteshape/Devstral-Small-2-24B-Instruct-2512-GGUF"
            - name: MODEL_FILE
              value: "Devstral-Small-2-24B-Instruct-2512-IQ4_XS-4.04bpw.gguf"
            # optional, only if you need gated/private models
            # - name: HUGGING_FACE_HUB_TOKEN
            #   valueFrom:
            #     secretKeyRef:
            #       name: hf-token
            #       key: token
          # The legacy local_dir_use_symlinks argument is deliberately not
          # passed to hf_hub_download: it is a deprecated no-op in current
          # huggingface_hub, and because pip installs the latest release here,
          # newer versions may reject it outright.
          command:
            - /bin/sh
            - -c
            - |
              set -eux
              MODEL_PATH="/models/${MODEL_FILE}"
              if [ -f "${MODEL_PATH}" ]; then
                echo "Model already exists at ${MODEL_PATH}, skipping download"
                exit 0
              fi
              echo "Installing Hugging Face Hub downloader"
              pip install --no-cache-dir huggingface_hub
              echo "Downloading ${MODEL_REPO}/${MODEL_FILE}"
              python - <<'PY'
              import os
              from huggingface_hub import hf_hub_download
              repo_id = os.environ["MODEL_REPO"]
              filename = os.environ["MODEL_FILE"]
              token = os.environ.get("HUGGING_FACE_HUB_TOKEN")
              path = hf_hub_download(
                  repo_id=repo_id,
                  filename=filename,
                  local_dir="/models",
                  token=token,
              )
              print(f"Downloaded to: {path}")
              PY
              ls -lah /models
          volumeMounts:
            - name: models
              mountPath: /models
      containers:
        - name: llama
          image: ghcr.io/ggml-org/llama.cpp:server-vulkan
          args:
            # Must match MODEL_FILE of the download-model init container.
            - "--model"
            - "/models/Devstral-Small-2-24B-Instruct-2512-IQ4_XS-4.04bpw.gguf"
            - "--host"
            - "0.0.0.0"
            - "--port"
            - "8080"
            - "--n-gpu-layers"
            - "999"
            - "--metrics"
            # performance tuning
            - "--ctx-size"
            - "32768"
            - "--parallel"
            - "4"
            # KV cache quantization
            - "--cache-type-k"
            - "q8_0"
            - "--cache-type-v"
            - "q8_0"
          ports:
            - name: http
              containerPort: 8080
          # NOTE(review): privileged is presumably here for access to the host
          # /dev/dri devices mounted below — confirm it is still required.
          securityContext:
            privileged: true
          volumeMounts:
            - name: models
              mountPath: /models
            - name: dri
              mountPath: /dev/dri
          resources:
            requests:
              cpu: "2"
              memory: "4Gi"
            limits:
              cpu: "2"
              memory: "4Gi"
      volumes:
        - name: models
          persistentVolumeClaim:
            claimName: llama-gpu-models-pvc
        # Host GPU device nodes.
        - name: dri
          hostPath:
            path: /dev/dri
            type: Directory
 ---
 # Cluster-internal Service for the llama-server-gpu pods (port 8080).
 apiVersion: v1
 kind: Service
 metadata:
  namespace: llama
  name: llama-server-gpu
 spec:
  type: ClusterIP
  ports:
    # targetPort refers to the container port named "http" on the selected pods.
    - targetPort: http
      port: 8080
      name: http
  selector:
    app: llama-server-gpu
 ---
 # Scrapes /metrics on the llama-server-gpu pods every 15 seconds.
 # NOTE(review): unlike the litellm PodMonitor, this one has no `release:`
 # label — confirm the Prometheus podMonitorSelector still selects it.
 apiVersion: monitoring.coreos.com/v1
 kind: PodMonitor
 metadata:
  namespace: llama
  name: llama-server-gpu
  labels:
    app: llama-server-gpu
 spec:
  podMetricsEndpoints:
    - interval: 15s
      path: /metrics
      port: http
  selector:
    matchLabels:
      app: llama-server-gpu
  # Only pods inside the llama namespace are considered.
  namespaceSelector:
    matchNames:
      - llama
--- a/llama/namespace.yaml
+++ b/llama/namespace.yaml
@@ -0,0 +1,42 @@
 # Namespace that the llama manifests in this directory deploy into.
 apiVersion: v1
 kind: Namespace
 metadata:
  name: llama
 ---
 # apiVersion: storage.k8s.io/v1
 # kind: StorageClass
 # metadata:
 #   name: longhorn-llama
 # provisioner: driver.longhorn.io
 # parameters:
 #   numberOfReplicas: "2"
 #   staleReplicaTimeout: "30"
 # allowVolumeExpansion: true
 # reclaimPolicy: Retain
 # volumeBindingMode: Immediate
 # ---
 # 50Gi ReadWriteOnce claim mounted at /models by the GPU llama servers.
 apiVersion: v1
 kind: PersistentVolumeClaim
 metadata:
  namespace: llama
  name: llama-gpu-models-pvc
 spec:
  # No storageClassName is set, so the cluster default StorageClass is used.
  # storageClassName: longhorn-llama
  resources:
    requests:
      storage: 50Gi
  accessModes:
    - ReadWriteOnce
 ---
 # 100Gi ReadWriteOnce claim mounted at /models by the CPU llama server.
 apiVersion: v1
 kind: PersistentVolumeClaim
 metadata:
  namespace: llama
  name: llama-cpu-models-pvc
 spec:
  # No storageClassName is set, so the cluster default StorageClass is used.
  # storageClassName: longhorn-llama
  resources:
    requests:
      storage: 100Gi
  accessModes:
    - ReadWriteOnce
--- a/llama/rp.yaml
+++ b/llama/rp.yaml
@@ -0,0 +1,166 @@
 # llama.cpp server (Vulkan image) serving the Omega-Darker GGUF model from the
 # llama-gpu-models-pvc volume.
 apiVersion: apps/v1
 kind: Deployment
 metadata:
  name: llama-server-gpu-rp
  namespace: llama
 spec:
  replicas: 1
  # Recreate instead of the default RollingUpdate: a surge pod would be pinned
  # to the same gpu=amd node and start loading a second copy of the model while
  # the old pod is still running. Same strategy as llama-server-cpu.
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app: llama-server-gpu-rp
  template:
    metadata:
      labels:
        app: llama-server-gpu-rp
      annotations:
        prometheus.io/scrape: "true"
        prometheus.io/port: "8080"
        prometheus.io/path: "/metrics"
    spec:
      # Schedule only onto nodes labelled gpu=amd.
      nodeSelector:
        gpu: amd
      initContainers:
        # Downloads MODEL_FILE from MODEL_REPO into /models unless the file is
        # already there, so pod restarts do not re-download the model.
        - name: download-model
          image: python:3.11-slim
          env:
            - name: HF_HOME
              value: /models/.hf
            - name: MODEL_REPO
              value: "mradermacher/Omega-Darker-Gaslight_The-Final-Forgotten-Fever-Dream-24B-GGUF"
            - name: MODEL_FILE
              value: "Omega-Darker-Gaslight_The-Final-Forgotten-Fever-Dream-24B.Q4_K_S.gguf"
            # optional, only if you need gated/private models
            # - name: HUGGING_FACE_HUB_TOKEN
            #   valueFrom:
            #     secretKeyRef:
            #       name: hf-token
            #       key: token
          # The legacy local_dir_use_symlinks argument is deliberately not
          # passed to hf_hub_download: it is a deprecated no-op in current
          # huggingface_hub, and because pip installs the latest release here,
          # newer versions may reject it outright.
          command:
            - /bin/sh
            - -c
            - |
              set -eux
              MODEL_PATH="/models/${MODEL_FILE}"
              if [ -f "${MODEL_PATH}" ]; then
                echo "Model already exists at ${MODEL_PATH}, skipping download"
                exit 0
              fi
              echo "Installing Hugging Face Hub downloader"
              pip install --no-cache-dir huggingface_hub
              echo "Downloading ${MODEL_REPO}/${MODEL_FILE}"
              python - <<'PY'
              import os
              from huggingface_hub import hf_hub_download
              repo_id = os.environ["MODEL_REPO"]
              filename = os.environ["MODEL_FILE"]
              token = os.environ.get("HUGGING_FACE_HUB_TOKEN")
              path = hf_hub_download(
                  repo_id=repo_id,
                  filename=filename,
                  local_dir="/models",
                  token=token,
              )
              print(f"Downloaded to: {path}")
              PY
              ls -lah /models
          volumeMounts:
            - name: models
              mountPath: /models
      containers:
        - name: llama
          image: ghcr.io/ggml-org/llama.cpp:server-vulkan
          args:
            # Must match MODEL_FILE of the download-model init container.
            - "--model"
            - "/models/Omega-Darker-Gaslight_The-Final-Forgotten-Fever-Dream-24B.Q4_K_S.gguf"
            - "--host"
            - "0.0.0.0"
            - "--port"
            - "8080"
            - "--n-gpu-layers"
            - "999"
            - "--metrics"
            # performance tuning
            - "--ctx-size"
            - "32768"
            - "--parallel"
            - "1"
            # KV cache quantization
            - "--cache-type-k"
            - "q8_0"
            - "--cache-type-v"
            - "q8_0"
          ports:
            - name: http
              containerPort: 8080
          # NOTE(review): privileged is presumably here for access to the host
          # /dev/dri devices mounted below — confirm it is still required.
          securityContext:
            privileged: true
          volumeMounts:
            - name: models
              mountPath: /models
            - name: dri
              mountPath: /dev/dri
          resources:
            requests:
              cpu: "2"
              memory: "4Gi"
            limits:
              cpu: "2"
              memory: "4Gi"
      volumes:
        - name: models
          persistentVolumeClaim:
            claimName: llama-gpu-models-pvc
        # Host GPU device nodes.
        - name: dri
          hostPath:
            path: /dev/dri
            type: Directory
 ---
 # Cluster-internal Service for the llama-server-gpu-rp pods (port 8080).
 apiVersion: v1
 kind: Service
 metadata:
  namespace: llama
  name: llama-server-gpu-rp
 spec:
  type: ClusterIP
  ports:
    # targetPort refers to the container port named "http" on the selected pods.
    - targetPort: http
      port: 8080
      name: http
  selector:
    app: llama-server-gpu-rp
 ---
 # Scrapes /metrics on the llama-server-gpu-rp pods every 15 seconds.
 # NOTE(review): unlike the litellm PodMonitor, this one has no `release:`
 # label — confirm the Prometheus podMonitorSelector still selects it.
 apiVersion: monitoring.coreos.com/v1
 kind: PodMonitor
 metadata:
  namespace: llama
  name: llama-server-gpu-rp
  labels:
    app: llama-server-gpu-rp
 spec:
  podMetricsEndpoints:
    - interval: 15s
      path: /metrics
      port: http
  selector:
    matchLabels:
      app: llama-server-gpu-rp
  # Only pods inside the llama namespace are considered.
  namespaceSelector:
    matchNames:
      - llama
--- a/manifests/llama/cpu.yaml
+++ b/manifests/llama/cpu.yaml
@@ -0,0 +1,147 @@
 # llama.cpp server (CPU image) serving the Qwen3-Coder GGUF model from the
 # llama-cpu-models-pvc volume.
 apiVersion: apps/v1
 kind: Deployment
 metadata:
  name: llama-server-cpu
  namespace: llama
 spec:
  replicas: 1
  # The old pod is stopped before the replacement starts.
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app: llama-server-cpu
  template:
    metadata:
      labels:
        app: llama-server-cpu
      annotations:
        prometheus.io/scrape: "true"
        prometheus.io/port: "8080"
        prometheus.io/path: "/metrics"
    spec:
      # NOTE(review): the CPU server is also pinned to gpu=amd nodes — confirm
      # this is intentional.
      nodeSelector:
        gpu: amd
      initContainers:
        # Downloads MODEL_FILE from MODEL_REPO into /models unless the file is
        # already there, so pod restarts do not re-download the model.
        - name: download-model
          image: python:3.11-slim
          env:
            - name: HF_HOME
              value: /models/.hf
            - name: MODEL_REPO
              value: "byteshape/Qwen3-Coder-30B-A3B-Instruct-GGUF"
            - name: MODEL_FILE
              value: "Qwen3-Coder-30B-A3B-Instruct-IQ4_XS-4.20bpw.gguf"
          # The legacy local_dir_use_symlinks argument is deliberately not
          # passed to hf_hub_download: it is a deprecated no-op in current
          # huggingface_hub, and because pip installs the latest release here,
          # newer versions may reject it outright.
          command:
            - /bin/sh
            - -c
            - |
              set -eux
              MODEL_PATH="/models/${MODEL_FILE}"
              if [ -f "${MODEL_PATH}" ]; then
                echo "Model already exists at ${MODEL_PATH}, skipping download"
                exit 0
              fi
              echo "Installing Hugging Face Hub downloader"
              pip install --no-cache-dir huggingface_hub
              echo "Downloading ${MODEL_REPO}/${MODEL_FILE}"
              python - <<'PY'
              import os
              from huggingface_hub import hf_hub_download
              repo_id = os.environ["MODEL_REPO"]
              filename = os.environ["MODEL_FILE"]
              token = os.environ.get("HUGGING_FACE_HUB_TOKEN")
              path = hf_hub_download(
                  repo_id=repo_id,
                  filename=filename,
                  local_dir="/models",
                  token=token,
              )
              print(f"Downloaded to: {path}")
              PY
              ls -lah /models
          volumeMounts:
            - name: models
              mountPath: /models
      containers:
        - name: llama
          image: ghcr.io/ggml-org/llama.cpp:server
          args:
            # Must match MODEL_FILE of the download-model init container.
            - "--model"
            - "/models/Qwen3-Coder-30B-A3B-Instruct-IQ4_XS-4.20bpw.gguf"
            - "--host"
            - "0.0.0.0"
            - "--port"
            - "8080"
            - "--metrics"
            - "--ctx-size"
            - "32768"
            - "--parallel"
            - "1"
            - "--cache-type-k"
            - "q8_0"
            - "--cache-type-v"
            - "q8_0"
          ports:
            - name: http
              containerPort: 8080
          volumeMounts:
            - name: models
              mountPath: /models
          resources:
            requests:
              cpu: "8"
              memory: "24Gi"
            limits:
              cpu: "12"
              memory: "24Gi"
      volumes:
        - name: models
          persistentVolumeClaim:
            claimName: llama-cpu-models-pvc
 ---
 # Cluster-internal Service for the llama-server-cpu pods (port 8080).
 apiVersion: v1
 kind: Service
 metadata:
  namespace: llama
  name: llama-server-cpu
 spec:
  type: ClusterIP
  ports:
    # targetPort refers to the container port named "http" on the selected pods.
    - targetPort: http
      port: 8080
      name: http
  selector:
    app: llama-server-cpu
 ---
 # Scrapes /metrics on the llama-server-cpu pods every 15 seconds.
 # NOTE(review): unlike the litellm PodMonitor, this one has no `release:`
 # label — confirm the Prometheus podMonitorSelector still selects it.
 apiVersion: monitoring.coreos.com/v1
 kind: PodMonitor
 metadata:
  namespace: llama
  name: llama-server-cpu
  labels:
    app: llama-server-cpu
 spec:
  podMetricsEndpoints:
    - interval: 15s
      path: /metrics
      port: http
  selector:
    matchLabels:
      app: llama-server-cpu
  # Only pods inside the llama namespace are considered.
  namespaceSelector:
    matchNames:
      - llama
--- a/manifests/llama/gpu-exporter.yaml
+++ b/manifests/llama/gpu-exporter.yaml
@@ -0,0 +1,62 @@
 # Runs the Radeon metrics exporter on every node labelled gpu=amd.
 # NOTE(review): the image uses the mutable `latest` tag together with
 # IfNotPresent, so nodes can keep running different cached builds — consider
 # pinning a version.
 apiVersion: apps/v1
 kind: DaemonSet
 metadata:
  namespace: llama
  name: radeon-exporter
  labels:
    app: radeon-exporter
 spec:
  selector:
    matchLabels:
      app: radeon-exporter
  template:
    metadata:
      labels:
        app: radeon-exporter
    spec:
      volumes:
        - name: sys
          hostPath:
            type: Directory
            path: /sys
        - name: dri
          hostPath:
            type: Directory
            path: /dev/dri
      nodeSelector:
        gpu: amd
      containers:
        - name: radeon-exporter
          imagePullPolicy: IfNotPresent
          image: kmulvey/radeon_exporter:latest
          securityContext:
            privileged: true
          ports:
            - containerPort: 9200
              name: metrics
          volumeMounts:
            # Host /sys and /dev/dri are mounted read-only into the exporter.
            - mountPath: /sys
              name: sys
              readOnly: true
            - mountPath: /dev/dri
              name: dri
              readOnly: true
 ---
 # Scrapes /metrics on the radeon-exporter pods every 15 seconds.
 # NOTE(review): this PodMonitor is labelled `monitoring: primary`, while the
 # litellm one uses `release: kube-prometheus-stack` — confirm which label the
 # Prometheus podMonitorSelector matches.
 apiVersion: monitoring.coreos.com/v1
 kind: PodMonitor
 metadata:
  namespace: llama
  name: radeon-exporter
  labels:
    monitoring: primary
 spec:
  podMetricsEndpoints:
    - interval: 15s
      path: /metrics
      port: metrics
  selector:
    matchLabels:
      app: radeon-exporter
  # Only pods inside the llama namespace are considered.
  namespaceSelector:
    matchNames:
      - llama
--- a/manifests/llama/litellm-db.yaml
+++ b/manifests/llama/litellm-db.yaml
@@ -0,0 +1,116 @@
 # Database name and credentials for the LiteLLM PostgreSQL instance; read via
 # secretKeyRef by the litellm-postgres and litellm Deployments.
 # NOTE(review): the password is committed to Git in plain text. Consider
 # rotating it and supplying this Secret from a secret manager (for example
 # Sealed Secrets or External Secrets) instead.
 apiVersion: v1
 kind: Secret
 metadata:
  name: litellm-postgres
  namespace: llama
 type: Opaque
 stringData:
  POSTGRES_DB: litellm
  POSTGRES_USER: litellm
  POSTGRES_PASSWORD: 7792e47efbc7348155f54a15ed34dc1d06716b2b1848711d0ee90e3461883c0d
 ---
 # 10Gi ReadWriteOnce claim used as the data volume of litellm-postgres.
 apiVersion: v1
 kind: PersistentVolumeClaim
 metadata:
  namespace: llama
  name: litellm-postgres
  labels:
    app.kubernetes.io/name: litellm-postgres
 spec:
  resources:
    requests:
      storage: 10Gi
  accessModes:
    - ReadWriteOnce
 ---
 # Single-instance PostgreSQL 16 that backs the LiteLLM gateway.
 apiVersion: apps/v1
 kind: Deployment
 metadata:
  name: litellm-postgres
  namespace: llama
  labels:
    app.kubernetes.io/name: litellm-postgres
    app.kubernetes.io/component: database
 spec:
  replicas: 1
  # Recreate instead of the default RollingUpdate: the data volume is
  # ReadWriteOnce and only one postmaster may own the data directory, so the
  # old pod has to stop before its replacement can start.
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app.kubernetes.io/name: litellm-postgres
      app.kubernetes.io/component: database
  template:
    metadata:
      labels:
        app.kubernetes.io/name: litellm-postgres
        app.kubernetes.io/component: database
    spec:
      containers:
        - name: postgres
          image: postgres:16
          imagePullPolicy: IfNotPresent
          ports:
            - name: postgres
              containerPort: 5432
          # Database name and credentials come from the litellm-postgres Secret.
          env:
            - name: POSTGRES_DB
              valueFrom:
                secretKeyRef:
                  name: litellm-postgres
                  key: POSTGRES_DB
            - name: POSTGRES_USER
              valueFrom:
                secretKeyRef:
                  name: litellm-postgres
                  key: POSTGRES_USER
            - name: POSTGRES_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: litellm-postgres
                  key: POSTGRES_PASSWORD
          volumeMounts:
            # postgres:16 keeps its cluster in /var/lib/postgresql/data, which
            # the image declares as a volume. Mounting the claim on the parent
            # directory leaves that path on an ephemeral image volume, so the
            # claim is mounted on the data directory itself. subPath keeps the
            # cluster out of the volume root (which may contain lost+found)
            # and lines up with a data/ directory from the previous layout.
            - name: data
              mountPath: /var/lib/postgresql/data
              subPath: data
          readinessProbe:
            exec:
              command:
                - sh
                - -c
                - pg_isready -U "$POSTGRES_USER" -d "$POSTGRES_DB"
            initialDelaySeconds: 5
            periodSeconds: 10
          livenessProbe:
            exec:
              command:
                - sh
                - -c
                - pg_isready -U "$POSTGRES_USER" -d "$POSTGRES_DB"
            initialDelaySeconds: 20
            periodSeconds: 20
          resources:
            requests:
              cpu: 100m
              memory: 256Mi
            limits:
              cpu: 500m
              memory: 256Mi
      volumes:
        - name: data
          persistentVolumeClaim:
            claimName: litellm-postgres
 ---
 # Cluster-internal Service for the LiteLLM PostgreSQL pod (port 5432).
 apiVersion: v1
 kind: Service
 metadata:
  namespace: llama
  name: litellm-postgres
  labels:
    app.kubernetes.io/component: database
    app.kubernetes.io/name: litellm-postgres
 spec:
  type: ClusterIP
  ports:
    # targetPort refers to the container port named "postgres".
    - targetPort: postgres
      port: 5432
      name: postgres
  selector:
    app.kubernetes.io/component: database
    app.kubernetes.io/name: litellm-postgres
--- a/manifests/llama/litellm.yaml
+++ b/manifests/llama/litellm.yaml
@@ -0,0 +1,202 @@
 # Master key for the LiteLLM gateway; injected into the litellm Deployment as
 # the LITELLM_MASTER_KEY environment variable.
 # NOTE(review): the key is committed to Git in plain text. Consider rotating
 # it and supplying this Secret from a secret manager instead. Also confirm
 # the value format against LiteLLM's master-key requirements.
 apiVersion: v1
 kind: Secret
 metadata:
  name: litellm-secret
  namespace: llama
  labels:
    app.kubernetes.io/name: litellm
    app.kubernetes.io/component: gateway
 type: Opaque
 stringData:
  LITELLM_MASTER_KEY: "6991c7c0f02b4bcf"
 ---
 # LiteLLM proxy configuration; the litellm Deployment mounts config.yaml at
 # /app/config.yaml. It maps three model aliases to in-cluster llama.cpp
 # services:
 #   fast  -> llama-server-gpu
 #   smart -> llama-server-cpu
 #   rp    -> llama-server-gpu-rp
 # NOTE(review): store_prompts_in_spend_logs is enabled, which presumably
 # persists prompt contents in the database — confirm this is intended.
 apiVersion: v1
 kind: ConfigMap
 metadata:
  name: litellm-config
  namespace: llama
  labels:
    app.kubernetes.io/name: litellm
    app.kubernetes.io/component: gateway
 data:
  config.yaml: |
    model_list:
      - model_name: fast
        litellm_params:
          model: openai/fast
          api_base: "http://llama-server-gpu.llama.svc.cluster.local:8080/v1"
          api_key: none
      - model_name: smart
        litellm_params:
          model: openai/smart
          api_base: "http://llama-server-cpu.llama.svc.cluster.local:8080/v1"
          api_key: none
      - model_name: rp
        litellm_params:
          model: openai/rp
          api_base: "http://llama-server-gpu-rp.llama.svc.cluster.local:8080/v1"
          api_key: none
    litellm_settings:
      callbacks:
        - prometheus
    general_settings:
      store_model_in_db: true
      store_prompts_in_spend_logs: true
 ---
 # LiteLLM gateway: one replica that reads its routing config from the
 # litellm-config ConfigMap and connects to the litellm-postgres database.
 apiVersion: apps/v1
 kind: Deployment
 metadata:
  name: litellm
  namespace: llama
  labels:
    app.kubernetes.io/name: litellm
    app.kubernetes.io/component: gateway
    app.kubernetes.io/part-of: llama-stack
    monitoring: prometheus
 spec:
  replicas: 1
  selector:
    matchLabels:
      app.kubernetes.io/name: litellm
      app.kubernetes.io/component: gateway
  template:
    metadata:
      labels:
        app.kubernetes.io/name: litellm
        app.kubernetes.io/component: gateway
        app.kubernetes.io/part-of: llama-stack
        monitoring: prometheus
      annotations:
        prometheus.io/scrape: "true"
        prometheus.io/port: "4000"
        prometheus.io/path: "/metrics"
    spec:
      containers:
        - name: litellm
          image: ghcr.io/berriai/litellm:v1.82.6.rc.3
          imagePullPolicy: IfNotPresent
          # Points the proxy at the config file mounted below.
          args:
            - "--config"
            - "/app/config.yaml"
          env:
            - name: LITELLM_MASTER_KEY
              valueFrom:
                secretKeyRef:
                  name: litellm-secret
                  key: LITELLM_MASTER_KEY
            # The three POSTGRES_* variables must stay above DATABASE_URL:
            # $(VAR) references only expand variables defined earlier in
            # this list.
            - name: POSTGRES_USER
              valueFrom:
                secretKeyRef:
                  name: litellm-postgres
                  key: POSTGRES_USER
            - name: POSTGRES_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: litellm-postgres
                  key: POSTGRES_PASSWORD
            - name: POSTGRES_DB
              valueFrom:
                secretKeyRef:
                  name: litellm-postgres
                  key: POSTGRES_DB
            # NOTE(review): the password is inserted into the URL verbatim, so
            # it must not contain characters that need URL-escaping.
            - name: DATABASE_URL
              value: "postgresql://$(POSTGRES_USER):$(POSTGRES_PASSWORD)@litellm-postgres.llama.svc.cluster.local:5432/$(POSTGRES_DB)"
          ports:
            - name: http
              containerPort: 4000
              protocol: TCP
          volumeMounts:
            # subPath mounts only the config.yaml key as a single file.
            - name: litellm-config
              mountPath: /app/config.yaml
              subPath: config.yaml
          resources:
            requests:
              cpu: "500m"
              memory: "1Gi"
            limits:
              cpu: "1000m"
              memory: "2Gi"
      volumes:
        - name: litellm-config
          configMap:
            name: litellm-config
 ---
 # Cluster-internal Service in front of the LiteLLM gateway pods (port 4000).
 apiVersion: v1
 kind: Service
 metadata:
  namespace: llama
  name: litellm
  labels:
    monitoring: prometheus
    app.kubernetes.io/part-of: llama-stack
    app.kubernetes.io/component: gateway
    app.kubernetes.io/name: litellm
 spec:
  type: ClusterIP
  # Traffic goes to pods that carry both gateway labels.
  selector:
    app.kubernetes.io/component: gateway
    app.kubernetes.io/name: litellm
  ports:
    # targetPort refers to the container port named "http" on the selected pods.
    - port: 4000
      name: http
      protocol: TCP
      targetPort: http
 ---
 # Public HTTPS route to the LiteLLM gateway, served through Traefik with a
 # certificate requested from cert-manager.
 apiVersion: networking.k8s.io/v1
 kind: Ingress
 metadata:
  name: litellm
  namespace: llama
  labels:
    app.kubernetes.io/name: litellm
    app.kubernetes.io/component: gateway
    app.kubernetes.io/part-of: llama-stack
  annotations:
    # ClusterIssuer cert-manager uses to issue the certificate for spec.tls.
    cert-manager.io/cluster-issuer: letsencrypt-production
    traefik.ingress.kubernetes.io/router.entrypoints: websecure
    traefik.ingress.kubernetes.io/router.tls: "true"
 spec:
  ingressClassName: traefik
  tls:
    - hosts:
        - litellm.mrt0rtikize.ru
      # Secret that stores the issued certificate. Named after this service
      # instead of the unrelated "web-echo-tls" name it was copied with.
      secretName: litellm-tls
  rules:
    - host: litellm.mrt0rtikize.ru
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: litellm
                port:
                  number: 4000
 ---
 # Scrapes /metrics on the LiteLLM gateway pods every 30 seconds.
 apiVersion: monitoring.coreos.com/v1
 kind: PodMonitor
 metadata:
  namespace: llama
  name: litellm
  labels:
    release: kube-prometheus-stack
    app.kubernetes.io/part-of: llama-stack
    app.kubernetes.io/component: gateway
    app.kubernetes.io/name: litellm
 spec:
  podMetricsEndpoints:
    - interval: 30s
      path: /metrics
      port: http
  selector:
    matchLabels:
      app.kubernetes.io/component: gateway
      app.kubernetes.io/name: litellm
  # Only pods inside the llama namespace are considered.
  namespaceSelector:
    matchNames:
      - llama
--- a/manifests/llama/main.yaml
+++ b/manifests/llama/main.yaml
@@ -0,0 +1,166 @@
 # llama.cpp server (Vulkan image) serving the Devstral GGUF model from the
 # llama-gpu-models-pvc volume.
 apiVersion: apps/v1
 kind: Deployment
 metadata:
  name: llama-server-gpu
  namespace: llama
 spec:
  replicas: 1
  # Recreate instead of the default RollingUpdate: a surge pod would be pinned
  # to the same gpu=amd node and start loading a second copy of the model while
  # the old pod is still running. Same strategy as llama-server-cpu.
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app: llama-server-gpu
  template:
    metadata:
      labels:
        app: llama-server-gpu
      annotations:
        prometheus.io/scrape: "true"
        prometheus.io/port: "8080"
        prometheus.io/path: "/metrics"
    spec:
      # Schedule only onto nodes labelled gpu=amd.
      nodeSelector:
        gpu: amd
      initContainers:
        # Downloads MODEL_FILE from MODEL_REPO into /models unless the file is
        # already there, so pod restarts do not re-download the model.
        - name: download-model
          image: python:3.11-slim
          env:
            - name: HF_HOME
              value: /models/.hf
            - name: MODEL_REPO
              value: "byteshape/Devstral-Small-2-24B-Instruct-2512-GGUF"
            - name: MODEL_FILE
              value: "Devstral-Small-2-24B-Instruct-2512-IQ4_XS-4.04bpw.gguf"
            # optional, only if you need gated/private models
            # - name: HUGGING_FACE_HUB_TOKEN
            #   valueFrom:
            #     secretKeyRef:
            #       name: hf-token
            #       key: token
          # The legacy local_dir_use_symlinks argument is deliberately not
          # passed to hf_hub_download: it is a deprecated no-op in current
          # huggingface_hub, and because pip installs the latest release here,
          # newer versions may reject it outright.
          command:
            - /bin/sh
            - -c
            - |
              set -eux
              MODEL_PATH="/models/${MODEL_FILE}"
              if [ -f "${MODEL_PATH}" ]; then
                echo "Model already exists at ${MODEL_PATH}, skipping download"
                exit 0
              fi
              echo "Installing Hugging Face Hub downloader"
              pip install --no-cache-dir huggingface_hub
              echo "Downloading ${MODEL_REPO}/${MODEL_FILE}"
              python - <<'PY'
              import os
              from huggingface_hub import hf_hub_download
              repo_id = os.environ["MODEL_REPO"]
              filename = os.environ["MODEL_FILE"]
              token = os.environ.get("HUGGING_FACE_HUB_TOKEN")
              path = hf_hub_download(
                  repo_id=repo_id,
                  filename=filename,
                  local_dir="/models",
                  token=token,
              )
              print(f"Downloaded to: {path}")
              PY
              ls -lah /models
          volumeMounts:
            - name: models
              mountPath: /models
      containers:
        - name: llama
          image: ghcr.io/ggml-org/llama.cpp:server-vulkan
          args:
            # Must match MODEL_FILE of the download-model init container.
            - "--model"
            - "/models/Devstral-Small-2-24B-Instruct-2512-IQ4_XS-4.04bpw.gguf"
            - "--host"
            - "0.0.0.0"
            - "--port"
            - "8080"
            - "--n-gpu-layers"
            - "999"
            - "--metrics"
            # performance tuning
            - "--ctx-size"
            - "32768"
            - "--parallel"
            - "4"
            # KV cache quantization
            - "--cache-type-k"
            - "q8_0"
            - "--cache-type-v"
            - "q8_0"
          ports:
            - name: http
              containerPort: 8080
          # NOTE(review): privileged is presumably here for access to the host
          # /dev/dri devices mounted below — confirm it is still required.
          securityContext:
            privileged: true
          volumeMounts:
            - name: models
              mountPath: /models
            - name: dri
              mountPath: /dev/dri
          resources:
            requests:
              cpu: "2"
              memory: "4Gi"
            limits:
              cpu: "2"
              memory: "4Gi"
      volumes:
        - name: models
          persistentVolumeClaim:
            claimName: llama-gpu-models-pvc
        # Host GPU device nodes.
        - name: dri
          hostPath:
            path: /dev/dri
            type: Directory
 ---
 # Cluster-internal Service for the llama-server-gpu pods (port 8080).
 apiVersion: v1
 kind: Service
 metadata:
  namespace: llama
  name: llama-server-gpu
 spec:
  type: ClusterIP
  ports:
    # targetPort refers to the container port named "http" on the selected pods.
    - targetPort: http
      port: 8080
      name: http
  selector:
    app: llama-server-gpu
 ---
 # Scrapes /metrics on the llama-server-gpu pods every 15 seconds.
 # NOTE(review): unlike the litellm PodMonitor, this one has no `release:`
 # label — confirm the Prometheus podMonitorSelector still selects it.
 apiVersion: monitoring.coreos.com/v1
 kind: PodMonitor
 metadata:
  namespace: llama
  name: llama-server-gpu
  labels:
    app: llama-server-gpu
 spec:
  podMetricsEndpoints:
    - interval: 15s
      path: /metrics
      port: http
  selector:
    matchLabels:
      app: llama-server-gpu
  # Only pods inside the llama namespace are considered.
  namespaceSelector:
    matchNames:
      - llama
--- a/manifests/llama/namespace.yaml
+++ b/manifests/llama/namespace.yaml
@@ -0,0 +1,42 @@
 # Namespace that the llama manifests in this directory deploy into.
 apiVersion: v1
 kind: Namespace
 metadata:
  name: llama
 ---
 # apiVersion: storage.k8s.io/v1
 # kind: StorageClass
 # metadata:
 #   name: longhorn-llama
 # provisioner: driver.longhorn.io
 # parameters:
 #   numberOfReplicas: "2"
 #   staleReplicaTimeout: "30"
 # allowVolumeExpansion: true
 # reclaimPolicy: Retain
 # volumeBindingMode: Immediate
 # ---
 # 50Gi ReadWriteOnce claim mounted at /models by the GPU llama servers.
 apiVersion: v1
 kind: PersistentVolumeClaim
 metadata:
  namespace: llama
  name: llama-gpu-models-pvc
 spec:
  # No storageClassName is set, so the cluster default StorageClass is used.
  # storageClassName: longhorn-llama
  resources:
    requests:
      storage: 50Gi
  accessModes:
    - ReadWriteOnce
 ---
 # 100Gi ReadWriteOnce claim mounted at /models by the CPU llama server.
 apiVersion: v1
 kind: PersistentVolumeClaim
 metadata:
  namespace: llama
  name: llama-cpu-models-pvc
 spec:
  # No storageClassName is set, so the cluster default StorageClass is used.
  # storageClassName: longhorn-llama
  resources:
    requests:
      storage: 100Gi
  accessModes:
    - ReadWriteOnce
--- a/manifests/llama/rp.yaml
+++ b/manifests/llama/rp.yaml
@@ -0,0 +1,166 @@
 # llama.cpp server (Vulkan image) serving the Omega-Darker GGUF model from the
 # llama-gpu-models-pvc volume.
 apiVersion: apps/v1
 kind: Deployment
 metadata:
  name: llama-server-gpu-rp
  namespace: llama
 spec:
  replicas: 1
  # Recreate instead of the default RollingUpdate: a surge pod would be pinned
  # to the same gpu=amd node and start loading a second copy of the model while
  # the old pod is still running. Same strategy as llama-server-cpu.
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app: llama-server-gpu-rp
  template:
    metadata:
      labels:
        app: llama-server-gpu-rp
      annotations:
        prometheus.io/scrape: "true"
        prometheus.io/port: "8080"
        prometheus.io/path: "/metrics"
    spec:
      # Schedule only onto nodes labelled gpu=amd.
      nodeSelector:
        gpu: amd
      initContainers:
        # Downloads MODEL_FILE from MODEL_REPO into /models unless the file is
        # already there, so pod restarts do not re-download the model.
        - name: download-model
          image: python:3.11-slim
          env:
            - name: HF_HOME
              value: /models/.hf
            - name: MODEL_REPO
              value: "mradermacher/Omega-Darker-Gaslight_The-Final-Forgotten-Fever-Dream-24B-GGUF"
            - name: MODEL_FILE
              value: "Omega-Darker-Gaslight_The-Final-Forgotten-Fever-Dream-24B.Q4_K_S.gguf"
            # optional, only if you need gated/private models
            # - name: HUGGING_FACE_HUB_TOKEN
            #   valueFrom:
            #     secretKeyRef:
            #       name: hf-token
            #       key: token
          # The legacy local_dir_use_symlinks argument is deliberately not
          # passed to hf_hub_download: it is a deprecated no-op in current
          # huggingface_hub, and because pip installs the latest release here,
          # newer versions may reject it outright.
          command:
            - /bin/sh
            - -c
            - |
              set -eux
              MODEL_PATH="/models/${MODEL_FILE}"
              if [ -f "${MODEL_PATH}" ]; then
                echo "Model already exists at ${MODEL_PATH}, skipping download"
                exit 0
              fi
              echo "Installing Hugging Face Hub downloader"
              pip install --no-cache-dir huggingface_hub
              echo "Downloading ${MODEL_REPO}/${MODEL_FILE}"
              python - <<'PY'
              import os
              from huggingface_hub import hf_hub_download
              repo_id = os.environ["MODEL_REPO"]
              filename = os.environ["MODEL_FILE"]
              token = os.environ.get("HUGGING_FACE_HUB_TOKEN")
              path = hf_hub_download(
                  repo_id=repo_id,
                  filename=filename,
                  local_dir="/models",
                  token=token,
              )
              print(f"Downloaded to: {path}")
              PY
              ls -lah /models
          volumeMounts:
            - name: models
              mountPath: /models
      containers:
        - name: llama
          image: ghcr.io/ggml-org/llama.cpp:server-vulkan
          args:
            # Must match MODEL_FILE of the download-model init container.
            - "--model"
            - "/models/Omega-Darker-Gaslight_The-Final-Forgotten-Fever-Dream-24B.Q4_K_S.gguf"
            - "--host"
            - "0.0.0.0"
            - "--port"
            - "8080"
            - "--n-gpu-layers"
            - "999"
            - "--metrics"
            # performance tuning
            - "--ctx-size"
            - "32768"
            - "--parallel"
            - "1"
            # KV cache quantization
            - "--cache-type-k"
            - "q8_0"
            - "--cache-type-v"
            - "q8_0"
          ports:
            - name: http
              containerPort: 8080
          # NOTE(review): privileged is presumably here for access to the host
          # /dev/dri devices mounted below — confirm it is still required.
          securityContext:
            privileged: true
          volumeMounts:
            - name: models
              mountPath: /models
            - name: dri
              mountPath: /dev/dri
          resources:
            requests:
              cpu: "2"
              memory: "4Gi"
            limits:
              cpu: "2"
              memory: "4Gi"
      volumes:
        - name: models
          persistentVolumeClaim:
            claimName: llama-gpu-models-pvc
        # Host GPU device nodes.
        - name: dri
          hostPath:
            path: /dev/dri
            type: Directory
 ---
 # Cluster-internal Service for the llama-server-gpu-rp pods (port 8080).
 apiVersion: v1
 kind: Service
 metadata:
  namespace: llama
  name: llama-server-gpu-rp
 spec:
  type: ClusterIP
  ports:
    # targetPort refers to the container port named "http" on the selected pods.
    - targetPort: http
      port: 8080
      name: http
  selector:
    app: llama-server-gpu-rp
 ---
 # Scrapes /metrics on the llama-server-gpu-rp pods every 15 seconds.
 # NOTE(review): unlike the litellm PodMonitor, this one has no `release:`
 # label — confirm the Prometheus podMonitorSelector still selects it.
 apiVersion: monitoring.coreos.com/v1
 kind: PodMonitor
 metadata:
  namespace: llama
  name: llama-server-gpu-rp
  labels:
    app: llama-server-gpu-rp
 spec:
  podMetricsEndpoints:
    - interval: 15s
      path: /metrics
      port: http
  selector:
    matchLabels:
      app: llama-server-gpu-rp
  # Only pods inside the llama namespace are considered.
  namespaceSelector:
    matchNames:
      - llama
--- a/manifests/longhorn/longhorn-ingress.yaml
+++ b/manifests/longhorn/longhorn-ingress.yaml
@@ -0,0 +1,26 @@
 # Traefik Ingress publishing the Longhorn dashboard at
 # https://longhorn.mrt0rtikize.ru; the certificate in longhorn-tls is
 # requested through cert-manager.
 # NOTE(review): no authentication middleware is referenced here; confirm the
 # dashboard is protected some other way before exposing it.
 apiVersion: networking.k8s.io/v1
 kind: Ingress
 metadata:
  namespace: longhorn-system
  name: longhorn-ingress
  annotations:
    # Route only through the TLS entrypoint.
    traefik.ingress.kubernetes.io/router.tls: "true"
    traefik.ingress.kubernetes.io/router.entrypoints: websecure
    cert-manager.io/cluster-issuer: letsencrypt-production
 spec:
  ingressClassName: traefik  # Traefik is the ingress controller in use
  rules:
    - host: longhorn.mrt0rtikize.ru
      http:
        paths:
          - pathType: Prefix
            path: /
            backend:
              service:
                name: longhorn-frontend  # Service in front of the Longhorn dashboard
                port:
                  number: 80  # Service port the Longhorn UI is served on
  tls:
    - secretName: longhorn-tls
      hosts:
        - longhorn.mrt0rtikize.ru
--- a/manifests/longhorn/test-pvc.yaml
+++ b/manifests/longhorn/test-pvc.yaml
@@ -0,0 +1,23 @@
 # Longhorn-backed StorageClass: two replicas per volume, expandable, bound
 # immediately, and volumes are retained after their claim is deleted.
 apiVersion: storage.k8s.io/v1
 kind: StorageClass
 metadata:
  name: longhorn
 provisioner: driver.longhorn.io
 reclaimPolicy: Retain
 volumeBindingMode: Immediate
 allowVolumeExpansion: true
 parameters:
  # StorageClass parameters are strings, hence the quotes around the numbers.
  staleReplicaTimeout: "30"
  numberOfReplicas: "2"
 ---
 # 2Gi ReadWriteOnce test claim against the longhorn StorageClass.
 # No namespace is set, so it lands in whichever namespace it is applied to.
 apiVersion: v1
 kind: PersistentVolumeClaim
 metadata:
  name: longhorn-pvc
 spec:
  storageClassName: longhorn
  resources:
    requests:
      storage: 2Gi
  accessModes:
    - ReadWriteOnce
--- a/manifests/metallb/ip-address-pool.yaml
+++ b/manifests/metallb/ip-address-pool.yaml
@@ -0,0 +1,10 @@
 # MetalLB address pool: LoadBalancer Services may be given any address in
 # the 10.0.0.120-10.0.0.200 range.
 apiVersion: metallb.io/v1beta1
 kind: IPAddressPool
 metadata:
  namespace: metallb-system
  name: default-address-pool
 spec:
  # Addresses are handed out automatically from this pool.
  autoAssign: true
  # NOTE(review): per the MetalLB docs this skips addresses ending in .0 and
  # .255 - confirm against the installed MetalLB version.
  avoidBuggyIPs: true
  addresses:
    - 10.0.0.120-10.0.0.200
--- a/manifests/metallb/l2advert.yaml
+++ b/manifests/metallb/l2advert.yaml
@@ -0,0 +1,8 @@
 # Advertise the addresses of default-address-pool through MetalLB's
 # L2Advertisement resource.
 apiVersion: metallb.io/v1beta1
 kind: L2Advertisement
 metadata:
  namespace: metallb-system
  name: default-advertisement
 spec:
  ipAddressPools:
    - default-address-pool
--- a/manifests/metrics/grafana-ingress.yaml
+++ b/manifests/metrics/grafana-ingress.yaml
@@ -0,0 +1,26 @@
 # Traefik Ingress serving Grafana at https://grafana.mrt0rtikize.ru; the
 # certificate in grafana-tls is requested through cert-manager.
 apiVersion: networking.k8s.io/v1
 kind: Ingress
 metadata:
  namespace: metrics
  name: grafana
  annotations:
    # Route only through the TLS entrypoint.
    traefik.ingress.kubernetes.io/router.tls: "true"
    traefik.ingress.kubernetes.io/router.entrypoints: websecure
    cert-manager.io/cluster-issuer: letsencrypt-production
 spec:
  ingressClassName: traefik
  rules:
    - host: grafana.mrt0rtikize.ru
      http:
        paths:
          - pathType: Prefix
            path: /
            backend:
              service:
                # NOTE(review): must equal the Service name the Grafana
                # subchart renders for the installed release - verify.
                name: kube-prometheus-stack-grafana
                port:
                  number: 80
  tls:
    - secretName: grafana-tls
      hosts:
        - grafana.mrt0rtikize.ru
--- a/manifests/metrics/kube-prometheus-stack-values.yaml
+++ b/manifests/metrics/kube-prometheus-stack-values.yaml
@@ -0,0 +1,90 @@
 # Helm values for the kube-prometheus-stack chart, installed into the
 # "metrics" namespace. Prometheus keeps 15 days locally and forwards every
 # sample to VictoriaMetrics through remote write.
 fullnameOverride: kube-prometheus
 namespaceOverride: metrics
 prometheusOperator:
  namespace: metrics
  admissionWebhooks:
    # An unreachable webhook must not block API requests.
    failurePolicy: Ignore
 alertmanager:
  enabled: true
  alertmanagerSpec:
    resources:
      requests:
        cpu: 50m
        memory: 128Mi
      limits:
        cpu: 200m
        memory: 512Mi
    # No storageClassName is set, so the cluster default StorageClass is used.
    storage:
      volumeClaimTemplate:
        spec:
          accessModes:
            - ReadWriteOnce
          resources:
            requests:
              storage: 10Gi
 prometheus:
  enabled: true
  prometheusSpec:
    replicas: 1
    retention: 15d
    walCompression: true
    # false = do not restrict ServiceMonitor/PodMonitor discovery to objects
    # labelled with this Helm release.
    serviceMonitorSelectorNilUsesHelmValues: false
    podMonitorSelectorNilUsesHelmValues: false
    resources:
      requests:
        cpu: 100m
        memory: 512Mi
      limits:
        cpu: 1000m
        memory: 1Gi
    # No storageClassName is set, so the cluster default StorageClass is used.
    storageSpec:
      volumeClaimTemplate:
        spec:
          accessModes:
            - ReadWriteOnce
          resources:
            requests:
              storage: 50Gi
    remoteWrite:
      - url: http://victoria-metrics.metrics.svc.cluster.local:8428/api/v1/write
        queueConfig:
          maxSamplesPerSend: 10000
          # Per-shard buffer size. It must be larger than maxSamplesPerSend so
          # a shard can hold at least one complete batch while another one is
          # being sent.
          capacity: 20000
          maxShards: 30
 # Control-plane scrape targets are switched off.
 # NOTE(review): presumably because k3s does not expose them as separate
 # endpoints - confirm.
 kubeEtcd:
  enabled: false
 kubeControllerManager:
  enabled: false
 kubeScheduler:
  enabled: false
 kubeProxy:
  enabled: false
 grafana:
  enabled: true
  adminUser: admin
  # NOTE(review): placeholder password committed in plain text. Override it at
  # install time or reference an existing Secret before exposing Grafana.
  adminPassword: change-me
  defaultDashboardsEnabled: true
  resources:
    requests:
      cpu: 50m
      memory: 256Mi
    limits:
      cpu: 200m
      memory: 512Mi
  persistence:
    enabled: true
    size: 10Gi
  # Secondary (non-default) data source that queries VictoriaMetrics directly.
  additionalDataSources:
    - name: victoria-metrics
      type: prometheus
      access: proxy
      url: http://victoria-metrics.metrics.svc.cluster.local:8428
      isDefault: false
--- a/manifests/metrics/namespace.yaml
+++ b/manifests/metrics/namespace.yaml
@@ -0,0 +1,4 @@
 # Namespace that the Grafana Ingress and the VictoriaMetrics Service in this
 # directory are created in.
 apiVersion: v1
 kind: Namespace
 metadata:
  name: metrics
--- a/manifests/metrics/victoria-metrics-service.yaml
+++ b/manifests/metrics/victoria-metrics-service.yaml
@@ -0,0 +1,19 @@
 # ClusterIP Service named "victoria-metrics" on port 8428, selecting pods by
 # the victoria-metrics-single labels listed under spec.selector.
 apiVersion: v1
 kind: Service
 metadata:
  namespace: metrics
  name: victoria-metrics
  labels:
    app: server
    app.kubernetes.io/instance: victoria-metrics-single
    app.kubernetes.io/name: victoria-metrics-single
 spec:
  type: ClusterIP
  # NOTE(review): these must match the pod labels rendered by the
  # victoria-metrics-single Helm release - verify against the running pods.
  selector:
    app: server
    app.kubernetes.io/instance: victoria-metrics-single
    app.kubernetes.io/name: victoria-metrics-single
  ports:
    - port: 8428
      name: http
      targetPort: 8428
--- a/manifests/metrics/victoria-metrics-single-values.yaml
+++ b/manifests/metrics/victoria-metrics-single-values.yaml
@@ -0,0 +1,23 @@
 # Helm values for the victoria-metrics-single chart in the "metrics"
 # namespace: one replica, 30 day retention, 200Gi persistent volume.
 fullnameOverride: victoria-metrics-single
 namespaceOverride: metrics
 serviceAccount:
  create: true
 server:
  replicaCount: 1
  retentionPeriod: 30d
  scrapeInterval: 30s
  service:
    port: 8428
    type: ClusterIP
  # No storage class is named, so the cluster default StorageClass is used.
  persistentVolume:
    size: 200Gi
    enabled: true
  resources:
    limits:
      memory: 1Gi
      cpu: 500m
    requests:
      memory: 256Mi
      cpu: 100m
--- a/manifests/sillytavern/configmap.yaml
+++ b/manifests/sillytavern/configmap.yaml
@@ -0,0 +1,122 @@
 # SillyTavern server configuration, stored under the single key config.yaml.
 # The sillytavern Deployment mounts that key as a file via subPath.
 apiVersion: v1
 kind: ConfigMap
 metadata:
  name: sillytavern-config
  namespace: sillytavern
 data:
  # Everything below the "|" is a literal block that becomes the file content
  # byte for byte; any edit there changes what the application reads.
  # NOTE(review): basicAuthMode is false and basicAuthUser holds placeholder
  # credentials here; the sillytavern-auth Secret sets SILLYTAVERN_BASICAUTH*
  # environment variables that presumably override them - confirm.
  config.yaml: |
    dataRoot: ./data
    listen: true
    listenAddress:
      ipv4: 0.0.0.0
      ipv6: '[::]'
    protocol:
        ipv4: true
        ipv6: false
    dnsPreferIPv6: false
    browserLaunch:
      enabled: false
      browser: 'default'
      hostname: 'auto'
      port: -1
      avoidLocalhost: false
    port: 8000
    ssl:
      enabled: false
      certPath: "./certs/cert.pem"
      keyPath: "./certs/privkey.pem"
      keyPassphrase: ""
    whitelistMode: false
    enableForwardedWhitelist: false
    whitelist:
      - ::1
      - 127.0.0.1
    whitelistDockerHosts: false
    basicAuthMode: false
    basicAuthUser:
      username: "user"
      password: "password"
    enableCorsProxy: false
    requestProxy:
      enabled: false
      url: "socks5://username:password@example.com:1080"
      bypass:
        - localhost
        - 127.0.0.1
    enableUserAccounts: false
    enableDiscreetLogin: false
    perUserBasicAuth: false
    sso:
      autheliaAuth: false
      authentikAuth: false
    hostWhitelist:
      enabled: false
      scan: true
      hosts: []
    sessionTimeout: -1
    disableCsrfProtection: false
    securityOverride: false
    logging:
      enableAccessLog: true
      minLogLevel: 0
    rateLimiting:
      preferRealIpHeader: false
    backups:
      common:
        numberOfBackups: 50
      chat:
        enabled: true
        checkIntegrity: true
        maxTotalBackups: -1
        throttleInterval: 10000
    thumbnails:
      enabled: true
      format: "jpg"
      quality: 95
      dimensions: { 'bg': [160, 90], 'avatar': [96, 144], 'persona': [96, 144] }
    performance:
      lazyLoadCharacters: false
      memoryCacheCapacity: '100mb'
      useDiskCache: true
    cacheBuster:
      enabled: false
      userAgentPattern: ''
    allowKeysExposure: false
    skipContentCheck: false
    whitelistImportDomains:
      - localhost
      - cdn.discordapp.com
      - files.catbox.moe
      - raw.githubusercontent.com
      - char-archive.evulid.cc
    requestOverrides: []
    extensions:
      enabled: true
      autoUpdate: true
      models:
        autoDownload: true
        classification: Cohee/distilbert-base-uncased-go-emotions-onnx
        captioning: Xenova/vit-gpt2-image-captioning
        embedding: Cohee/jina-embeddings-v2-base-en
        speechToText: Xenova/whisper-small
        textToSpeech: Xenova/speecht5_tts
    enableDownloadableTokenizers: true
    promptPlaceholder: "[Start a new chat]"
    openai:
      randomizeUserId: false
      captionSystemPrompt: ""
    deepl:
      formality: default
    mistral:
      enablePrefix: false
    ollama:
      keepAlive: -1
      batchSize: -1
    claude:
      enableSystemPromptCache: false
      cachingAtDepth: -1
      extendedTTL: false
    gemini:
      apiVersion: 'v1beta'
    enableServerPlugins: false
    enableServerPluginsAutoUpdate: true
--- a/manifests/sillytavern/deployment.yaml
+++ b/manifests/sillytavern/deployment.yaml
@@ -0,0 +1,61 @@
 # Single-replica SillyTavern Deployment. Configuration comes from the
 # sillytavern-config ConfigMap, credentials from the sillytavern-auth Secret,
 # and data, plugins and extensions live on three PersistentVolumeClaims.
 apiVersion: apps/v1
 kind: Deployment
 metadata:
  name: sillytavern
  namespace: sillytavern
 spec:
  replicas: 1
  strategy:
    # Recreate terminates the old pod before the new one starts.
    # NOTE(review): presumably chosen because the mounted claims are
    # ReadWriteOnce - confirm.
    type: Recreate
  selector:
    matchLabels:
      app: sillytavern
  template:
    metadata:
      labels:
        app: sillytavern
    spec:
      containers:
        - name: sillytavern
          # NOTE(review): ":latest" is a floating tag; pin a release tag or
          # digest if reproducible rollouts are required.
          image: ghcr.io/sillytavern/sillytavern:latest
          ports:
            - containerPort: 8000
              protocol: TCP
          env:
            - name: NODE_ENV
              value: production
            - name: FORCE_COLOR
              value: "1"
          # Every key of the sillytavern-auth Secret becomes an environment
          # variable of the container.
          envFrom:
            - secretRef:
                name: sillytavern-auth
          volumeMounts:
            # subPath mounts only the config.yaml key as a single file. Files
            # mounted through subPath are not refreshed when the ConfigMap
            # changes; the pod has to be restarted.
            - name: config
              mountPath: /home/node/app/config/config.yaml
              subPath: config.yaml
            - name: data
              mountPath: /home/node/app/data
            - name: plugins
              mountPath: /home/node/app/plugins
            - name: extensions
              mountPath: /home/node/app/public/scripts/extensions/third-party
          resources:
            requests:
              cpu: "1"
              memory: 1Gi
            limits:
              cpu: "4"
              memory: 4Gi
      volumes:
        - name: config
          configMap:
            name: sillytavern-config
        - name: data
          persistentVolumeClaim:
            claimName: sillytavern-data
        - name: plugins
          persistentVolumeClaim:
            claimName: sillytavern-plugins
        - name: extensions
          persistentVolumeClaim:
            claimName: sillytavern-extensions
--- a/manifests/sillytavern/ingress.yaml
+++ b/manifests/sillytavern/ingress.yaml
@@ -0,0 +1,30 @@
 # Traefik Ingress serving SillyTavern at https://sillytavern.mrt0rtikize.ru;
 # the certificate in sillytavern-tls is requested through cert-manager.
 apiVersion: networking.k8s.io/v1
 kind: Ingress
 metadata:
  namespace: sillytavern
  name: sillytavern
  labels:
    app.kubernetes.io/part-of: sillytavern
    app.kubernetes.io/component: frontend
    app.kubernetes.io/name: sillytavern
  annotations:
    # Route only through the TLS entrypoint.
    traefik.ingress.kubernetes.io/router.tls: "true"
    traefik.ingress.kubernetes.io/router.entrypoints: websecure
    cert-manager.io/cluster-issuer: letsencrypt-production
 spec:
  ingressClassName: traefik
  rules:
    - host: sillytavern.mrt0rtikize.ru
      http:
        paths:
          - pathType: Prefix
            path: /
            backend:
              service:
                name: sillytavern
                port:
                  number: 8000
  tls:
    - secretName: sillytavern-tls
      hosts:
        - sillytavern.mrt0rtikize.ru
--- a/manifests/sillytavern/namespace.yaml
+++ b/manifests/sillytavern/namespace.yaml
@@ -0,0 +1,4 @@
 # Namespace that all SillyTavern resources in this directory are created in.
 apiVersion: v1
 kind: Namespace
 metadata:
  name: sillytavern
--- a/manifests/sillytavern/pvc.yaml
+++ b/manifests/sillytavern/pvc.yaml
@@ -0,0 +1,35 @@
 # 5Gi ReadWriteOnce claim; the sillytavern Deployment mounts it at
 # /home/node/app/data. No storageClassName is set, so the cluster default
 # StorageClass provisions it.
 apiVersion: v1
 kind: PersistentVolumeClaim
 metadata:
  namespace: sillytavern
  name: sillytavern-data
 spec:
  resources:
    requests:
      storage: 5Gi
  accessModes:
    - ReadWriteOnce
 ---
 # 1Gi ReadWriteOnce claim; the sillytavern Deployment mounts it at
 # /home/node/app/plugins. Uses the cluster default StorageClass.
 apiVersion: v1
 kind: PersistentVolumeClaim
 metadata:
  namespace: sillytavern
  name: sillytavern-plugins
 spec:
  resources:
    requests:
      storage: 1Gi
  accessModes:
    - ReadWriteOnce
 ---
 # 1Gi ReadWriteOnce claim; the sillytavern Deployment mounts it at
 # /home/node/app/public/scripts/extensions/third-party. Uses the cluster
 # default StorageClass.
 apiVersion: v1
 kind: PersistentVolumeClaim
 metadata:
  namespace: sillytavern
  name: sillytavern-extensions
 spec:
  resources:
    requests:
      storage: 1Gi
  accessModes:
    - ReadWriteOnce
--- a/manifests/sillytavern/secret.yaml
+++ b/manifests/sillytavern/secret.yaml
@@ -0,0 +1,10 @@
 # Basic-auth settings that the sillytavern Deployment loads as environment
 # variables through envFrom.
 # WARNING: this credential is committed in plain text. Rotate it and move it
 # to an encrypted or external secret mechanism (for example Sealed Secrets,
 # SOPS or External Secrets) instead of keeping it in Git.
 apiVersion: v1
 kind: Secret
 metadata:
  name: sillytavern-auth
  namespace: sillytavern
 type: Opaque
 stringData:
  # stringData values have to be strings; they are quoted so that a future
  # all-digit or boolean-looking value is not retyped by the YAML parser.
  SILLYTAVERN_BASICAUTHMODE: "true"
  SILLYTAVERN_BASICAUTHUSER_USERNAME: "admin"
  SILLYTAVERN_BASICAUTHUSER_PASSWORD: "0cdaa30c396dae77"
--- a/manifests/sillytavern/service.yaml
+++ b/manifests/sillytavern/service.yaml
@@ -0,0 +1,13 @@
 # ClusterIP Service forwarding TCP port 8000 to port 8000 of the pods
 # labelled app=sillytavern.
 apiVersion: v1
 kind: Service
 metadata:
  namespace: sillytavern
  name: sillytavern
 spec:
  type: ClusterIP
  selector:
    app: sillytavern
  ports:
    - protocol: TCP
      port: 8000
      targetPort: 8000
--- a/metrics/README.md
+++ b/metrics/README.md
@@ -0,0 +1,62 @@
 # metrics stack
 Opinionated manifests for deploying kube-prometheus-stack (Prometheus Operator + Grafana) together with a VictoriaMetrics single-node database in the `metrics` namespace.
 ## Install / upgrade
 ```sh
 kubectl apply -f metrics/namespace.yaml
 # kube-prometheus-stack
 # (the chart is published in the prometheus-community Helm repository)
 helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
 helm repo update
 helm upgrade --install kube-prometheus-stack prometheus-community/kube-prometheus-stack \
  --namespace metrics \
  --values metrics/kube-prometheus-stack-values.yaml
 kubectl --namespace metrics get secret kube-prometheus-stack-grafana \
  -o jsonpath="{.data.admin-password}" | base64 -d
 echo
 # expose grafana via Traefik
 kubectl apply -f metrics/grafana-ingress.yaml
 kubectl -n metrics get ingress grafana
 # victoria metrics for long-term storage
 helm repo add victoria-metrics https://victoriametrics.github.io/helm-charts
 helm upgrade --install victoria-metrics-single victoria-metrics/victoria-metrics-single \
  --namespace metrics \
  --values metrics/victoria-metrics-single-values.yaml
 # expose victoria metrics via ClusterIP for Prometheus/Grafana
 kubectl apply -f metrics/victoria-metrics-service.yaml
 ```
 The values files do not set a storage class, so every PersistentVolumeClaim is provisioned by the cluster's default StorageClass; add the `storageClassName`/`storageClass` fields and adjust the capacities if you prefer something else.
 Before applying `metrics/grafana-ingress.yaml`, update the host (`grafana.mrt0rtikize.ru`) to your own domain and, if needed, change the `cert-manager.io/cluster-issuer` annotation to match your staging/production workflow. The ingress uses the `traefik` ingress class.
 ## Components
 - **Prometheus Operator** provisions Prometheus, Alertmanager and related CRDs. Remote write targets VictoriaMetrics for durable retention.
 - **Grafana** is pre-provisioned with persistence enabled and a secondary data source pointing at VictoriaMetrics.
 - **VictoriaMetrics** stores metrics for long-term retention while also serving query traffic for Grafana. A dedicated ClusterIP service (`metrics/victoria-metrics-service.yaml`) exposes port 8428 for Prometheus remote write and Grafana queries.
 ## Database choices
 Prometheus ships with an embedded TSDB. For longer retention, clustering or multi-tenant needs you can offload data to:
 - **VictoriaMetrics** (single, clustered, or managed) – cost-efficient, Prometheus-compatible, supports multi-year retention.
 - **Thanos / Cortex / Grafana Mimir** – horizontally scalable object-storage backed TSDBs with multi-cluster federation.
 - **ClickHouse / TimescaleDB / PostgreSQL** – SQL stores for advanced analytics (requires Promscale or similar adapter).
 - **Graphite / InfluxDB** – legacy or streaming-friendly stores; integrate via remote write adapters.
 Pick the backend that matches your retention and query latency requirements. Remote write configuration lives under `prometheus.prometheusSpec.remoteWrite` in `kube-prometheus-stack-values.yaml`.
 ## Post-install checks
 ```sh
 kubectl -n metrics get pods
 kubectl -n metrics get svc
 kubectl -n metrics get prometheus,prometheusrules,servicemonitors -A
 ```
--- a/metrics/grafana-ingress.yaml
+++ b/metrics/grafana-ingress.yaml
@@ -0,0 +1,26 @@
 # Traefik Ingress serving Grafana at https://grafana.mrt0rtikize.ru; the
 # certificate in grafana-tls is requested through cert-manager.
 apiVersion: networking.k8s.io/v1
 kind: Ingress
 metadata:
  namespace: metrics
  name: grafana
  annotations:
    # Route only through the TLS entrypoint.
    traefik.ingress.kubernetes.io/router.tls: "true"
    traefik.ingress.kubernetes.io/router.entrypoints: websecure
    cert-manager.io/cluster-issuer: letsencrypt-production
 spec:
  ingressClassName: traefik
  rules:
    - host: grafana.mrt0rtikize.ru
      http:
        paths:
          - pathType: Prefix
            path: /
            backend:
              service:
                # NOTE(review): must equal the Service name the Grafana
                # subchart renders for the installed release - verify.
                name: kube-prometheus-stack-grafana
                port:
                  number: 80
  tls:
    - secretName: grafana-tls
      hosts:
        - grafana.mrt0rtikize.ru
--- a/metrics/kube-prometheus-stack-values.yaml
+++ b/metrics/kube-prometheus-stack-values.yaml
@@ -0,0 +1,90 @@
 # Helm values for the kube-prometheus-stack chart, installed into the
 # "metrics" namespace. Prometheus keeps 15 days locally and forwards every
 # sample to VictoriaMetrics through remote write.
 fullnameOverride: kube-prometheus
 namespaceOverride: metrics
 prometheusOperator:
  namespace: metrics
  admissionWebhooks:
    # An unreachable webhook must not block API requests.
    failurePolicy: Ignore
 alertmanager:
  enabled: true
  alertmanagerSpec:
    resources:
      requests:
        cpu: 50m
        memory: 128Mi
      limits:
        cpu: 200m
        memory: 512Mi
    # No storageClassName is set, so the cluster default StorageClass is used.
    storage:
      volumeClaimTemplate:
        spec:
          accessModes:
            - ReadWriteOnce
          resources:
            requests:
              storage: 10Gi
 prometheus:
  enabled: true
  prometheusSpec:
    replicas: 1
    retention: 15d
    walCompression: true
    # false = do not restrict ServiceMonitor/PodMonitor discovery to objects
    # labelled with this Helm release.
    serviceMonitorSelectorNilUsesHelmValues: false
    podMonitorSelectorNilUsesHelmValues: false
    resources:
      requests:
        cpu: 100m
        memory: 512Mi
      limits:
        cpu: 1000m
        memory: 1Gi
    # No storageClassName is set, so the cluster default StorageClass is used.
    storageSpec:
      volumeClaimTemplate:
        spec:
          accessModes:
            - ReadWriteOnce
          resources:
            requests:
              storage: 50Gi
    remoteWrite:
      - url: http://victoria-metrics.metrics.svc.cluster.local:8428/api/v1/write
        queueConfig:
          maxSamplesPerSend: 10000
          # Per-shard buffer size. It must be larger than maxSamplesPerSend so
          # a shard can hold at least one complete batch while another one is
          # being sent.
          capacity: 20000
          maxShards: 30
 # Control-plane scrape targets are switched off.
 # NOTE(review): presumably because k3s does not expose them as separate
 # endpoints - confirm.
 kubeEtcd:
  enabled: false
 kubeControllerManager:
  enabled: false
 kubeScheduler:
  enabled: false
 kubeProxy:
  enabled: false
 grafana:
  enabled: true
  adminUser: admin
  # NOTE(review): placeholder password committed in plain text. Override it at
  # install time or reference an existing Secret before exposing Grafana.
  adminPassword: change-me
  defaultDashboardsEnabled: true
  resources:
    requests:
      cpu: 50m
      memory: 256Mi
    limits:
      cpu: 200m
      memory: 512Mi
  persistence:
    enabled: true
    size: 10Gi
  # Secondary (non-default) data source that queries VictoriaMetrics directly.
  additionalDataSources:
    - name: victoria-metrics
      type: prometheus
      access: proxy
      url: http://victoria-metrics.metrics.svc.cluster.local:8428
      isDefault: false
--- a/metrics/namespace.yaml
+++ b/metrics/namespace.yaml
@@ -0,0 +1,4 @@
 # Namespace that the Grafana Ingress and the VictoriaMetrics Service in this
 # directory are created in.
 apiVersion: v1
 kind: Namespace
 metadata:
  name: metrics
--- a/metrics/victoria-metrics-service.yaml
+++ b/metrics/victoria-metrics-service.yaml
@@ -0,0 +1,19 @@
 # ClusterIP Service named "victoria-metrics" on port 8428, selecting pods by
 # the victoria-metrics-single labels listed under spec.selector.
 apiVersion: v1
 kind: Service
 metadata:
  namespace: metrics
  name: victoria-metrics
  labels:
    app: server
    app.kubernetes.io/instance: victoria-metrics-single
    app.kubernetes.io/name: victoria-metrics-single
 spec:
  type: ClusterIP
  # NOTE(review): these must match the pod labels rendered by the
  # victoria-metrics-single Helm release - verify against the running pods.
  selector:
    app: server
    app.kubernetes.io/instance: victoria-metrics-single
    app.kubernetes.io/name: victoria-metrics-single
  ports:
    - port: 8428
      name: http
      targetPort: 8428
--- a/metrics/victoria-metrics-single-values.yaml
+++ b/metrics/victoria-metrics-single-values.yaml
@@ -0,0 +1,23 @@
 # Helm values for the victoria-metrics-single chart in the "metrics"
 # namespace: one replica, 30 day retention, 200Gi persistent volume.
 fullnameOverride: victoria-metrics-single
 namespaceOverride: metrics
 serviceAccount:
  create: true
 server:
  replicaCount: 1
  retentionPeriod: 30d
  scrapeInterval: 30s
  service:
    port: 8428
    type: ClusterIP
  # No storage class is named, so the cluster default StorageClass is used.
  persistentVolume:
    size: 200Gi
    enabled: true
  resources:
    limits:
      memory: 1Gi
      cpu: 500m
    requests:
      memory: 256Mi
      cpu: 100m
--- a/sillytavern/configmap.yaml
+++ b/sillytavern/configmap.yaml
@@ -0,0 +1,122 @@
 # SillyTavern server configuration, stored under the single key config.yaml.
 # The sillytavern Deployment mounts that key as a file via subPath.
 apiVersion: v1
 kind: ConfigMap
 metadata:
  name: sillytavern-config
  namespace: sillytavern
 data:
  # Everything below the "|" is a literal block that becomes the file content
  # byte for byte; any edit there changes what the application reads.
  # NOTE(review): basicAuthMode is false and basicAuthUser holds placeholder
  # credentials here; the sillytavern-auth Secret sets SILLYTAVERN_BASICAUTH*
  # environment variables that presumably override them - confirm.
  config.yaml: |
    dataRoot: ./data
    listen: true
    listenAddress:
      ipv4: 0.0.0.0
      ipv6: '[::]'
    protocol:
        ipv4: true
        ipv6: false
    dnsPreferIPv6: false
    browserLaunch:
      enabled: false
      browser: 'default'
      hostname: 'auto'
      port: -1
      avoidLocalhost: false
    port: 8000
    ssl:
      enabled: false
      certPath: "./certs/cert.pem"
      keyPath: "./certs/privkey.pem"
      keyPassphrase: ""
    whitelistMode: false
    enableForwardedWhitelist: false
    whitelist:
      - ::1
      - 127.0.0.1
    whitelistDockerHosts: false
    basicAuthMode: false
    basicAuthUser:
      username: "user"
      password: "password"
    enableCorsProxy: false
    requestProxy:
      enabled: false
      url: "socks5://username:password@example.com:1080"
      bypass:
        - localhost
        - 127.0.0.1
    enableUserAccounts: false
    enableDiscreetLogin: false
    perUserBasicAuth: false
    sso:
      autheliaAuth: false
      authentikAuth: false
    hostWhitelist:
      enabled: false
      scan: true
      hosts: []
    sessionTimeout: -1
    disableCsrfProtection: false
    securityOverride: false
    logging:
      enableAccessLog: true
      minLogLevel: 0
    rateLimiting:
      preferRealIpHeader: false
    backups:
      common:
        numberOfBackups: 50
      chat:
        enabled: true
        checkIntegrity: true
        maxTotalBackups: -1
        throttleInterval: 10000
    thumbnails:
      enabled: true
      format: "jpg"
      quality: 95
      dimensions: { 'bg': [160, 90], 'avatar': [96, 144], 'persona': [96, 144] }
    performance:
      lazyLoadCharacters: false
      memoryCacheCapacity: '100mb'
      useDiskCache: true
    cacheBuster:
      enabled: false
      userAgentPattern: ''
    allowKeysExposure: false
    skipContentCheck: false
    whitelistImportDomains:
      - localhost
      - cdn.discordapp.com
      - files.catbox.moe
      - raw.githubusercontent.com
      - char-archive.evulid.cc
    requestOverrides: []
    extensions:
      enabled: true
      autoUpdate: true
      models:
        autoDownload: true
        classification: Cohee/distilbert-base-uncased-go-emotions-onnx
        captioning: Xenova/vit-gpt2-image-captioning
        embedding: Cohee/jina-embeddings-v2-base-en
        speechToText: Xenova/whisper-small
        textToSpeech: Xenova/speecht5_tts
    enableDownloadableTokenizers: true
    promptPlaceholder: "[Start a new chat]"
    openai:
      randomizeUserId: false
      captionSystemPrompt: ""
    deepl:
      formality: default
    mistral:
      enablePrefix: false
    ollama:
      keepAlive: -1
      batchSize: -1
    claude:
      enableSystemPromptCache: false
      cachingAtDepth: -1
      extendedTTL: false
    gemini:
      apiVersion: 'v1beta'
    enableServerPlugins: false
    enableServerPluginsAutoUpdate: true
--- a/sillytavern/deployment.yaml
+++ b/sillytavern/deployment.yaml
@@ -0,0 +1,61 @@
 # Single-replica SillyTavern Deployment. Configuration comes from the
 # sillytavern-config ConfigMap, credentials from the sillytavern-auth Secret,
 # and data, plugins and extensions live on three PersistentVolumeClaims.
 apiVersion: apps/v1
 kind: Deployment
 metadata:
  name: sillytavern
  namespace: sillytavern
 spec:
  replicas: 1
  strategy:
    # Recreate terminates the old pod before the new one starts.
    # NOTE(review): presumably chosen because the mounted claims are
    # ReadWriteOnce - confirm.
    type: Recreate
  selector:
    matchLabels:
      app: sillytavern
  template:
    metadata:
      labels:
        app: sillytavern
    spec:
      containers:
        - name: sillytavern
          # NOTE(review): ":latest" is a floating tag; pin a release tag or
          # digest if reproducible rollouts are required.
          image: ghcr.io/sillytavern/sillytavern:latest
          ports:
            - containerPort: 8000
              protocol: TCP
          env:
            - name: NODE_ENV
              value: production
            - name: FORCE_COLOR
              value: "1"
          # Every key of the sillytavern-auth Secret becomes an environment
          # variable of the container.
          envFrom:
            - secretRef:
                name: sillytavern-auth
          volumeMounts:
            # subPath mounts only the config.yaml key as a single file. Files
            # mounted through subPath are not refreshed when the ConfigMap
            # changes; the pod has to be restarted.
            - name: config
              mountPath: /home/node/app/config/config.yaml
              subPath: config.yaml
            - name: data
              mountPath: /home/node/app/data
            - name: plugins
              mountPath: /home/node/app/plugins
            - name: extensions
              mountPath: /home/node/app/public/scripts/extensions/third-party
          resources:
            requests:
              cpu: "1"
              memory: 1Gi
            limits:
              cpu: "4"
              memory: 4Gi
      volumes:
        - name: config
          configMap:
            name: sillytavern-config
        - name: data
          persistentVolumeClaim:
            claimName: sillytavern-data
        - name: plugins
          persistentVolumeClaim:
            claimName: sillytavern-plugins
        - name: extensions
          persistentVolumeClaim:
            claimName: sillytavern-extensions
--- a/sillytavern/ingress.yaml
+++ b/sillytavern/ingress.yaml
@@ -0,0 +1,30 @@
 # Traefik Ingress serving SillyTavern at https://sillytavern.mrt0rtikize.ru;
 # the certificate in sillytavern-tls is requested through cert-manager.
 apiVersion: networking.k8s.io/v1
 kind: Ingress
 metadata:
  namespace: sillytavern
  name: sillytavern
  labels:
    app.kubernetes.io/part-of: sillytavern
    app.kubernetes.io/component: frontend
    app.kubernetes.io/name: sillytavern
  annotations:
    # Route only through the TLS entrypoint.
    traefik.ingress.kubernetes.io/router.tls: "true"
    traefik.ingress.kubernetes.io/router.entrypoints: websecure
    cert-manager.io/cluster-issuer: letsencrypt-production
 spec:
  ingressClassName: traefik
  rules:
    - host: sillytavern.mrt0rtikize.ru
      http:
        paths:
          - pathType: Prefix
            path: /
            backend:
              service:
                name: sillytavern
                port:
                  number: 8000
  tls:
    - secretName: sillytavern-tls
      hosts:
        - sillytavern.mrt0rtikize.ru
--- a/sillytavern/namespace.yaml
+++ b/sillytavern/namespace.yaml
@@ -0,0 +1,4 @@
 # Namespace that all SillyTavern resources in this directory are created in.
 apiVersion: v1
 kind: Namespace
 metadata:
  name: sillytavern
--- a/sillytavern/pvc.yaml
+++ b/sillytavern/pvc.yaml
@@ -0,0 +1,35 @@
 # 5Gi ReadWriteOnce claim; the sillytavern Deployment mounts it at
 # /home/node/app/data. No storageClassName is set, so the cluster default
 # StorageClass provisions it.
 apiVersion: v1
 kind: PersistentVolumeClaim
 metadata:
  namespace: sillytavern
  name: sillytavern-data
 spec:
  resources:
    requests:
      storage: 5Gi
  accessModes:
    - ReadWriteOnce
 ---
 # 1Gi ReadWriteOnce claim; the sillytavern Deployment mounts it at
 # /home/node/app/plugins. Uses the cluster default StorageClass.
 apiVersion: v1
 kind: PersistentVolumeClaim
 metadata:
  namespace: sillytavern
  name: sillytavern-plugins
 spec:
  resources:
    requests:
      storage: 1Gi
  accessModes:
    - ReadWriteOnce
 ---
 # 1Gi ReadWriteOnce claim; the sillytavern Deployment mounts it at
 # /home/node/app/public/scripts/extensions/third-party. Uses the cluster
 # default StorageClass.
 apiVersion: v1
 kind: PersistentVolumeClaim
 metadata:
  namespace: sillytavern
  name: sillytavern-extensions
 spec:
  resources:
    requests:
      storage: 1Gi
  accessModes:
    - ReadWriteOnce
--- a/sillytavern/secret.yaml
+++ b/sillytavern/secret.yaml
@@ -0,0 +1,10 @@
 # Basic-auth settings that the sillytavern Deployment loads as environment
 # variables through envFrom.
 # WARNING: this credential is committed in plain text. Rotate it and move it
 # to an encrypted or external secret mechanism (for example Sealed Secrets,
 # SOPS or External Secrets) instead of keeping it in Git.
 apiVersion: v1
 kind: Secret
 metadata:
  name: sillytavern-auth
  namespace: sillytavern
 type: Opaque
 stringData:
  # stringData values have to be strings; they are quoted so that a future
  # all-digit or boolean-looking value is not retyped by the YAML parser.
  SILLYTAVERN_BASICAUTHMODE: "true"
  SILLYTAVERN_BASICAUTHUSER_USERNAME: "admin"
  SILLYTAVERN_BASICAUTHUSER_PASSWORD: "0cdaa30c396dae77"
--- a/sillytavern/service.yaml
+++ b/sillytavern/service.yaml
@@ -0,0 +1,13 @@
 # ClusterIP Service forwarding TCP port 8000 to port 8000 of the pods
 # labelled app=sillytavern.
 apiVersion: v1
 kind: Service
 metadata:
  namespace: sillytavern
  name: sillytavern
 spec:
  type: ClusterIP
  selector:
    app: sillytavern
  ports:
    - protocol: TCP
      port: 8000
      targetPort: 8000