commit 5d9a80b97618699fde416d6157cd2c43cb587589 Author: Alexander Rogov Date: Tue May 5 13:18:51 2026 +0300 Initial commit: k3s GitOps manifests with ArgoCD App-of-Apps diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..04204c7 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +config diff --git a/argocd/app-of-apps.yaml b/argocd/app-of-apps.yaml new file mode 100644 index 0000000..39c9b1a --- /dev/null +++ b/argocd/app-of-apps.yaml @@ -0,0 +1,25 @@ +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: root-app + namespace: argocd + finalizers: + - resources-finalizer.argocd.argoproj.io +spec: + project: default + source: + repoURL: http://gitea.gitea.svc.cluster.local:3000/gitea/k3s-manifests.git + targetRevision: main + path: argocd/apps + directory: + recurse: true + include: "*.yaml" + destination: + server: https://kubernetes.default.svc + namespace: argocd + syncPolicy: + automated: + prune: true + selfHeal: true + syncOptions: + - CreateNamespace=true diff --git a/argocd/apps/cert-manager.yaml b/argocd/apps/cert-manager.yaml new file mode 100644 index 0000000..01aa524 --- /dev/null +++ b/argocd/apps/cert-manager.yaml @@ -0,0 +1,26 @@ +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: cert-manager + namespace: argocd + finalizers: + - resources-finalizer.argocd.argoproj.io +spec: + project: default + source: + repoURL: https://charts.jetstack.io + chart: cert-manager + targetRevision: v1.20.1 + helm: + values: | + crds: + enabled: true + destination: + server: https://kubernetes.default.svc + namespace: cert-manager + syncPolicy: + automated: + prune: true + selfHeal: true + syncOptions: + - CreateNamespace=true diff --git a/argocd/apps/llama.yaml b/argocd/apps/llama.yaml new file mode 100644 index 0000000..cad2ef8 --- /dev/null +++ b/argocd/apps/llama.yaml @@ -0,0 +1,22 @@ +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: llama + namespace: argocd + finalizers: + - 
resources-finalizer.argocd.argoproj.io +spec: + project: default + source: + repoURL: http://gitea.gitea.svc.cluster.local:3000/gitea/k3s-manifests.git + targetRevision: main + path: manifests/llama + destination: + server: https://kubernetes.default.svc + namespace: llama + syncPolicy: + automated: + prune: true + selfHeal: true + syncOptions: + - CreateNamespace=true diff --git a/argocd/apps/longhorn.yaml b/argocd/apps/longhorn.yaml new file mode 100644 index 0000000..281d830 --- /dev/null +++ b/argocd/apps/longhorn.yaml @@ -0,0 +1,29 @@ +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: longhorn + namespace: argocd + finalizers: + - resources-finalizer.argocd.argoproj.io +spec: + project: default + sources: + - repoURL: https://charts.longhorn.io + chart: longhorn + targetRevision: 1.11.2 + helm: + values: | + preUpgradeChecker: + jobEnabled: false + - repoURL: http://gitea.gitea.svc.cluster.local:3000/gitea/k3s-manifests.git + targetRevision: main + path: manifests/longhorn + destination: + server: https://kubernetes.default.svc + namespace: longhorn-system + syncPolicy: + automated: + prune: true + selfHeal: true + syncOptions: + - CreateNamespace=true diff --git a/argocd/apps/metallb.yaml b/argocd/apps/metallb.yaml new file mode 100644 index 0000000..8299636 --- /dev/null +++ b/argocd/apps/metallb.yaml @@ -0,0 +1,29 @@ +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: metallb + namespace: argocd + finalizers: + - resources-finalizer.argocd.argoproj.io +spec: + project: default + sources: + - repoURL: https://metallb.github.io/metallb + chart: metallb + targetRevision: 0.14.5 + helm: + values: | + crds: + enabled: true + - repoURL: http://gitea.gitea.svc.cluster.local:3000/gitea/k3s-manifests.git + targetRevision: main + path: manifests/metallb + destination: + server: https://kubernetes.default.svc + namespace: metallb-system + syncPolicy: + automated: + prune: true + selfHeal: true + syncOptions: + - 
CreateNamespace=true diff --git a/argocd/apps/metrics.yaml b/argocd/apps/metrics.yaml new file mode 100644 index 0000000..98b4e1d --- /dev/null +++ b/argocd/apps/metrics.yaml @@ -0,0 +1,37 @@ +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: metrics + namespace: argocd + finalizers: + - resources-finalizer.argocd.argoproj.io +spec: + project: default + sources: + - repoURL: https://prometheus-community.github.io/helm-charts + chart: kube-prometheus-stack + targetRevision: 82.16.2 + helm: + valueFiles: + - $values/manifests/metrics/kube-prometheus-stack-values.yaml + - repoURL: https://victoriametrics.github.io/helm-charts/ + chart: victoria-metrics-single + targetRevision: 0.34.0 + helm: + valueFiles: + - $values/manifests/metrics/victoria-metrics-single-values.yaml + - repoURL: http://gitea.gitea.svc.cluster.local:3000/gitea/k3s-manifests.git + targetRevision: main + ref: values + - repoURL: http://gitea.gitea.svc.cluster.local:3000/gitea/k3s-manifests.git + targetRevision: main + path: manifests/metrics + destination: + server: https://kubernetes.default.svc + namespace: metrics + syncPolicy: + automated: + prune: true + selfHeal: true + syncOptions: + - CreateNamespace=true diff --git a/argocd/apps/sillytavern.yaml b/argocd/apps/sillytavern.yaml new file mode 100644 index 0000000..18d79c1 --- /dev/null +++ b/argocd/apps/sillytavern.yaml @@ -0,0 +1,22 @@ +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: sillytavern + namespace: argocd + finalizers: + - resources-finalizer.argocd.argoproj.io +spec: + project: default + source: + repoURL: http://gitea.gitea.svc.cluster.local:3000/gitea/k3s-manifests.git + targetRevision: main + path: manifests/sillytavern + destination: + server: https://kubernetes.default.svc + namespace: sillytavern + syncPolicy: + automated: + prune: true + selfHeal: true + syncOptions: + - CreateNamespace=true diff --git a/bootstrap.sh b/bootstrap.sh new file mode 100755 index 0000000..f71bd0c --- 
/dev/null
+++ b/bootstrap.sh
@@ -0,0 +1,240 @@
+#!/bin/bash
+set -e
+
+# =============================================================================
+# k3s GitOps Bootstrap Script
+# =============================================================================
+# This script sets up Gitea + ArgoCD on the k3s cluster and configures
+# GitOps with the App-of-Apps pattern.
+#
+# Prerequisites:
+# - kubectl + kubeconfig access to the cluster
+# - helm installed
+# - git installed
+# - DNS for *.mrt0rtikize.ru pointing to cluster nodes
+# =============================================================================
+
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+# bootstrap.sh sits at the repository root (next to argocd/ and bootstrap/),
+# so the script directory is the repository directory.
+REPO_DIR="${SCRIPT_DIR}"
+# Exported because the helm calls below take no --kubeconfig flag and would
+# otherwise fall back to the default kubeconfig instead of this cluster's.
+export KUBECONFIG="${REPO_DIR}/config"
+KCTL="kubectl --kubeconfig ${KUBECONFIG}"
+
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+NC='\033[0m'
+
+echo -e "${GREEN}==============================================${NC}"
+echo -e "${GREEN} k3s GitOps Bootstrap${NC}"
+echo -e "${GREEN}==============================================${NC}"
+echo ""
+
+# -----------------------------------------------------------------------------
+# Step 1: Deploy Gitea
+# -----------------------------------------------------------------------------
+echo -e "${YELLOW}[1/6] Deploying Gitea...${NC}"
+
+${KCTL} apply -f "${REPO_DIR}/bootstrap/gitea/"
+
+echo " Waiting for Gitea pod to be ready..."
+${KCTL} wait --for=condition=ready pod -l app=gitea -n gitea --timeout=120s 2>/dev/null || {
+  echo -e "${RED} Gitea pod not ready after 120s. Checking status...${NC}"
+  ${KCTL} get pod -n gitea
+  exit 1
+}
+
+echo -e "${GREEN} Gitea deployed!${NC}"
+echo ""
+
+# -----------------------------------------------------------------------------
+# Step 2: Gitea initial setup (manual)
+# -----------------------------------------------------------------------------
+echo -e "${YELLOW}[2/6] Gitea setup${NC}"
+echo ""
+echo " Gitea is running. Please open the install page in your browser:"
+echo ""
+echo -e " ${GREEN}https://git.mrt0rtikize.ru/${NC}"
+echo ""
+echo " Complete the install wizard with these settings:"
+echo " - Database: SQLite3"
+echo " - Admin Username: gitea"
+echo " - Admin Password: "
+echo " - Confirm Password: "
+echo " - Admin Email: admin@mrt0rtikize.ru"
+echo ""
+echo " After install, create a repository named:"
+echo ""
+echo -e " ${GREEN}k3s-manifests${NC}"
+echo ""
+echo " Make it PUBLIC (so ArgoCD can read it without auth)."
+echo ""
+
+GITEA_PASSWORD=""
+# -s keeps the password off the terminal; -r stops backslashes being eaten.
+read -r -s -p " Gitea admin password (from install wizard): " GITEA_PASSWORD
+echo ""
+
+if [ -z "$GITEA_PASSWORD" ]; then
+  echo -e "${RED} Password is required. Exiting.${NC}"
+  exit 1
+fi
+
+# Gitea endpoints and repository coordinates used by the following steps
+GITEA_EXTERNAL="https://git.mrt0rtikize.ru"
+GITEA_INTERNAL="http://gitea.gitea.svc.cluster.local:3000"
+GITEA_USER="gitea"
+GITEA_REPO="k3s-manifests"
+GITEA_REPO_URL="${GITEA_EXTERNAL}/${GITEA_USER}/${GITEA_REPO}.git"
+GITEA_INTERNAL_REPO="${GITEA_INTERNAL}/${GITEA_USER}/${GITEA_REPO}.git"
+
+echo ""
+
+# -----------------------------------------------------------------------------
+# Step 3: Initialize git repo and push manifests
+# -----------------------------------------------------------------------------
+echo -e "${YELLOW}[3/6] Initializing git repo...${NC}"
+
+# Create .gitignore
+cat > "${REPO_DIR}/.gitignore" << 'GITIGNORE'
+# Sensitive files
+config
+GITIGNORE
+
+cd "${REPO_DIR}"
+
+if [ ! -d ".git" ]; then
+  git init
+  git checkout -b main
+fi
+
+git add .
+git commit -m "Initial commit: k3s GitOps manifests" 2>/dev/null || {
+  echo " Nothing to commit (already up to date)"
+}
+
+echo " Pushing to Gitea..."
+GIT_TERMINAL_PROMPT=0 git push -u "${GITEA_REPO_URL}" main 2>/dev/null || {
+  echo ""
+  echo -e " ${RED}Push failed.${NC} Did you create the '${GITEA_REPO}' repo in Gitea?"
+  echo " You can retry manually:"
+  echo " cd ${REPO_DIR}"
+  echo " git push -u ${GITEA_REPO_URL} main"
+  echo ""
+  read -p " Press Enter after pushing... " -r
+}
+
+echo -e "${GREEN} Manifests pushed to Gitea!${NC}"
+echo ""
+
+# -----------------------------------------------------------------------------
+# Step 4: Install ArgoCD
+# -----------------------------------------------------------------------------
+echo -e "${YELLOW}[4/6] Installing ArgoCD...${NC}"
+
+helm repo add argo https://argoproj.github.io/argo-helm 2>/dev/null || true
+helm repo update
+
+# Dots inside a values key must be escaped for --set, otherwise helm builds a
+# nested map (configs.cm.timeout.reconciliation) instead of setting the
+# "timeout.reconciliation" key under configs.cm.
+helm upgrade --install argocd argo/argo-cd \
+  --namespace argocd \
+  --create-namespace \
+  --set server.extraArgs[0]="--insecure" \
+  --set configs.params."server\.insecure"=true \
+  --set configs.cm."timeout\.reconciliation"=180s \
+  --wait \
+  --timeout 300s
+
+ARGOCD_PASSWORD=$(${KCTL} -n argocd get secret argocd-initial-admin-secret -o jsonpath="{.data.password}" 2>/dev/null | base64 -d)
+
+echo -e "${GREEN} ArgoCD installed!${NC}"
+echo ""
+echo " ArgoCD UI (port-forward):"
+echo " kubectl --kubeconfig ${KUBECONFIG} port-forward svc/argocd-server -n argocd 8080:80"
+echo " Username: admin"
+echo ""
+if [ -n "$ARGOCD_PASSWORD" ]; then
+  echo " Password: ${ARGOCD_PASSWORD}"
+fi
+echo ""
+
+# -----------------------------------------------------------------------------
+# Step 5: Configure ArgoCD → Gitea connection
+# -----------------------------------------------------------------------------
+echo -e "${YELLOW}[5/6] Configuring ArgoCD → Gitea connection...${NC}"
+
+# Add Gitea as a repository in ArgoCD
+# Using argocd CLI if available, otherwise using creds + secret
+if command -v argocd &> /dev/null; then
+  echo " Using argocd CLI..."
+  ARGOCD_SERVER="localhost:8080"
+  echo " Please port-forward ArgoCD in another terminal:"
+  echo " kubectl --kubeconfig ${KUBECONFIG} port-forward svc/argocd-server -n argocd 8080:80"
+  echo ""
+  read -p " Press Enter when ready..." -r
+
+  argocd login "${ARGOCD_SERVER}" --username admin --password "${ARGOCD_PASSWORD}" --insecure
+  argocd repo add "${GITEA_INTERNAL_REPO}" --name gitea-k3s --type git
+else
+  # Fallback: create repository secret manually
+  echo " Creating repository secret manually..."
+  # The secret-type=repository label is attached with "kubectl label --local"
+  # rather than by editing the generated YAML text with sed.
+  ${KCTL} -n argocd create secret generic gitea-k3s-repo \
+    --from-literal=url="${GITEA_INTERNAL_REPO}" \
+    --from-literal=type=git \
+    --from-literal=name=gitea-k3s \
+    --dry-run=client -o yaml | \
+    ${KCTL} label --local -f - argocd.argoproj.io/secret-type=repository -o yaml | \
+    ${KCTL} apply -f - 2>/dev/null
+
+  # For a public repo, ArgoCD can access it without credentials
+  # If the repo is private, uncomment and configure:
+  # ${KCTL} -n argocd create secret generic gitea-k3s-repo \
+  #   --from-literal=url="${GITEA_INTERNAL_REPO}" \
+  #   --from-literal=type=git \
+  #   --from-literal=name=gitea-k3s \
+  #   --from-literal=username="${GITEA_USER}" \
+  #   --from-literal=password="${GITEA_PASSWORD}" \
+  #   --dry-run=client -o yaml | \
+  #   ${KCTL} label --local -f - argocd.argoproj.io/secret-type=repository -o yaml | \
+  #   ${KCTL} apply -f -
+fi
+
+echo -e "${GREEN} Repository configured!${NC}"
+echo ""
+
+# -----------------------------------------------------------------------------
+# Step 6: Apply the root app
+# -----------------------------------------------------------------------------
+echo -e "${YELLOW}[6/6] Applying root App-of-Apps...${NC}"
+
+${KCTL} apply -f "${REPO_DIR}/argocd/app-of-apps.yaml"
+
+echo ""
+echo -e "${GREEN}==============================================${NC}"
+echo -e "${GREEN} Bootstrap Complete!${NC}"
+echo -e "${GREEN}==============================================${NC}"
+echo ""
+echo " Root app created. ArgoCD will now sync all child apps:"
+echo ""
+echo " - cert-manager"
+echo " - metallb"
+echo " - longhorn"
+echo " - metrics (prometheus + victoria-metrics)"
+echo " - llama"
+echo " - sillytavern"
+echo ""
+echo " Monitor progress:"
+echo " kubectl --kubeconfig ${KUBECONFIG} port-forward svc/argocd-server -n argocd 8080:80"
+echo " Open http://localhost:8080"
+echo " Login: admin / ${ARGOCD_PASSWORD}"
+echo ""
+echo " Check sync status:"
+echo " kubectl --kubeconfig ${KUBECONFIG} get applications -n argocd"
+echo ""
diff --git a/bootstrap/argocd/install.sh b/bootstrap/argocd/install.sh
new file mode 100755
index 0000000..daf15f2
--- /dev/null
+++ b/bootstrap/argocd/install.sh
@@ -0,0 +1,40 @@
+#!/bin/bash
+set -e
+
+# Bootstrap ArgoCD on the k3s cluster
+# This is a one-time manual step before GitOps takes over
+
+# Exported so that helm, which is not given --kubeconfig, targets the same
+# cluster as the kubectl calls.
+export KUBECONFIG="/home/mrt0rtikize/infra/k3s/config"
+KCTL="kubectl --kubeconfig ${KUBECONFIG}"
+
+echo "=== Installing ArgoCD ==="
+
+# Add ArgoCD Helm repo
+helm repo add argo https://argoproj.github.io/argo-helm 2>/dev/null || true
+helm repo update
+
+# Install ArgoCD
+# The dot in "timeout.reconciliation" is escaped so --set addresses that
+# key under configs.cm instead of creating a nested map.
+helm upgrade --install argocd argo/argo-cd \
+  --namespace argocd \
+  --create-namespace \
+  --set server.extraArgs[0]="--insecure" \
+  --set configs.params."server\.insecure"=true \
+  --set configs.cm."timeout\.reconciliation"=180s \
+  --wait \
+  --timeout 300s
+
+echo ""
+echo "=== ArgoCD installed ==="
+echo ""
+echo "To access ArgoCD UI:"
+echo " kubectl --kubeconfig ${KUBECONFIG} port-forward svc/argocd-server -n argocd 8080:80"
+echo ""
+echo "Admin password:"
+${KCTL} -n argocd get secret argocd-initial-admin-secret -o jsonpath="{.data.password}" | base64 -d
+echo ""
+echo ""
+echo "Login with username: admin"
diff --git a/bootstrap/gitea/deployment.yaml b/bootstrap/gitea/deployment.yaml
new file mode 100644
index 0000000..e6645f2
--- /dev/null
+++ b/bootstrap/gitea/deployment.yaml
@@ -0,0 +1,62 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name:
gitea + namespace: gitea +spec: + replicas: 1 + strategy: + type: Recreate + selector: + matchLabels: + app: gitea + template: + metadata: + labels: + app: gitea + spec: + containers: + - name: gitea + image: gitea/gitea:1.24 + ports: + - containerPort: 3000 + name: http + - containerPort: 22 + name: ssh + env: + - name: GITEA__database__DB_TYPE + value: sqlite3 + - name: GITEA__server__DOMAIN + value: git.mrt0rtikize.ru + - name: GITEA__server__ROOT_URL + value: https://git.mrt0rtikize.ru + - name: GITEA__server__HTTP_PORT + value: "3000" + - name: GITEA__server__SSH_PORT + value: "22" + volumeMounts: + - name: data + mountPath: /data + resources: + requests: + cpu: 100m + memory: 128Mi + limits: + cpu: 500m + memory: 512Mi + livenessProbe: + httpGet: + path: / + port: 3000 + initialDelaySeconds: 10 + periodSeconds: 10 + readinessProbe: + httpGet: + path: / + port: 3000 + initialDelaySeconds: 5 + periodSeconds: 5 + volumes: + - name: data + persistentVolumeClaim: + claimName: gitea-data diff --git a/bootstrap/gitea/ingress.yaml b/bootstrap/gitea/ingress.yaml new file mode 100644 index 0000000..64cbd27 --- /dev/null +++ b/bootstrap/gitea/ingress.yaml @@ -0,0 +1,24 @@ +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: gitea + namespace: gitea + annotations: + cert-manager.io/cluster-issuer: letsencrypt-production +spec: + ingressClassName: traefik + tls: + - hosts: + - git.mrt0rtikize.ru + secretName: gitea-tls + rules: + - host: git.mrt0rtikize.ru + http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: gitea + port: + number: 3000 diff --git a/bootstrap/gitea/namespace.yaml b/bootstrap/gitea/namespace.yaml new file mode 100644 index 0000000..09a988f --- /dev/null +++ b/bootstrap/gitea/namespace.yaml @@ -0,0 +1,4 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: gitea diff --git a/bootstrap/gitea/pvc.yaml b/bootstrap/gitea/pvc.yaml new file mode 100644 index 0000000..efd856d --- /dev/null +++ b/bootstrap/gitea/pvc.yaml @@ 
-0,0 +1,12 @@ +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: gitea-data + namespace: gitea +spec: + storageClassName: longhorn + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 20Gi diff --git a/bootstrap/gitea/service.yaml b/bootstrap/gitea/service.yaml new file mode 100644 index 0000000..378a351 --- /dev/null +++ b/bootstrap/gitea/service.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: Service +metadata: + name: gitea + namespace: gitea +spec: + selector: + app: gitea + ports: + - name: http + port: 3000 + targetPort: 3000 + - name: ssh + port: 22 + targetPort: 22 diff --git a/infra/argocd/README.md b/infra/argocd/README.md new file mode 100644 index 0000000..e69de29 diff --git a/infra/longhorn/README.md b/infra/longhorn/README.md new file mode 100644 index 0000000..e69de29 diff --git a/infra/longhorn/longhorn-ingress.yaml b/infra/longhorn/longhorn-ingress.yaml new file mode 100644 index 0000000..de61447 --- /dev/null +++ b/infra/longhorn/longhorn-ingress.yaml @@ -0,0 +1,26 @@ +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: longhorn-ingress + namespace: longhorn-system + annotations: + cert-manager.io/cluster-issuer: letsencrypt-production + traefik.ingress.kubernetes.io/router.entrypoints: websecure + traefik.ingress.kubernetes.io/router.tls: "true" +spec: + ingressClassName: traefik # We use Traefik as the ingress controller + tls: + - hosts: + - longhorn.mrt0rtikize.ru + secretName: longhorn-tls + rules: + - host: longhorn.mrt0rtikize.ru + http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: longhorn-frontend # Service managing Longhorn dashboard + port: + number: 80 # Service port where Longhorn UI runs diff --git a/infra/longhorn/test-pvc.yaml b/infra/longhorn/test-pvc.yaml new file mode 100644 index 0000000..ba5f8a3 --- /dev/null +++ b/infra/longhorn/test-pvc.yaml @@ -0,0 +1,23 @@ +apiVersion: storage.k8s.io/v1 +kind: StorageClass +metadata: + name: longhorn +provisioner: 
driver.longhorn.io +parameters: + numberOfReplicas: '2' + staleReplicaTimeout: '30' +allowVolumeExpansion: true +reclaimPolicy: Retain +volumeBindingMode: Immediate +--- +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: longhorn-pvc +spec: + accessModes: + - ReadWriteOnce + storageClassName: longhorn + resources: + requests: + storage: 2Gi diff --git a/infra/metallb/README.md b/infra/metallb/README.md new file mode 100644 index 0000000..e69de29 diff --git a/infra/metallb/ip-address-pool.yaml b/infra/metallb/ip-address-pool.yaml new file mode 100644 index 0000000..eb24987 --- /dev/null +++ b/infra/metallb/ip-address-pool.yaml @@ -0,0 +1,10 @@ +apiVersion: metallb.io/v1beta1 +kind: IPAddressPool +metadata: + name: default-address-pool + namespace: metallb-system +spec: + addresses: + - 10.0.0.120-10.0.0.200 + autoAssign: true + avoidBuggyIPs: true diff --git a/infra/metallb/l2advert.yaml b/infra/metallb/l2advert.yaml new file mode 100644 index 0000000..6d65e99 --- /dev/null +++ b/infra/metallb/l2advert.yaml @@ -0,0 +1,8 @@ +apiVersion: metallb.io/v1beta1 +kind: L2Advertisement +metadata: + name: default-advertisement + namespace: metallb-system +spec: + ipAddressPools: + - default-address-pool diff --git a/llama/cpu.yaml b/llama/cpu.yaml new file mode 100644 index 0000000..aa04ff6 --- /dev/null +++ b/llama/cpu.yaml @@ -0,0 +1,147 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llama-server-cpu + namespace: llama +spec: + replicas: 1 + strategy: + type: Recreate + selector: + matchLabels: + app: llama-server-cpu + template: + metadata: + labels: + app: llama-server-cpu + annotations: + prometheus.io/scrape: "true" + prometheus.io/port: "8080" + prometheus.io/path: "/metrics" + spec: + nodeSelector: + gpu: amd + + initContainers: + - name: download-model + image: python:3.11-slim + env: + - name: HF_HOME + value: /models/.hf + - name: MODEL_REPO + value: "byteshape/Qwen3-Coder-30B-A3B-Instruct-GGUF" + - name: MODEL_FILE + value: 
"Qwen3-Coder-30B-A3B-Instruct-IQ4_XS-4.20bpw.gguf" + command: + - /bin/sh + - -c + - | + set -eux + + MODEL_PATH="/models/${MODEL_FILE}" + + if [ -f "${MODEL_PATH}" ]; then + echo "Model already exists at ${MODEL_PATH}, skipping download" + exit 0 + fi + + echo "Installing Hugging Face Hub downloader" + pip install --no-cache-dir huggingface_hub + + echo "Downloading ${MODEL_REPO}/${MODEL_FILE}" + python - <<'PY' + import os + from huggingface_hub import hf_hub_download + + repo_id = os.environ["MODEL_REPO"] + filename = os.environ["MODEL_FILE"] + + token = os.environ.get("HUGGING_FACE_HUB_TOKEN") + + path = hf_hub_download( + repo_id=repo_id, + filename=filename, + local_dir="/models", + local_dir_use_symlinks=False, + token=token, + ) + print(f"Downloaded to: {path}") + PY + + ls -lah /models + volumeMounts: + - name: models + mountPath: /models + + containers: + - name: llama + image: ghcr.io/ggml-org/llama.cpp:server + args: + - "--model" + - "/models/Qwen3-Coder-30B-A3B-Instruct-IQ4_XS-4.20bpw.gguf" + - "--host" + - "0.0.0.0" + - "--port" + - "8080" + - "--metrics" + - "--ctx-size" + - "32768" + - "--parallel" + - "1" + - "--cache-type-k" + - "q8_0" + - "--cache-type-v" + - "q8_0" + ports: + - name: http + containerPort: 8080 + + volumeMounts: + - name: models + mountPath: /models + + resources: + requests: + cpu: "8" + memory: "24Gi" + limits: + cpu: "12" + memory: "24Gi" + + volumes: + - name: models + persistentVolumeClaim: + claimName: llama-cpu-models-pvc +--- +apiVersion: v1 +kind: Service +metadata: + name: llama-server-cpu + namespace: llama +spec: + selector: + app: llama-server-cpu + ports: + - name: http + port: 8080 + targetPort: http + type: ClusterIP +--- +apiVersion: monitoring.coreos.com/v1 +kind: PodMonitor +metadata: + name: llama-server-cpu + namespace: llama + labels: + app: llama-server-cpu +spec: + namespaceSelector: + matchNames: + - llama + selector: + matchLabels: + app: llama-server-cpu + podMetricsEndpoints: + - port: http + path: 
/metrics
+      interval: 15s
diff --git a/llama/gpu-exporter.yaml b/llama/gpu-exporter.yaml
new file mode 100644
index 0000000..705cbcf
--- /dev/null
+++ b/llama/gpu-exporter.yaml
@@ -0,0 +1,62 @@
+apiVersion: apps/v1
+kind: DaemonSet
+metadata:
+  name: radeon-exporter
+  namespace: llama
+  labels:
+    app: radeon-exporter
+spec:
+  selector:
+    matchLabels:
+      app: radeon-exporter
+  template:
+    metadata:
+      labels:
+        app: radeon-exporter
+    spec:
+      nodeSelector:
+        gpu: amd
+      containers:
+        - name: radeon-exporter
+          image: kmulvey/radeon_exporter:latest
+          imagePullPolicy: IfNotPresent
+          ports:
+            - name: metrics
+              containerPort: 9200
+          securityContext:
+            privileged: true
+          volumeMounts:
+            - name: sys
+              mountPath: /sys
+              readOnly: true
+            - name: dri
+              mountPath: /dev/dri
+              readOnly: true
+      volumes:
+        - name: sys
+          hostPath:
+            path: /sys
+            type: Directory
+        - name: dri
+          hostPath:
+            path: /dev/dri
+            type: Directory
+---
+apiVersion: monitoring.coreos.com/v1
+kind: PodMonitor
+metadata:
+  name: radeon-exporter
+  namespace: llama
+  labels:
+    monitoring: primary
+spec:
+  namespaceSelector:
+    matchNames:
+      - llama
+  selector:
+    matchLabels:
+      app: radeon-exporter
+  podMetricsEndpoints:
+    - port: metrics
+      path: /metrics
+      interval: 15s
diff --git a/llama/litellm-db.yaml b/llama/litellm-db.yaml
new file mode 100644
index 0000000..75fa931
--- /dev/null
+++ b/llama/litellm-db.yaml
@@ -0,0 +1,128 @@
+apiVersion: v1
+kind: Secret
+metadata:
+  name: litellm-postgres
+  namespace: llama
+type: Opaque
+stringData:
+  POSTGRES_DB: litellm
+  POSTGRES_USER: litellm
+  # NOTE(review): live credential committed to git in plain text. Rotate it and
+  # source it from a SealedSecret/ExternalSecret (or similar) instead.
+  POSTGRES_PASSWORD: "7792e47efbc7348155f54a15ed34dc1d06716b2b1848711d0ee90e3461883c0d"
+---
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: litellm-postgres
+  namespace: llama
+  labels:
+    app.kubernetes.io/name: litellm-postgres
+spec:
+  accessModes:
+    - ReadWriteOnce
+  resources:
+    requests:
+      storage: 10Gi
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: litellm-postgres
+  namespace: llama
+  labels:
+    app.kubernetes.io/name: litellm-postgres
+    app.kubernetes.io/component: database
+spec:
+  replicas: 1
+  # Recreate: the data claim is ReadWriteOnce, so the old pod is stopped before
+  # the new one starts instead of both contending for the same volume.
+  strategy:
+    type: Recreate
+  selector:
+    matchLabels:
+      app.kubernetes.io/name: litellm-postgres
+      app.kubernetes.io/component: database
+  template:
+    metadata:
+      labels:
+        app.kubernetes.io/name: litellm-postgres
+        app.kubernetes.io/component: database
+    spec:
+      containers:
+        - name: postgres
+          image: postgres:16
+          imagePullPolicy: IfNotPresent
+          ports:
+            - name: postgres
+              containerPort: 5432
+          env:
+            - name: POSTGRES_DB
+              valueFrom:
+                secretKeyRef:
+                  name: litellm-postgres
+                  key: POSTGRES_DB
+            - name: POSTGRES_USER
+              valueFrom:
+                secretKeyRef:
+                  name: litellm-postgres
+                  key: POSTGRES_USER
+            - name: POSTGRES_PASSWORD
+              valueFrom:
+                secretKeyRef:
+                  name: litellm-postgres
+                  key: POSTGRES_PASSWORD
+            # Keep the cluster in a subdirectory of the mount so initdb does not
+            # trip over files at the volume root (e.g. lost+found).
+            - name: PGDATA
+              value: /var/lib/postgresql/data/pgdata
+          volumeMounts:
+            # postgres:16 keeps its data under /var/lib/postgresql/data; mounting
+            # the claim one level up can leave that directory off the claim.
+            - name: data
+              mountPath: /var/lib/postgresql/data
+          readinessProbe:
+            exec:
+              command:
+                - sh
+                - -c
+                - pg_isready -U "$POSTGRES_USER" -d "$POSTGRES_DB"
+            initialDelaySeconds: 5
+            periodSeconds: 10
+          livenessProbe:
+            exec:
+              command:
+                - sh
+                - -c
+                - pg_isready -U "$POSTGRES_USER" -d "$POSTGRES_DB"
+            initialDelaySeconds: 20
+            periodSeconds: 20
+          resources:
+            requests:
+              cpu: 100m
+              memory: 256Mi
+            limits:
+              cpu: 500m
+              memory: 256Mi
+      volumes:
+        - name: data
+          persistentVolumeClaim:
+            claimName: litellm-postgres
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: litellm-postgres
+  namespace: llama
+  labels:
+    app.kubernetes.io/name: litellm-postgres
+    app.kubernetes.io/component: database
+spec:
+  selector:
+    app.kubernetes.io/name: litellm-postgres
+    app.kubernetes.io/component: database
+  ports:
+    - name: postgres
+      port: 5432
+      targetPort: postgres
+  type: ClusterIP
diff --git a/llama/litellm.yaml b/llama/litellm.yaml
new file mode 100644
index 0000000..2b8c3c2
--- /dev/null
+++ b/llama/litellm.yaml
@@ -0,0 +1,202 @@
+apiVersion: v1
+kind: Secret
+metadata:
+  name: litellm-secret
+  namespace: llama
+  labels:
+    app.kubernetes.io/name: litellm
+    app.kubernetes.io/component:
gateway +type: Opaque +stringData: + LITELLM_MASTER_KEY: "6991c7c0f02b4bcf" +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: litellm-config + namespace: llama + labels: + app.kubernetes.io/name: litellm + app.kubernetes.io/component: gateway +data: + config.yaml: | + model_list: + - model_name: fast + litellm_params: + model: openai/fast + api_base: "http://llama-server-gpu.llama.svc.cluster.local:8080/v1" + api_key: none + + - model_name: smart + litellm_params: + model: openai/smart + api_base: "http://llama-server-cpu.llama.svc.cluster.local:8080/v1" + api_key: none + + - model_name: rp + litellm_params: + model: openai/rp + api_base: "http://llama-server-gpu-rp.llama.svc.cluster.local:8080/v1" + api_key: none + litellm_settings: + callbacks: + - prometheus + general_settings: + store_model_in_db: true + store_prompts_in_spend_logs: true +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: litellm + namespace: llama + labels: + app.kubernetes.io/name: litellm + app.kubernetes.io/component: gateway + app.kubernetes.io/part-of: llama-stack + monitoring: prometheus +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: litellm + app.kubernetes.io/component: gateway + template: + metadata: + labels: + app.kubernetes.io/name: litellm + app.kubernetes.io/component: gateway + app.kubernetes.io/part-of: llama-stack + monitoring: prometheus + annotations: + prometheus.io/scrape: "true" + prometheus.io/port: "4000" + prometheus.io/path: "/metrics" + spec: + containers: + - name: litellm + image: ghcr.io/berriai/litellm:v1.82.6.rc.3 + imagePullPolicy: IfNotPresent + args: + - "--config" + - "/app/config.yaml" + env: + - name: LITELLM_MASTER_KEY + valueFrom: + secretKeyRef: + name: litellm-secret + key: LITELLM_MASTER_KEY + - name: POSTGRES_USER + valueFrom: + secretKeyRef: + name: litellm-postgres + key: POSTGRES_USER + + - name: POSTGRES_PASSWORD + valueFrom: + secretKeyRef: + name: litellm-postgres + key: POSTGRES_PASSWORD + + - name: 
POSTGRES_DB + valueFrom: + secretKeyRef: + name: litellm-postgres + key: POSTGRES_DB + + - name: DATABASE_URL + value: "postgresql://$(POSTGRES_USER):$(POSTGRES_PASSWORD)@litellm-postgres.llama.svc.cluster.local:5432/$(POSTGRES_DB)" + ports: + - name: http + containerPort: 4000 + protocol: TCP + volumeMounts: + - name: litellm-config + mountPath: /app/config.yaml + subPath: config.yaml + resources: + requests: + cpu: "500m" + memory: "1Gi" + limits: + cpu: "1000m" + memory: "2Gi" + volumes: + - name: litellm-config + configMap: + name: litellm-config +--- +apiVersion: v1 +kind: Service +metadata: + name: litellm + namespace: llama + labels: + app.kubernetes.io/name: litellm + app.kubernetes.io/component: gateway + app.kubernetes.io/part-of: llama-stack + monitoring: prometheus +spec: + selector: + app.kubernetes.io/name: litellm + app.kubernetes.io/component: gateway + ports: + - name: http + port: 4000 + targetPort: http + protocol: TCP + type: ClusterIP +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: litellm + namespace: llama + labels: + app.kubernetes.io/name: litellm + app.kubernetes.io/component: gateway + app.kubernetes.io/part-of: llama-stack + annotations: + cert-manager.io/cluster-issuer: letsencrypt-production + traefik.ingress.kubernetes.io/router.entrypoints: websecure + traefik.ingress.kubernetes.io/router.tls: "true" +spec: + ingressClassName: traefik + tls: + - hosts: + - litellm.mrt0rtikize.ru + secretName: web-echo-tls + rules: + - host: litellm.mrt0rtikize.ru + http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: litellm + port: + number: 4000 +--- +apiVersion: monitoring.coreos.com/v1 +kind: PodMonitor +metadata: + name: litellm + namespace: llama + labels: + app.kubernetes.io/name: litellm + app.kubernetes.io/component: gateway + app.kubernetes.io/part-of: llama-stack + release: kube-prometheus-stack +spec: + namespaceSelector: + matchNames: + - llama + selector: + matchLabels: + 
app.kubernetes.io/name: litellm
+      app.kubernetes.io/component: gateway
+  podMetricsEndpoints:
+    - port: http
+      path: /metrics
+      interval: 30s
diff --git a/llama/main.yaml b/llama/main.yaml
new file mode 100644
index 0000000..9a62ef3
--- /dev/null
+++ b/llama/main.yaml
@@ -0,0 +1,171 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: llama-server-gpu
+  namespace: llama
+spec:
+  replicas: 1
+  # Recreate: a rolling update would start a second server next to the old one,
+  # with both loading a model onto the same GPU and models volume.
+  strategy:
+    type: Recreate
+  selector:
+    matchLabels:
+      app: llama-server-gpu
+  template:
+    metadata:
+      labels:
+        app: llama-server-gpu
+      annotations:
+        prometheus.io/scrape: "true"
+        prometheus.io/port: "8080"
+        prometheus.io/path: "/metrics"
+    spec:
+      nodeSelector:
+        gpu: amd
+
+      initContainers:
+        - name: download-model
+          image: python:3.11-slim
+          env:
+            - name: HF_HOME
+              value: /models/.hf
+            - name: MODEL_REPO
+              value: "byteshape/Devstral-Small-2-24B-Instruct-2512-GGUF"
+            - name: MODEL_FILE
+              value: "Devstral-Small-2-24B-Instruct-2512-IQ4_XS-4.04bpw.gguf"
+            # optional, only if you need gated/private models
+            # - name: HUGGING_FACE_HUB_TOKEN
+            #   valueFrom:
+            #     secretKeyRef:
+            #       name: hf-token
+            #       key: token
+          command:
+            - /bin/sh
+            - -c
+            - |
+              set -eux
+
+              MODEL_PATH="/models/${MODEL_FILE}"
+
+              if [ -f "${MODEL_PATH}" ]; then
+                echo "Model already exists at ${MODEL_PATH}, skipping download"
+                exit 0
+              fi
+
+              echo "Installing Hugging Face Hub downloader"
+              pip install --no-cache-dir huggingface_hub
+
+              echo "Downloading ${MODEL_REPO}/${MODEL_FILE}"
+              python - <<'PY'
+              import os
+              from huggingface_hub import hf_hub_download
+
+              repo_id = os.environ["MODEL_REPO"]
+              filename = os.environ["MODEL_FILE"]
+
+              token = os.environ.get("HUGGING_FACE_HUB_TOKEN")
+
+              # The deprecated local_dir_use_symlinks flag is not passed: pip
+              # installs an unpinned huggingface_hub, which no longer needs it.
+              path = hf_hub_download(
+                  repo_id=repo_id,
+                  filename=filename,
+                  local_dir="/models",
+                  token=token,
+              )
+              print(f"Downloaded to: {path}")
+              PY
+
+              ls -lah /models
+          volumeMounts:
+            - name: models
+              mountPath: /models
+
+      containers:
+        - name: llama
+          image: ghcr.io/ggml-org/llama.cpp:server-vulkan
+          args:
+            -
"--model" + - "/models/Devstral-Small-2-24B-Instruct-2512-IQ4_XS-4.04bpw.gguf" + - "--host" + - "0.0.0.0" + - "--port" + - "8080" + - "--n-gpu-layers" + - "999" + - "--metrics" + + # performance tuning + - "--ctx-size" + - "32768" + - "--parallel" + - "4" + + # KV cache quantization + - "--cache-type-k" + - "q8_0" + - "--cache-type-v" + - "q8_0" + ports: + - name: http + containerPort: 8080 + + securityContext: + privileged: true + + volumeMounts: + - name: models + mountPath: /models + - name: dri + mountPath: /dev/dri + + resources: + requests: + cpu: "2" + memory: "4Gi" + limits: + cpu: "2" + memory: "4Gi" + + volumes: + - name: models + persistentVolumeClaim: + claimName: llama-gpu-models-pvc + - name: dri + hostPath: + path: /dev/dri + type: Directory +--- +apiVersion: v1 +kind: Service +metadata: + name: llama-server-gpu + namespace: llama +spec: + selector: + app: llama-server-gpu + ports: + - name: http + port: 8080 + targetPort: http + type: ClusterIP +--- +apiVersion: monitoring.coreos.com/v1 +kind: PodMonitor +metadata: + name: llama-server-gpu + namespace: llama + labels: + app: llama-server-gpu +spec: + namespaceSelector: + matchNames: + - llama + selector: + matchLabels: + app: llama-server-gpu + podMetricsEndpoints: + - port: http + path: /metrics + interval: 15s diff --git a/llama/namespace.yaml b/llama/namespace.yaml new file mode 100644 index 0000000..d68229b --- /dev/null +++ b/llama/namespace.yaml @@ -0,0 +1,42 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: llama +--- +# apiVersion: storage.k8s.io/v1 +# kind: StorageClass +# metadata: +# name: longhorn-llama +# provisioner: driver.longhorn.io +# parameters: +# numberOfReplicas: "2" +# staleReplicaTimeout: "30" +# allowVolumeExpansion: true +# reclaimPolicy: Retain +# volumeBindingMode: Immediate +# --- +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: llama-gpu-models-pvc + namespace: llama +spec: + accessModes: + - ReadWriteOnce + # storageClassName: longhorn-llama + 
resources: + requests: + storage: 50Gi +--- +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: llama-cpu-models-pvc + namespace: llama +spec: + accessModes: + - ReadWriteOnce + # storageClassName: longhorn-llama + resources: + requests: + storage: 100Gi diff --git a/llama/rp.yaml b/llama/rp.yaml new file mode 100644 index 0000000..1273e34 --- /dev/null +++ b/llama/rp.yaml @@ -0,0 +1,166 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llama-server-gpu-rp + namespace: llama +spec: + replicas: 1 + selector: + matchLabels: + app: llama-server-gpu-rp + template: + metadata: + labels: + app: llama-server-gpu-rp + annotations: + prometheus.io/scrape: "true" + prometheus.io/port: "8080" + prometheus.io/path: "/metrics" + spec: + nodeSelector: + gpu: amd + + initContainers: + - name: download-model + image: python:3.11-slim + env: + - name: HF_HOME + value: /models/.hf + - name: MODEL_REPO + value: "mradermacher/Omega-Darker-Gaslight_The-Final-Forgotten-Fever-Dream-24B-GGUF" + - name: MODEL_FILE + value: "Omega-Darker-Gaslight_The-Final-Forgotten-Fever-Dream-24B.Q4_K_S.gguf" + # optional, only if you need gated/private models + # - name: HUGGING_FACE_HUB_TOKEN + # valueFrom: + # secretKeyRef: + # name: hf-token + # key: token + command: + - /bin/sh + - -c + - | + set -eux + + MODEL_PATH="/models/${MODEL_FILE}" + + if [ -f "${MODEL_PATH}" ]; then + echo "Model already exists at ${MODEL_PATH}, skipping download" + exit 0 + fi + + echo "Installing Hugging Face Hub downloader" + pip install --no-cache-dir huggingface_hub + + echo "Downloading ${MODEL_REPO}/${MODEL_FILE}" + python - <<'PY' + import os + from huggingface_hub import hf_hub_download + + repo_id = os.environ["MODEL_REPO"] + filename = os.environ["MODEL_FILE"] + + token = os.environ.get("HUGGING_FACE_HUB_TOKEN") + + path = hf_hub_download( + repo_id=repo_id, + filename=filename, + local_dir="/models", + local_dir_use_symlinks=False, + token=token, + ) + print(f"Downloaded to: {path}") + PY + 
+ ls -lah /models + volumeMounts: + - name: models + mountPath: /models + + containers: + - name: llama + image: ghcr.io/ggml-org/llama.cpp:server-vulkan + args: + - "--model" + - "/models/Omega-Darker-Gaslight_The-Final-Forgotten-Fever-Dream-24B.Q4_K_S.gguf" + - "--host" + - "0.0.0.0" + - "--port" + - "8080" + - "--n-gpu-layers" + - "999" + - "--metrics" + + # performance tuning + - "--ctx-size" + - "32768" + - "--parallel" + - "1" + + # KV cache quantization + - "--cache-type-k" + - "q8_0" + - "--cache-type-v" + - "q8_0" + ports: + - name: http + containerPort: 8080 + + securityContext: + privileged: true + + volumeMounts: + - name: models + mountPath: /models + - name: dri + mountPath: /dev/dri + + resources: + requests: + cpu: "2" + memory: "4Gi" + limits: + cpu: "2" + memory: "4Gi" + + volumes: + - name: models + persistentVolumeClaim: + claimName: llama-gpu-models-pvc + - name: dri + hostPath: + path: /dev/dri + type: Directory +--- +apiVersion: v1 +kind: Service +metadata: + name: llama-server-gpu-rp + namespace: llama +spec: + selector: + app: llama-server-gpu-rp + ports: + - name: http + port: 8080 + targetPort: http + type: ClusterIP +--- +apiVersion: monitoring.coreos.com/v1 +kind: PodMonitor +metadata: + name: llama-server-gpu-rp + namespace: llama + labels: + app: llama-server-gpu-rp +spec: + namespaceSelector: + matchNames: + - llama + selector: + matchLabels: + app: llama-server-gpu-rp + podMetricsEndpoints: + - port: http + path: /metrics + interval: 15s diff --git a/manifests/llama/cpu.yaml b/manifests/llama/cpu.yaml new file mode 100644 index 0000000..aa04ff6 --- /dev/null +++ b/manifests/llama/cpu.yaml @@ -0,0 +1,147 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llama-server-cpu + namespace: llama +spec: + replicas: 1 + strategy: + type: Recreate + selector: + matchLabels: + app: llama-server-cpu + template: + metadata: + labels: + app: llama-server-cpu + annotations: + prometheus.io/scrape: "true" + prometheus.io/port: "8080" + 
prometheus.io/path: "/metrics" + spec: + nodeSelector: + gpu: amd + + initContainers: + - name: download-model + image: python:3.11-slim + env: + - name: HF_HOME + value: /models/.hf + - name: MODEL_REPO + value: "byteshape/Qwen3-Coder-30B-A3B-Instruct-GGUF" + - name: MODEL_FILE + value: "Qwen3-Coder-30B-A3B-Instruct-IQ4_XS-4.20bpw.gguf" + command: + - /bin/sh + - -c + - | + set -eux + + MODEL_PATH="/models/${MODEL_FILE}" + + if [ -f "${MODEL_PATH}" ]; then + echo "Model already exists at ${MODEL_PATH}, skipping download" + exit 0 + fi + + echo "Installing Hugging Face Hub downloader" + pip install --no-cache-dir huggingface_hub + + echo "Downloading ${MODEL_REPO}/${MODEL_FILE}" + python - <<'PY' + import os + from huggingface_hub import hf_hub_download + + repo_id = os.environ["MODEL_REPO"] + filename = os.environ["MODEL_FILE"] + + token = os.environ.get("HUGGING_FACE_HUB_TOKEN") + + path = hf_hub_download( + repo_id=repo_id, + filename=filename, + local_dir="/models", + local_dir_use_symlinks=False, + token=token, + ) + print(f"Downloaded to: {path}") + PY + + ls -lah /models + volumeMounts: + - name: models + mountPath: /models + + containers: + - name: llama + image: ghcr.io/ggml-org/llama.cpp:server + args: + - "--model" + - "/models/Qwen3-Coder-30B-A3B-Instruct-IQ4_XS-4.20bpw.gguf" + - "--host" + - "0.0.0.0" + - "--port" + - "8080" + - "--metrics" + - "--ctx-size" + - "32768" + - "--parallel" + - "1" + - "--cache-type-k" + - "q8_0" + - "--cache-type-v" + - "q8_0" + ports: + - name: http + containerPort: 8080 + + volumeMounts: + - name: models + mountPath: /models + + resources: + requests: + cpu: "8" + memory: "24Gi" + limits: + cpu: "12" + memory: "24Gi" + + volumes: + - name: models + persistentVolumeClaim: + claimName: llama-cpu-models-pvc +--- +apiVersion: v1 +kind: Service +metadata: + name: llama-server-cpu + namespace: llama +spec: + selector: + app: llama-server-cpu + ports: + - name: http + port: 8080 + targetPort: http + type: ClusterIP +--- 
+apiVersion: monitoring.coreos.com/v1 +kind: PodMonitor +metadata: + name: llama-server-cpu + namespace: llama + labels: + app: llama-server-cpu +spec: + namespaceSelector: + matchNames: + - llama + selector: + matchLabels: + app: llama-server-cpu + podMetricsEndpoints: + - port: http + path: /metrics + interval: 15s diff --git a/manifests/llama/gpu-exporter.yaml b/manifests/llama/gpu-exporter.yaml new file mode 100644 index 0000000..705cbcf --- /dev/null +++ b/manifests/llama/gpu-exporter.yaml @@ -0,0 +1,62 @@ +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: radeon-exporter + namespace: llama + labels: + app: radeon-exporter +spec: + selector: + matchLabels: + app: radeon-exporter + template: + metadata: + labels: + app: radeon-exporter + spec: + nodeSelector: + gpu: amd + containers: + - name: radeon-exporter + image: kmulvey/radeon_exporter:latest + imagePullPolicy: IfNotPresent + ports: + - name: metrics + containerPort: 9200 + securityContext: + privileged: true + volumeMounts: + - name: sys + mountPath: /sys + readOnly: true + - name: dri + mountPath: /dev/dri + readOnly: true + volumes: + - name: sys + hostPath: + path: /sys + type: Directory + - name: dri + hostPath: + path: /dev/dri + type: Directory +--- +apiVersion: monitoring.coreos.com/v1 +kind: PodMonitor +metadata: + name: radeon-exporter + namespace: llama + labels: + monitoring: primary +spec: + namespaceSelector: + matchNames: + - llama + selector: + matchLabels: + app: radeon-exporter + podMetricsEndpoints: + - port: metrics + path: /metrics + interval: 15s diff --git a/manifests/llama/litellm-db.yaml b/manifests/llama/litellm-db.yaml new file mode 100644 index 0000000..75fa931 --- /dev/null +++ b/manifests/llama/litellm-db.yaml @@ -0,0 +1,116 @@ +apiVersion: v1 +kind: Secret +metadata: + name: litellm-postgres + namespace: llama +type: Opaque +stringData: + POSTGRES_DB: litellm + POSTGRES_USER: litellm + POSTGRES_PASSWORD: 7792e47efbc7348155f54a15ed34dc1d06716b2b1848711d0ee90e3461883c0d 
+--- +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: litellm-postgres + namespace: llama + labels: + app.kubernetes.io/name: litellm-postgres +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 10Gi +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: litellm-postgres + namespace: llama + labels: + app.kubernetes.io/name: litellm-postgres + app.kubernetes.io/component: database +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: litellm-postgres + app.kubernetes.io/component: database + template: + metadata: + labels: + app.kubernetes.io/name: litellm-postgres + app.kubernetes.io/component: database + spec: + containers: + - name: postgres + image: postgres:16 + imagePullPolicy: IfNotPresent + ports: + - name: postgres + containerPort: 5432 + env: + - name: POSTGRES_DB + valueFrom: + secretKeyRef: + name: litellm-postgres + key: POSTGRES_DB + - name: POSTGRES_USER + valueFrom: + secretKeyRef: + name: litellm-postgres + key: POSTGRES_USER + - name: POSTGRES_PASSWORD + valueFrom: + secretKeyRef: + name: litellm-postgres + key: POSTGRES_PASSWORD + volumeMounts: + - name: data + mountPath: /var/lib/postgresql + readinessProbe: + exec: + command: + - sh + - -c + - pg_isready -U "$POSTGRES_USER" -d "$POSTGRES_DB" + initialDelaySeconds: 5 + periodSeconds: 10 + livenessProbe: + exec: + command: + - sh + - -c + - pg_isready -U "$POSTGRES_USER" -d "$POSTGRES_DB" + initialDelaySeconds: 20 + periodSeconds: 20 + resources: + requests: + cpu: 100m + memory: 256Mi + limits: + cpu: 500m + memory: 256Mi + volumes: + - name: data + persistentVolumeClaim: + claimName: litellm-postgres +--- +apiVersion: v1 +kind: Service +metadata: + name: litellm-postgres + namespace: llama + labels: + app.kubernetes.io/name: litellm-postgres + app.kubernetes.io/component: database +spec: + selector: + app.kubernetes.io/name: litellm-postgres + app.kubernetes.io/component: database + ports: + - name: postgres + port: 5432 + 
targetPort: postgres + type: ClusterIP diff --git a/manifests/llama/litellm.yaml b/manifests/llama/litellm.yaml new file mode 100644 index 0000000..2b8c3c2 --- /dev/null +++ b/manifests/llama/litellm.yaml @@ -0,0 +1,202 @@ +apiVersion: v1 +kind: Secret +metadata: + name: litellm-secret + namespace: llama + labels: + app.kubernetes.io/name: litellm + app.kubernetes.io/component: gateway +type: Opaque +stringData: + LITELLM_MASTER_KEY: "6991c7c0f02b4bcf" +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: litellm-config + namespace: llama + labels: + app.kubernetes.io/name: litellm + app.kubernetes.io/component: gateway +data: + config.yaml: | + model_list: + - model_name: fast + litellm_params: + model: openai/fast + api_base: "http://llama-server-gpu.llama.svc.cluster.local:8080/v1" + api_key: none + + - model_name: smart + litellm_params: + model: openai/smart + api_base: "http://llama-server-cpu.llama.svc.cluster.local:8080/v1" + api_key: none + + - model_name: rp + litellm_params: + model: openai/rp + api_base: "http://llama-server-gpu-rp.llama.svc.cluster.local:8080/v1" + api_key: none + litellm_settings: + callbacks: + - prometheus + general_settings: + store_model_in_db: true + store_prompts_in_spend_logs: true +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: litellm + namespace: llama + labels: + app.kubernetes.io/name: litellm + app.kubernetes.io/component: gateway + app.kubernetes.io/part-of: llama-stack + monitoring: prometheus +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: litellm + app.kubernetes.io/component: gateway + template: + metadata: + labels: + app.kubernetes.io/name: litellm + app.kubernetes.io/component: gateway + app.kubernetes.io/part-of: llama-stack + monitoring: prometheus + annotations: + prometheus.io/scrape: "true" + prometheus.io/port: "4000" + prometheus.io/path: "/metrics" + spec: + containers: + - name: litellm + image: ghcr.io/berriai/litellm:v1.82.6.rc.3 + imagePullPolicy: 
IfNotPresent + args: + - "--config" + - "/app/config.yaml" + env: + - name: LITELLM_MASTER_KEY + valueFrom: + secretKeyRef: + name: litellm-secret + key: LITELLM_MASTER_KEY + - name: POSTGRES_USER + valueFrom: + secretKeyRef: + name: litellm-postgres + key: POSTGRES_USER + + - name: POSTGRES_PASSWORD + valueFrom: + secretKeyRef: + name: litellm-postgres + key: POSTGRES_PASSWORD + + - name: POSTGRES_DB + valueFrom: + secretKeyRef: + name: litellm-postgres + key: POSTGRES_DB + + - name: DATABASE_URL + value: "postgresql://$(POSTGRES_USER):$(POSTGRES_PASSWORD)@litellm-postgres.llama.svc.cluster.local:5432/$(POSTGRES_DB)" + ports: + - name: http + containerPort: 4000 + protocol: TCP + volumeMounts: + - name: litellm-config + mountPath: /app/config.yaml + subPath: config.yaml + resources: + requests: + cpu: "500m" + memory: "1Gi" + limits: + cpu: "1000m" + memory: "2Gi" + volumes: + - name: litellm-config + configMap: + name: litellm-config +--- +apiVersion: v1 +kind: Service +metadata: + name: litellm + namespace: llama + labels: + app.kubernetes.io/name: litellm + app.kubernetes.io/component: gateway + app.kubernetes.io/part-of: llama-stack + monitoring: prometheus +spec: + selector: + app.kubernetes.io/name: litellm + app.kubernetes.io/component: gateway + ports: + - name: http + port: 4000 + targetPort: http + protocol: TCP + type: ClusterIP +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: litellm + namespace: llama + labels: + app.kubernetes.io/name: litellm + app.kubernetes.io/component: gateway + app.kubernetes.io/part-of: llama-stack + annotations: + cert-manager.io/cluster-issuer: letsencrypt-production + traefik.ingress.kubernetes.io/router.entrypoints: websecure + traefik.ingress.kubernetes.io/router.tls: "true" +spec: + ingressClassName: traefik + tls: + - hosts: + - litellm.mrt0rtikize.ru + secretName: web-echo-tls + rules: + - host: litellm.mrt0rtikize.ru + http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: 
litellm + port: + number: 4000 +--- +apiVersion: monitoring.coreos.com/v1 +kind: PodMonitor +metadata: + name: litellm + namespace: llama + labels: + app.kubernetes.io/name: litellm + app.kubernetes.io/component: gateway + app.kubernetes.io/part-of: llama-stack + release: kube-prometheus-stack +spec: + namespaceSelector: + matchNames: + - llama + selector: + matchLabels: + app.kubernetes.io/name: litellm + app.kubernetes.io/component: gateway + podMetricsEndpoints: + - port: http + path: /metrics + interval: 30s diff --git a/manifests/llama/main.yaml b/manifests/llama/main.yaml new file mode 100644 index 0000000..9a62ef3 --- /dev/null +++ b/manifests/llama/main.yaml @@ -0,0 +1,166 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llama-server-gpu + namespace: llama +spec: + replicas: 1 + selector: + matchLabels: + app: llama-server-gpu + template: + metadata: + labels: + app: llama-server-gpu + annotations: + prometheus.io/scrape: "true" + prometheus.io/port: "8080" + prometheus.io/path: "/metrics" + spec: + nodeSelector: + gpu: amd + + initContainers: + - name: download-model + image: python:3.11-slim + env: + - name: HF_HOME + value: /models/.hf + - name: MODEL_REPO + value: "byteshape/Devstral-Small-2-24B-Instruct-2512-GGUF" + - name: MODEL_FILE + value: "Devstral-Small-2-24B-Instruct-2512-IQ4_XS-4.04bpw.gguf" + # optional, only if you need gated/private models + # - name: HUGGING_FACE_HUB_TOKEN + # valueFrom: + # secretKeyRef: + # name: hf-token + # key: token + command: + - /bin/sh + - -c + - | + set -eux + + MODEL_PATH="/models/${MODEL_FILE}" + + if [ -f "${MODEL_PATH}" ]; then + echo "Model already exists at ${MODEL_PATH}, skipping download" + exit 0 + fi + + echo "Installing Hugging Face Hub downloader" + pip install --no-cache-dir huggingface_hub + + echo "Downloading ${MODEL_REPO}/${MODEL_FILE}" + python - <<'PY' + import os + from huggingface_hub import hf_hub_download + + repo_id = os.environ["MODEL_REPO"] + filename = 
os.environ["MODEL_FILE"] + + token = os.environ.get("HUGGING_FACE_HUB_TOKEN") + + path = hf_hub_download( + repo_id=repo_id, + filename=filename, + local_dir="/models", + local_dir_use_symlinks=False, + token=token, + ) + print(f"Downloaded to: {path}") + PY + + ls -lah /models + volumeMounts: + - name: models + mountPath: /models + + containers: + - name: llama + image: ghcr.io/ggml-org/llama.cpp:server-vulkan + args: + - "--model" + - "/models/Devstral-Small-2-24B-Instruct-2512-IQ4_XS-4.04bpw.gguf" + - "--host" + - "0.0.0.0" + - "--port" + - "8080" + - "--n-gpu-layers" + - "999" + - "--metrics" + + # performance tuning + - "--ctx-size" + - "32768" + - "--parallel" + - "4" + + # KV cache quantization + - "--cache-type-k" + - "q8_0" + - "--cache-type-v" + - "q8_0" + ports: + - name: http + containerPort: 8080 + + securityContext: + privileged: true + + volumeMounts: + - name: models + mountPath: /models + - name: dri + mountPath: /dev/dri + + resources: + requests: + cpu: "2" + memory: "4Gi" + limits: + cpu: "2" + memory: "4Gi" + + volumes: + - name: models + persistentVolumeClaim: + claimName: llama-gpu-models-pvc + - name: dri + hostPath: + path: /dev/dri + type: Directory +--- +apiVersion: v1 +kind: Service +metadata: + name: llama-server-gpu + namespace: llama +spec: + selector: + app: llama-server-gpu + ports: + - name: http + port: 8080 + targetPort: http + type: ClusterIP +--- +apiVersion: monitoring.coreos.com/v1 +kind: PodMonitor +metadata: + name: llama-server-gpu + namespace: llama + labels: + app: llama-server-gpu +spec: + namespaceSelector: + matchNames: + - llama + selector: + matchLabels: + app: llama-server-gpu + podMetricsEndpoints: + - port: http + path: /metrics + interval: 15s diff --git a/manifests/llama/namespace.yaml b/manifests/llama/namespace.yaml new file mode 100644 index 0000000..d68229b --- /dev/null +++ b/manifests/llama/namespace.yaml @@ -0,0 +1,42 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: llama +--- +# apiVersion: 
storage.k8s.io/v1 +# kind: StorageClass +# metadata: +# name: longhorn-llama +# provisioner: driver.longhorn.io +# parameters: +# numberOfReplicas: "2" +# staleReplicaTimeout: "30" +# allowVolumeExpansion: true +# reclaimPolicy: Retain +# volumeBindingMode: Immediate +# --- +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: llama-gpu-models-pvc + namespace: llama +spec: + accessModes: + - ReadWriteOnce + # storageClassName: longhorn-llama + resources: + requests: + storage: 50Gi +--- +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: llama-cpu-models-pvc + namespace: llama +spec: + accessModes: + - ReadWriteOnce + # storageClassName: longhorn-llama + resources: + requests: + storage: 100Gi diff --git a/manifests/llama/rp.yaml b/manifests/llama/rp.yaml new file mode 100644 index 0000000..1273e34 --- /dev/null +++ b/manifests/llama/rp.yaml @@ -0,0 +1,166 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llama-server-gpu-rp + namespace: llama +spec: + replicas: 1 + selector: + matchLabels: + app: llama-server-gpu-rp + template: + metadata: + labels: + app: llama-server-gpu-rp + annotations: + prometheus.io/scrape: "true" + prometheus.io/port: "8080" + prometheus.io/path: "/metrics" + spec: + nodeSelector: + gpu: amd + + initContainers: + - name: download-model + image: python:3.11-slim + env: + - name: HF_HOME + value: /models/.hf + - name: MODEL_REPO + value: "mradermacher/Omega-Darker-Gaslight_The-Final-Forgotten-Fever-Dream-24B-GGUF" + - name: MODEL_FILE + value: "Omega-Darker-Gaslight_The-Final-Forgotten-Fever-Dream-24B.Q4_K_S.gguf" + # optional, only if you need gated/private models + # - name: HUGGING_FACE_HUB_TOKEN + # valueFrom: + # secretKeyRef: + # name: hf-token + # key: token + command: + - /bin/sh + - -c + - | + set -eux + + MODEL_PATH="/models/${MODEL_FILE}" + + if [ -f "${MODEL_PATH}" ]; then + echo "Model already exists at ${MODEL_PATH}, skipping download" + exit 0 + fi + + echo "Installing Hugging Face Hub 
downloader" + pip install --no-cache-dir huggingface_hub + + echo "Downloading ${MODEL_REPO}/${MODEL_FILE}" + python - <<'PY' + import os + from huggingface_hub import hf_hub_download + + repo_id = os.environ["MODEL_REPO"] + filename = os.environ["MODEL_FILE"] + + token = os.environ.get("HUGGING_FACE_HUB_TOKEN") + + path = hf_hub_download( + repo_id=repo_id, + filename=filename, + local_dir="/models", + local_dir_use_symlinks=False, + token=token, + ) + print(f"Downloaded to: {path}") + PY + + ls -lah /models + volumeMounts: + - name: models + mountPath: /models + + containers: + - name: llama + image: ghcr.io/ggml-org/llama.cpp:server-vulkan + args: + - "--model" + - "/models/Omega-Darker-Gaslight_The-Final-Forgotten-Fever-Dream-24B.Q4_K_S.gguf" + - "--host" + - "0.0.0.0" + - "--port" + - "8080" + - "--n-gpu-layers" + - "999" + - "--metrics" + + # performance tuning + - "--ctx-size" + - "32768" + - "--parallel" + - "1" + + # KV cache quantization + - "--cache-type-k" + - "q8_0" + - "--cache-type-v" + - "q8_0" + ports: + - name: http + containerPort: 8080 + + securityContext: + privileged: true + + volumeMounts: + - name: models + mountPath: /models + - name: dri + mountPath: /dev/dri + + resources: + requests: + cpu: "2" + memory: "4Gi" + limits: + cpu: "2" + memory: "4Gi" + + volumes: + - name: models + persistentVolumeClaim: + claimName: llama-gpu-models-pvc + - name: dri + hostPath: + path: /dev/dri + type: Directory +--- +apiVersion: v1 +kind: Service +metadata: + name: llama-server-gpu-rp + namespace: llama +spec: + selector: + app: llama-server-gpu-rp + ports: + - name: http + port: 8080 + targetPort: http + type: ClusterIP +--- +apiVersion: monitoring.coreos.com/v1 +kind: PodMonitor +metadata: + name: llama-server-gpu-rp + namespace: llama + labels: + app: llama-server-gpu-rp +spec: + namespaceSelector: + matchNames: + - llama + selector: + matchLabels: + app: llama-server-gpu-rp + podMetricsEndpoints: + - port: http + path: /metrics + interval: 15s diff 
--git a/manifests/longhorn/longhorn-ingress.yaml b/manifests/longhorn/longhorn-ingress.yaml new file mode 100644 index 0000000..de61447 --- /dev/null +++ b/manifests/longhorn/longhorn-ingress.yaml @@ -0,0 +1,26 @@ +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: longhorn-ingress + namespace: longhorn-system + annotations: + cert-manager.io/cluster-issuer: letsencrypt-production + traefik.ingress.kubernetes.io/router.entrypoints: websecure + traefik.ingress.kubernetes.io/router.tls: "true" +spec: + ingressClassName: traefik # We use Traefik as the ingress controller + tls: + - hosts: + - longhorn.mrt0rtikize.ru + secretName: longhorn-tls + rules: + - host: longhorn.mrt0rtikize.ru + http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: longhorn-frontend # Service managing Longhorn dashboard + port: + number: 80 # Service port where Longhorn UI runs diff --git a/manifests/longhorn/test-pvc.yaml b/manifests/longhorn/test-pvc.yaml new file mode 100644 index 0000000..ba5f8a3 --- /dev/null +++ b/manifests/longhorn/test-pvc.yaml @@ -0,0 +1,23 @@ +apiVersion: storage.k8s.io/v1 +kind: StorageClass +metadata: + name: longhorn +provisioner: driver.longhorn.io +parameters: + numberOfReplicas: '2' + staleReplicaTimeout: '30' +allowVolumeExpansion: true +reclaimPolicy: Retain +volumeBindingMode: Immediate +--- +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: longhorn-pvc +spec: + accessModes: + - ReadWriteOnce + storageClassName: longhorn + resources: + requests: + storage: 2Gi diff --git a/manifests/metallb/ip-address-pool.yaml b/manifests/metallb/ip-address-pool.yaml new file mode 100644 index 0000000..eb24987 --- /dev/null +++ b/manifests/metallb/ip-address-pool.yaml @@ -0,0 +1,10 @@ +apiVersion: metallb.io/v1beta1 +kind: IPAddressPool +metadata: + name: default-address-pool + namespace: metallb-system +spec: + addresses: + - 10.0.0.120-10.0.0.200 + autoAssign: true + avoidBuggyIPs: true diff --git 
a/manifests/metallb/l2advert.yaml b/manifests/metallb/l2advert.yaml new file mode 100644 index 0000000..6d65e99 --- /dev/null +++ b/manifests/metallb/l2advert.yaml @@ -0,0 +1,8 @@ +apiVersion: metallb.io/v1beta1 +kind: L2Advertisement +metadata: + name: default-advertisement + namespace: metallb-system +spec: + ipAddressPools: + - default-address-pool diff --git a/manifests/metrics/grafana-ingress.yaml b/manifests/metrics/grafana-ingress.yaml new file mode 100644 index 0000000..29413d3 --- /dev/null +++ b/manifests/metrics/grafana-ingress.yaml @@ -0,0 +1,26 @@ +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: grafana + namespace: metrics + annotations: + cert-manager.io/cluster-issuer: letsencrypt-production + traefik.ingress.kubernetes.io/router.entrypoints: websecure + traefik.ingress.kubernetes.io/router.tls: "true" +spec: + ingressClassName: traefik + tls: + - hosts: + - grafana.mrt0rtikize.ru + secretName: grafana-tls + rules: + - host: grafana.mrt0rtikize.ru + http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: kube-prometheus-stack-grafana + port: + number: 80 diff --git a/manifests/metrics/kube-prometheus-stack-values.yaml b/manifests/metrics/kube-prometheus-stack-values.yaml new file mode 100644 index 0000000..c4fb4b3 --- /dev/null +++ b/manifests/metrics/kube-prometheus-stack-values.yaml @@ -0,0 +1,90 @@ +fullnameOverride: kube-prometheus +namespaceOverride: metrics + +prometheusOperator: + namespace: metrics + admissionWebhooks: + failurePolicy: Ignore + +alertmanager: + enabled: true + alertmanagerSpec: + resources: + requests: + cpu: 50m + memory: 128Mi + limits: + cpu: 200m + memory: 512Mi + storage: + volumeClaimTemplate: + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 10Gi + +prometheus: + enabled: true + prometheusSpec: + replicas: 1 + retention: 15d + walCompression: true + serviceMonitorSelectorNilUsesHelmValues: false + podMonitorSelectorNilUsesHelmValues: false + resources: 
+ requests: + cpu: 100m + memory: 512Mi + limits: + cpu: 1000m + memory: 1Gi + storageSpec: + volumeClaimTemplate: + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 50Gi + remoteWrite: + - url: http://victoria-metrics.metrics.svc.cluster.local:8428/api/v1/write + queueConfig: + maxSamplesPerSend: 10000 + capacity: 5000 + maxShards: 30 + +kubeEtcd: + enabled: false + +kubeControllerManager: + enabled: false + +kubeScheduler: + enabled: false + +kubeProxy: + enabled: false + +grafana: + enabled: true + adminUser: admin + adminPassword: change-me + defaultDashboardsEnabled: true + resources: + requests: + cpu: 50m + memory: 256Mi + limits: + cpu: 200m + memory: 512Mi + persistence: + enabled: true + size: 10Gi + additionalDataSources: + - name: victoria-metrics + type: prometheus + access: proxy + url: http://victoria-metrics.metrics.svc.cluster.local:8428 + isDefault: false diff --git a/manifests/metrics/namespace.yaml b/manifests/metrics/namespace.yaml new file mode 100644 index 0000000..6d57933 --- /dev/null +++ b/manifests/metrics/namespace.yaml @@ -0,0 +1,4 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: metrics diff --git a/manifests/metrics/victoria-metrics-service.yaml b/manifests/metrics/victoria-metrics-service.yaml new file mode 100644 index 0000000..a99b2e2 --- /dev/null +++ b/manifests/metrics/victoria-metrics-service.yaml @@ -0,0 +1,19 @@ +apiVersion: v1 +kind: Service +metadata: + name: victoria-metrics + namespace: metrics + labels: + app.kubernetes.io/name: victoria-metrics-single + app.kubernetes.io/instance: victoria-metrics-single + app: server +spec: + selector: + app.kubernetes.io/name: victoria-metrics-single + app.kubernetes.io/instance: victoria-metrics-single + app: server + ports: + - name: http + port: 8428 + targetPort: 8428 + type: ClusterIP diff --git a/manifests/metrics/victoria-metrics-single-values.yaml b/manifests/metrics/victoria-metrics-single-values.yaml new file mode 100644 index 0000000..fb63e48 
--- /dev/null +++ b/manifests/metrics/victoria-metrics-single-values.yaml @@ -0,0 +1,23 @@ +fullnameOverride: victoria-metrics-single +namespaceOverride: metrics + +server: + retentionPeriod: 30d + scrapeInterval: 30s + replicaCount: 1 + persistentVolume: + enabled: true + size: 200Gi + resources: + requests: + cpu: 100m + memory: 256Mi + limits: + cpu: 500m + memory: 1Gi + service: + type: ClusterIP + port: 8428 + +serviceAccount: + create: true diff --git a/manifests/sillytavern/configmap.yaml b/manifests/sillytavern/configmap.yaml new file mode 100644 index 0000000..f2543ea --- /dev/null +++ b/manifests/sillytavern/configmap.yaml @@ -0,0 +1,122 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: sillytavern-config + namespace: sillytavern +data: + config.yaml: | + dataRoot: ./data + listen: true + listenAddress: + ipv4: 0.0.0.0 + ipv6: '[::]' + protocol: + ipv4: true + ipv6: false + dnsPreferIPv6: false + browserLaunch: + enabled: false + browser: 'default' + hostname: 'auto' + port: -1 + avoidLocalhost: false + port: 8000 + ssl: + enabled: false + certPath: "./certs/cert.pem" + keyPath: "./certs/privkey.pem" + keyPassphrase: "" + whitelistMode: false + enableForwardedWhitelist: false + whitelist: + - ::1 + - 127.0.0.1 + whitelistDockerHosts: false + basicAuthMode: false + basicAuthUser: + username: "user" + password: "password" + enableCorsProxy: false + requestProxy: + enabled: false + url: "socks5://username:password@example.com:1080" + bypass: + - localhost + - 127.0.0.1 + enableUserAccounts: false + enableDiscreetLogin: false + perUserBasicAuth: false + sso: + autheliaAuth: false + authentikAuth: false + hostWhitelist: + enabled: false + scan: true + hosts: [] + sessionTimeout: -1 + disableCsrfProtection: false + securityOverride: false + logging: + enableAccessLog: true + minLogLevel: 0 + rateLimiting: + preferRealIpHeader: false + backups: + common: + numberOfBackups: 50 + chat: + enabled: true + checkIntegrity: true + maxTotalBackups: -1 + 
throttleInterval: 10000 + thumbnails: + enabled: true + format: "jpg" + quality: 95 + dimensions: { 'bg': [160, 90], 'avatar': [96, 144], 'persona': [96, 144] } + performance: + lazyLoadCharacters: false + memoryCacheCapacity: '100mb' + useDiskCache: true + cacheBuster: + enabled: false + userAgentPattern: '' + allowKeysExposure: false + skipContentCheck: false + whitelistImportDomains: + - localhost + - cdn.discordapp.com + - files.catbox.moe + - raw.githubusercontent.com + - char-archive.evulid.cc + requestOverrides: [] + extensions: + enabled: true + autoUpdate: true + models: + autoDownload: true + classification: Cohee/distilbert-base-uncased-go-emotions-onnx + captioning: Xenova/vit-gpt2-image-captioning + embedding: Cohee/jina-embeddings-v2-base-en + speechToText: Xenova/whisper-small + textToSpeech: Xenova/speecht5_tts + enableDownloadableTokenizers: true + promptPlaceholder: "[Start a new chat]" + openai: + randomizeUserId: false + captionSystemPrompt: "" + deepl: + formality: default + mistral: + enablePrefix: false + ollama: + keepAlive: -1 + batchSize: -1 + claude: + enableSystemPromptCache: false + cachingAtDepth: -1 + extendedTTL: false + gemini: + apiVersion: 'v1beta' + enableServerPlugins: false + enableServerPluginsAutoUpdate: true diff --git a/manifests/sillytavern/deployment.yaml b/manifests/sillytavern/deployment.yaml new file mode 100644 index 0000000..6dd932f --- /dev/null +++ b/manifests/sillytavern/deployment.yaml @@ -0,0 +1,61 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: sillytavern + namespace: sillytavern +spec: + replicas: 1 + strategy: + type: Recreate + selector: + matchLabels: + app: sillytavern + template: + metadata: + labels: + app: sillytavern + spec: + containers: + - name: sillytavern + image: ghcr.io/sillytavern/sillytavern:latest + ports: + - containerPort: 8000 + protocol: TCP + env: + - name: NODE_ENV + value: production + - name: FORCE_COLOR + value: "1" + envFrom: + - secretRef: + name: sillytavern-auth + 
volumeMounts: + - name: config + mountPath: /home/node/app/config/config.yaml + subPath: config.yaml  # mounts only this ConfigMap key as a single file; subPath mounts do not pick up later ConfigMap edits, so restart the pod after changing it + - name: data + mountPath: /home/node/app/data + - name: plugins + mountPath: /home/node/app/plugins + - name: extensions + mountPath: /home/node/app/public/scripts/extensions/third-party + resources: + requests: + cpu: "1" + memory: 1Gi + limits: + cpu: "4" + memory: 4Gi + volumes: + - name: config + configMap: + name: sillytavern-config + - name: data + persistentVolumeClaim: + claimName: sillytavern-data + - name: plugins + persistentVolumeClaim: + claimName: sillytavern-plugins + - name: extensions + persistentVolumeClaim: + claimName: sillytavern-extensions diff --git a/manifests/sillytavern/ingress.yaml b/manifests/sillytavern/ingress.yaml new file mode 100644 index 0000000..0b70c62 --- /dev/null +++ b/manifests/sillytavern/ingress.yaml @@ -0,0 +1,30 @@ +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: sillytavern + namespace: sillytavern + labels: + app.kubernetes.io/name: sillytavern + app.kubernetes.io/component: frontend + app.kubernetes.io/part-of: sillytavern + annotations: + cert-manager.io/cluster-issuer: letsencrypt-production + traefik.ingress.kubernetes.io/router.entrypoints: websecure + traefik.ingress.kubernetes.io/router.tls: "true" +spec: + ingressClassName: traefik + tls: + - hosts: + - sillytavern.mrt0rtikize.ru + secretName: sillytavern-tls  # NOTE(review): presumably issued by cert-manager through the cluster-issuer annotation above; confirm the issuer exists + rules: + - host: sillytavern.mrt0rtikize.ru + http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: sillytavern + port: + number: 8000  # port exposed by the sillytavern Service diff --git a/manifests/sillytavern/namespace.yaml b/manifests/sillytavern/namespace.yaml new file mode 100644 index 0000000..7f40ad6 --- /dev/null +++ b/manifests/sillytavern/namespace.yaml @@ -0,0 +1,4 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: sillytavern diff --git a/manifests/sillytavern/pvc.yaml b/manifests/sillytavern/pvc.yaml new file mode 100644 index 0000000..06508ac --- /dev/null +++
b/manifests/sillytavern/pvc.yaml @@ -0,0 +1,35 @@ +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: sillytavern-data + namespace: sillytavern +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 5Gi +--- +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: sillytavern-plugins + namespace: sillytavern +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 1Gi +--- +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: sillytavern-extensions + namespace: sillytavern +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 1Gi diff --git a/manifests/sillytavern/secret.yaml b/manifests/sillytavern/secret.yaml new file mode 100644 index 0000000..b133092 --- /dev/null +++ b/manifests/sillytavern/secret.yaml @@ -0,0 +1,10 @@ +apiVersion: v1 +kind: Secret +metadata: + name: sillytavern-auth + namespace: sillytavern +type: Opaque +stringData:  # values must be strings, so keep every entry quoted + SILLYTAVERN_BASICAUTHMODE: "true" + SILLYTAVERN_BASICAUTHUSER_USERNAME: "admin" + SILLYTAVERN_BASICAUTHUSER_PASSWORD: "0cdaa30c396dae77"  # NOTE(review): credential is committed in plaintext; rotate it and supply it from a sealed or external secret instead of Git diff --git a/manifests/sillytavern/service.yaml b/manifests/sillytavern/service.yaml new file mode 100644 index 0000000..16c1840 --- /dev/null +++ b/manifests/sillytavern/service.yaml @@ -0,0 +1,13 @@ +apiVersion: v1 +kind: Service +metadata: + name: sillytavern + namespace: sillytavern +spec: + selector: + app: sillytavern + ports: + - port: 8000 + targetPort: 8000 + protocol: TCP + type: ClusterIP diff --git a/metrics/README.md b/metrics/README.md new file mode 100644 index 0000000..71fca4c --- /dev/null +++ b/metrics/README.md @@ -0,0 +1,62 @@ +# metrics stack + +Opinionated manifests for deploying kube-prometheus-stack (Prometheus Operator + Grafana) together with a VictoriaMetrics single-node database in the `metrics` namespace.
+ +## Install / upgrade + +```sh +kubectl apply -f metrics/namespace.yaml + +# kube-prometheus-stack +# add the prometheus-community chart repository, then install or upgrade the release +helm repo add prometheus-community https://prometheus-community.github.io/helm-charts +helm repo update +helm upgrade --install kube-prometheus-stack prometheus-community/kube-prometheus-stack \ + --namespace metrics \ + --values metrics/kube-prometheus-stack-values.yaml + +kubectl --namespace metrics get secret kube-prometheus-stack-grafana \ + -o jsonpath="{.data.admin-password}" | base64 -d +echo + +# expose grafana via Traefik +kubectl apply -f metrics/grafana-ingress.yaml +kubectl -n metrics get ingress grafana + +# victoria metrics for long-term storage +helm repo add victoria-metrics https://victoriametrics.github.io/helm-charts +helm upgrade --install victoria-metrics-single victoria-metrics/victoria-metrics-single \ + --namespace metrics \ + --values metrics/victoria-metrics-single-values.yaml + +# expose victoria metrics via ClusterIP for Prometheus/Grafana +kubectl apply -f metrics/victoria-metrics-service.yaml +``` + +The values files do not pin a storage class, so every PersistentVolumeClaim is provisioned by the cluster's default StorageClass; set the `storageClassName`/`storageClass` fields and adjust the capacities if you prefer something else. +Before applying `metrics/grafana-ingress.yaml`, update the host (`grafana.mrt0rtikize.ru`) and, if needed, change the `cert-manager.io/cluster-issuer` annotation to match your staging/production workflow. The ingress uses the `traefik` ingress class. + +## Components + +- **Prometheus Operator** provisions Prometheus, Alertmanager and related CRDs. Remote write targets VictoriaMetrics for durable retention. +- **Grafana** is pre-provisioned with persistence enabled and a secondary data source pointing at VictoriaMetrics. +- **VictoriaMetrics** stores metrics for long-term retention while also serving query traffic for Grafana.
A dedicated ClusterIP service (`metrics/victoria-metrics-service.yaml`) exposes port 8428 for Prometheus remote write and Grafana queries. + +## Database choices + +Prometheus ships with an embedded TSDB. For longer retention, clustering or multi-tenant needs you can offload data to: + +- **VictoriaMetrics** (single, clustered, or managed) – cost-efficient, Prometheus-compatible, supports multi-year retention. +- **Thanos / Cortex / Grafana Mimir** – horizontally scalable object-storage backed TSDBs with multi-cluster federation. +- **ClickHouse / TimescaleDB / PostgreSQL** – SQL stores for advanced analytics (requires Promscale or similar adapter). +- **Graphite / InfluxDB** – legacy or streaming-friendly stores; integrate via remote write adapters. + +Pick the backend that matches your retention and query latency requirements. Remote write configuration lives under `prometheus.prometheusSpec.remoteWrite` in `kube-prometheus-stack-values.yaml`. + +## Post-install checks + +```sh +kubectl -n metrics get pods +kubectl -n metrics get svc +kubectl get prometheus,prometheusrules,servicemonitors -A +``` diff --git a/metrics/grafana-ingress.yaml b/metrics/grafana-ingress.yaml new file mode 100644 index 0000000..29413d3 --- /dev/null +++ b/metrics/grafana-ingress.yaml @@ -0,0 +1,26 @@ +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: grafana + namespace: metrics + annotations: + cert-manager.io/cluster-issuer: letsencrypt-production + traefik.ingress.kubernetes.io/router.entrypoints: websecure + traefik.ingress.kubernetes.io/router.tls: "true" +spec: + ingressClassName: traefik + tls: + - hosts: + - grafana.mrt0rtikize.ru + secretName: grafana-tls + rules: + - host: grafana.mrt0rtikize.ru + http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: kube-prometheus-stack-grafana + port: + number: 80 diff --git a/metrics/kube-prometheus-stack-values.yaml b/metrics/kube-prometheus-stack-values.yaml new file mode 100644 index
0000000..c4fb4b3 --- /dev/null +++ b/metrics/kube-prometheus-stack-values.yaml @@ -0,0 +1,90 @@ +fullnameOverride: kube-prometheus +namespaceOverride: metrics + +prometheusOperator: + namespace: metrics + admissionWebhooks: + failurePolicy: Ignore + +alertmanager: + enabled: true + alertmanagerSpec: + resources: + requests: + cpu: 50m + memory: 128Mi + limits: + cpu: 200m + memory: 512Mi + storage: + volumeClaimTemplate: + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 10Gi + +prometheus: + enabled: true + prometheusSpec: + replicas: 1 + retention: 15d + walCompression: true + serviceMonitorSelectorNilUsesHelmValues: false  # do not restrict discovery to ServiceMonitors labelled with this Helm release + podMonitorSelectorNilUsesHelmValues: false  # same for PodMonitors + resources: + requests: + cpu: 100m + memory: 512Mi + limits: + cpu: 1000m + memory: 1Gi + storageSpec: + volumeClaimTemplate: + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 50Gi + remoteWrite: + - url: http://victoria-metrics.metrics.svc.cluster.local:8428/api/v1/write  # the victoria-metrics Service in the metrics namespace + queueConfig: + maxSamplesPerSend: 10000 + capacity: 20000  # per-shard buffer; must exceed maxSamplesPerSend or a full batch can never be queued + maxShards: 30 + +kubeEtcd: + enabled: false + +kubeControllerManager: + enabled: false + +kubeScheduler: + enabled: false + +kubeProxy: + enabled: false + +grafana: + enabled: true + adminUser: admin + adminPassword: change-me  # NOTE(review): placeholder credential committed to Git while Grafana is published through an Ingress; move it to a Secret referenced by grafana.admin.existingSecret + defaultDashboardsEnabled: true + resources: + requests: + cpu: 50m + memory: 256Mi + limits: + cpu: 200m + memory: 512Mi + persistence: + enabled: true + size: 10Gi + additionalDataSources: + - name: victoria-metrics + type: prometheus + access: proxy + url: http://victoria-metrics.metrics.svc.cluster.local:8428 + isDefault: false diff --git a/metrics/namespace.yaml b/metrics/namespace.yaml new file mode 100644 index 0000000..6d57933 --- /dev/null +++ b/metrics/namespace.yaml @@ -0,0 +1,4 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: metrics diff --git a/metrics/victoria-metrics-service.yaml b/metrics/victoria-metrics-service.yaml new file mode 100644 index 0000000..a99b2e2 ---
/dev/null +++ b/metrics/victoria-metrics-service.yaml @@ -0,0 +1,19 @@ +apiVersion: v1 +kind: Service +metadata: + name: victoria-metrics  # host name used by the remote-write URL and the Grafana data source in kube-prometheus-stack-values.yaml + namespace: metrics + labels: + app.kubernetes.io/name: victoria-metrics-single + app.kubernetes.io/instance: victoria-metrics-single + app: server +spec: + selector:  # NOTE(review): presumably mirrors the pod labels rendered by the victoria-metrics-single chart; verify with kubectl get pods --show-labels + app.kubernetes.io/name: victoria-metrics-single + app.kubernetes.io/instance: victoria-metrics-single + app: server + ports: + - name: http + port: 8428 + targetPort: 8428 + type: ClusterIP diff --git a/metrics/victoria-metrics-single-values.yaml b/metrics/victoria-metrics-single-values.yaml new file mode 100644 index 0000000..fb63e48 --- /dev/null +++ b/metrics/victoria-metrics-single-values.yaml @@ -0,0 +1,23 @@ +fullnameOverride: victoria-metrics-single +namespaceOverride: metrics + +server: + retentionPeriod: 30d + scrapeInterval: 30s + replicaCount: 1 + persistentVolume: + enabled: true + size: 200Gi + resources: + requests: + cpu: 100m + memory: 256Mi + limits: + cpu: 500m + memory: 1Gi + service: + type: ClusterIP + port: 8428  # same port the victoria-metrics Service targets + +serviceAccount: + create: true diff --git a/sillytavern/configmap.yaml b/sillytavern/configmap.yaml new file mode 100644 index 0000000..f2543ea --- /dev/null +++ b/sillytavern/configmap.yaml @@ -0,0 +1,122 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: sillytavern-config + namespace: sillytavern +data: + config.yaml: | + dataRoot: ./data + listen: true + listenAddress: + ipv4: 0.0.0.0 + ipv6: '[::]' + protocol: + ipv4: true + ipv6: false + dnsPreferIPv6: false + browserLaunch: + enabled: false + browser: 'default' + hostname: 'auto' + port: -1 + avoidLocalhost: false + port: 8000 + ssl: + enabled: false + certPath: "./certs/cert.pem" + keyPath: "./certs/privkey.pem" + keyPassphrase: "" + whitelistMode: false + enableForwardedWhitelist: false + whitelist: + - ::1 + - 127.0.0.1 + whitelistDockerHosts: false + basicAuthMode: false + basicAuthUser: + username: "user" + password: "password" + enableCorsProxy: false +
requestProxy: + enabled: false + url: "socks5://username:password@example.com:1080" + bypass: + - localhost + - 127.0.0.1 + enableUserAccounts: false + enableDiscreetLogin: false + perUserBasicAuth: false + sso: + autheliaAuth: false + authentikAuth: false + hostWhitelist: + enabled: false + scan: true + hosts: [] + sessionTimeout: -1 + disableCsrfProtection: false + securityOverride: false + logging: + enableAccessLog: true + minLogLevel: 0 + rateLimiting: + preferRealIpHeader: false + backups: + common: + numberOfBackups: 50 + chat: + enabled: true + checkIntegrity: true + maxTotalBackups: -1 + throttleInterval: 10000 + thumbnails: + enabled: true + format: "jpg" + quality: 95 + dimensions: { 'bg': [160, 90], 'avatar': [96, 144], 'persona': [96, 144] } + performance: + lazyLoadCharacters: false + memoryCacheCapacity: '100mb' + useDiskCache: true + cacheBuster: + enabled: false + userAgentPattern: '' + allowKeysExposure: false + skipContentCheck: false + whitelistImportDomains: + - localhost + - cdn.discordapp.com + - files.catbox.moe + - raw.githubusercontent.com + - char-archive.evulid.cc + requestOverrides: [] + extensions: + enabled: true + autoUpdate: true + models: + autoDownload: true + classification: Cohee/distilbert-base-uncased-go-emotions-onnx + captioning: Xenova/vit-gpt2-image-captioning + embedding: Cohee/jina-embeddings-v2-base-en + speechToText: Xenova/whisper-small + textToSpeech: Xenova/speecht5_tts + enableDownloadableTokenizers: true + promptPlaceholder: "[Start a new chat]" + openai: + randomizeUserId: false + captionSystemPrompt: "" + deepl: + formality: default + mistral: + enablePrefix: false + ollama: + keepAlive: -1 + batchSize: -1 + claude: + enableSystemPromptCache: false + cachingAtDepth: -1 + extendedTTL: false + gemini: + apiVersion: 'v1beta' + enableServerPlugins: false + enableServerPluginsAutoUpdate: true diff --git a/sillytavern/deployment.yaml b/sillytavern/deployment.yaml new file mode 100644 index 0000000..6dd932f --- 
/dev/null +++ b/sillytavern/deployment.yaml @@ -0,0 +1,61 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: sillytavern + namespace: sillytavern +spec: + replicas: 1 + strategy: + type: Recreate  # the old pod is stopped before the new one starts; the mounted PVCs are ReadWriteOnce + selector: + matchLabels: + app: sillytavern + template: + metadata: + labels: + app: sillytavern + spec: + containers: + - name: sillytavern + image: ghcr.io/sillytavern/sillytavern:latest  # NOTE(review): floating tag, so a pod restart can pull a different build; pin a release tag or digest + ports: + - containerPort: 8000 + protocol: TCP + env: + - name: NODE_ENV + value: production + - name: FORCE_COLOR + value: "1" + envFrom: + - secretRef: + name: sillytavern-auth  # every key of this Secret becomes an environment variable of the container + volumeMounts: + - name: config + mountPath: /home/node/app/config/config.yaml + subPath: config.yaml  # mounts only this ConfigMap key as a single file; subPath mounts do not pick up later ConfigMap edits, so restart the pod after changing it + - name: data + mountPath: /home/node/app/data + - name: plugins + mountPath: /home/node/app/plugins + - name: extensions + mountPath: /home/node/app/public/scripts/extensions/third-party + resources: + requests: + cpu: "1" + memory: 1Gi + limits: + cpu: "4" + memory: 4Gi + volumes: + - name: config + configMap: + name: sillytavern-config + - name: data + persistentVolumeClaim: + claimName: sillytavern-data + - name: plugins + persistentVolumeClaim: + claimName: sillytavern-plugins + - name: extensions + persistentVolumeClaim: + claimName: sillytavern-extensions diff --git a/sillytavern/ingress.yaml b/sillytavern/ingress.yaml new file mode 100644 index 0000000..0b70c62 --- /dev/null +++ b/sillytavern/ingress.yaml @@ -0,0 +1,30 @@ +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: sillytavern + namespace: sillytavern + labels: + app.kubernetes.io/name: sillytavern + app.kubernetes.io/component: frontend + app.kubernetes.io/part-of: sillytavern + annotations: + cert-manager.io/cluster-issuer: letsencrypt-production + traefik.ingress.kubernetes.io/router.entrypoints: websecure + traefik.ingress.kubernetes.io/router.tls: "true" +spec: + ingressClassName: traefik + tls: + - hosts: + - sillytavern.mrt0rtikize.ru + secretName: sillytavern-tls + rules: + - host:
sillytavern.mrt0rtikize.ru + http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: sillytavern + port: + number: 8000 diff --git a/sillytavern/namespace.yaml b/sillytavern/namespace.yaml new file mode 100644 index 0000000..7f40ad6 --- /dev/null +++ b/sillytavern/namespace.yaml @@ -0,0 +1,4 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: sillytavern diff --git a/sillytavern/pvc.yaml b/sillytavern/pvc.yaml new file mode 100644 index 0000000..06508ac --- /dev/null +++ b/sillytavern/pvc.yaml @@ -0,0 +1,35 @@ +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: sillytavern-data + namespace: sillytavern +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 5Gi +--- +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: sillytavern-plugins + namespace: sillytavern +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 1Gi +--- +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: sillytavern-extensions + namespace: sillytavern +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 1Gi diff --git a/sillytavern/secret.yaml b/sillytavern/secret.yaml new file mode 100644 index 0000000..b133092 --- /dev/null +++ b/sillytavern/secret.yaml @@ -0,0 +1,10 @@ +apiVersion: v1 +kind: Secret +metadata: + name: sillytavern-auth + namespace: sillytavern +type: Opaque +stringData:  # values must be strings, so keep every entry quoted + SILLYTAVERN_BASICAUTHMODE: "true" + SILLYTAVERN_BASICAUTHUSER_USERNAME: "admin" + SILLYTAVERN_BASICAUTHUSER_PASSWORD: "0cdaa30c396dae77"  # NOTE(review): credential is committed in plaintext; rotate it and supply it from a sealed or external secret instead of Git diff --git a/sillytavern/service.yaml b/sillytavern/service.yaml new file mode 100644 index 0000000..16c1840 --- /dev/null +++ b/sillytavern/service.yaml @@ -0,0 +1,13 @@ +apiVersion: v1 +kind: Service +metadata: + name: sillytavern + namespace: sillytavern +spec: + selector: + app: sillytavern + ports: + - port: 8000 + targetPort: 8000 + protocol: TCP + type: ClusterIP