Initial commit: k3s GitOps manifests with ArgoCD App-of-Apps

This commit is contained in:
2026-05-05 13:18:51 +03:00
commit 5d9a80b976
65 changed files with 3445 additions and 0 deletions

1
.gitignore vendored Normal file
View File

@@ -0,0 +1 @@
config

25
argocd/app-of-apps.yaml Normal file
View File

@@ -0,0 +1,25 @@
---
# Root "App-of-Apps" Application: syncs every *.yaml manifest found under
# argocd/apps in the in-cluster Gitea repository into the argocd namespace.
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
  name: root-app
  namespace: argocd
  finalizers:
    - resources-finalizer.argocd.argoproj.io
spec:
  project: default
  destination:
    server: https://kubernetes.default.svc
    namespace: argocd
  source:
    repoURL: http://gitea.gitea.svc.cluster.local:3000/gitea/k3s-manifests.git
    targetRevision: main
    path: argocd/apps
    directory:
      # Descend into sub-directories; only files matching the glob are used.
      recurse: true
      include: '*.yaml'
  syncPolicy:
    automated:
      prune: true
      selfHeal: true
    syncOptions:
      - CreateNamespace=true

View File

@@ -0,0 +1,26 @@
---
# Argo CD Application that installs the cert-manager Helm chart from the
# Jetstack chart repository into the cert-manager namespace.
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
  name: cert-manager
  namespace: argocd
  finalizers:
    - resources-finalizer.argocd.argoproj.io
spec:
  project: default
  destination:
    server: https://kubernetes.default.svc
    namespace: cert-manager
  source:
    repoURL: https://charts.jetstack.io
    chart: cert-manager
    targetRevision: v1.20.1
    helm:
      # Inline Helm values (a YAML document passed as a string).
      values: |
        crds:
          enabled: true
  syncPolicy:
    automated:
      prune: true
      selfHeal: true
    syncOptions:
      - CreateNamespace=true

22
argocd/apps/llama.yaml Normal file
View File

@@ -0,0 +1,22 @@
---
# Argo CD Application that syncs the plain manifests under manifests/llama
# from the in-cluster Gitea repository into the llama namespace.
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
  name: llama
  namespace: argocd
  finalizers:
    - resources-finalizer.argocd.argoproj.io
spec:
  project: default
  destination:
    server: https://kubernetes.default.svc
    namespace: llama
  source:
    repoURL: http://gitea.gitea.svc.cluster.local:3000/gitea/k3s-manifests.git
    targetRevision: main
    path: manifests/llama
  syncPolicy:
    automated:
      prune: true
      selfHeal: true
    syncOptions:
      - CreateNamespace=true

29
argocd/apps/longhorn.yaml Normal file
View File

@@ -0,0 +1,29 @@
---
# Argo CD Application for Longhorn: the upstream Helm chart combined with the
# extra manifests kept under manifests/longhorn in the Gitea repository.
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
  name: longhorn
  namespace: argocd
  finalizers:
    - resources-finalizer.argocd.argoproj.io
spec:
  project: default
  destination:
    server: https://kubernetes.default.svc
    namespace: longhorn-system
  sources:
    # Source 1: the Longhorn Helm chart, with the pre-upgrade checker job off.
    - repoURL: https://charts.longhorn.io
      chart: longhorn
      targetRevision: '1.11.2'
      helm:
        values: |
          preUpgradeChecker:
            jobEnabled: false
    # Source 2: additional plain manifests from this repository.
    - repoURL: http://gitea.gitea.svc.cluster.local:3000/gitea/k3s-manifests.git
      targetRevision: main
      path: manifests/longhorn
  syncPolicy:
    automated:
      prune: true
      selfHeal: true
    syncOptions:
      - CreateNamespace=true

29
argocd/apps/metallb.yaml Normal file
View File

@@ -0,0 +1,29 @@
---
# Argo CD Application for MetalLB: the upstream Helm chart combined with the
# extra manifests kept under manifests/metallb in the Gitea repository.
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
  name: metallb
  namespace: argocd
  finalizers:
    - resources-finalizer.argocd.argoproj.io
spec:
  project: default
  destination:
    server: https://kubernetes.default.svc
    namespace: metallb-system
  sources:
    # Source 1: the MetalLB Helm chart with its CRDs enabled.
    - repoURL: https://metallb.github.io/metallb
      chart: metallb
      targetRevision: '0.14.5'
      helm:
        values: |
          crds:
            enabled: true
    # Source 2: additional plain manifests from this repository.
    - repoURL: http://gitea.gitea.svc.cluster.local:3000/gitea/k3s-manifests.git
      targetRevision: main
      path: manifests/metallb
  syncPolicy:
    automated:
      prune: true
      selfHeal: true
    syncOptions:
      - CreateNamespace=true

37
argocd/apps/metrics.yaml Normal file
View File

@@ -0,0 +1,37 @@
---
# Argo CD Application for the monitoring stack. It combines four sources:
# two Helm charts (kube-prometheus-stack, victoria-metrics-single), a
# "values" reference to this repository for their value files, and the plain
# manifests under manifests/metrics.
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
  name: metrics
  namespace: argocd
  finalizers:
    - resources-finalizer.argocd.argoproj.io
spec:
  project: default
  sources:
    - repoURL: https://prometheus-community.github.io/helm-charts
      chart: kube-prometheus-stack
      targetRevision: 82.16.2
      helm:
        valueFiles:
          # $values resolves to the source below that declares `ref: values`.
          - $values/manifests/metrics/kube-prometheus-stack-values.yaml
    - repoURL: https://victoriametrics.github.io/helm-charts/
      chart: victoria-metrics-single
      targetRevision: 0.34.0
      helm:
        valueFiles:
          - $values/manifests/metrics/victoria-metrics-single-values.yaml
    # Value-file provider only (no path, so nothing is rendered from it).
    - repoURL: http://gitea.gitea.svc.cluster.local:3000/gitea/k3s-manifests.git
      targetRevision: main
      ref: values
    # Additional plain manifests for the metrics namespace.
    - repoURL: http://gitea.gitea.svc.cluster.local:3000/gitea/k3s-manifests.git
      targetRevision: main
      path: manifests/metrics
  destination:
    server: https://kubernetes.default.svc
    namespace: metrics
  syncPolicy:
    automated:
      prune: true
      selfHeal: true
    syncOptions:
      - CreateNamespace=true
      # The kube-prometheus-stack CRDs are too large for the
      # last-applied-configuration annotation that client-side apply writes;
      # server-side apply avoids the "metadata.annotations: Too long" failure.
      - ServerSideApply=true

View File

@@ -0,0 +1,22 @@
---
# Argo CD Application that syncs the plain manifests under
# manifests/sillytavern from the in-cluster Gitea repository.
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
  name: sillytavern
  namespace: argocd
  finalizers:
    - resources-finalizer.argocd.argoproj.io
spec:
  project: default
  destination:
    server: https://kubernetes.default.svc
    namespace: sillytavern
  source:
    repoURL: http://gitea.gitea.svc.cluster.local:3000/gitea/k3s-manifests.git
    targetRevision: main
    path: manifests/sillytavern
  syncPolicy:
    automated:
      prune: true
      selfHeal: true
    syncOptions:
      - CreateNamespace=true

229
bootstrap.sh Executable file
View File

@@ -0,0 +1,229 @@
#!/bin/bash
set -e
# =============================================================================
# k3s GitOps Bootstrap Script
# =============================================================================
# This script sets up Gitea + ArgoCD on the k3s cluster and configures
# GitOps with the App-of-Apps pattern.
#
# Prerequisites:
# - kubectl + kubeconfig access to the cluster
# - helm installed
# - git installed
# - DNS for *.mrt0rtikize.ru pointing to cluster nodes
# =============================================================================
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
# Repository root: the directory holding argocd/, bootstrap/ and the kubeconfig
# file named "config". When this script sits at the repository root, that is
# SCRIPT_DIR itself; the parent directory is kept as a fallback for a layout
# where the script lives one level below the root.
if [ -d "${SCRIPT_DIR}/argocd" ] && [ -d "${SCRIPT_DIR}/bootstrap" ]; then
    REPO_DIR="${SCRIPT_DIR}"
else
    REPO_DIR="$(dirname "$SCRIPT_DIR")"
fi
KUBECONFIG="${REPO_DIR}/config"
# Exported so tools that are not invoked through ${KCTL} (helm) target the
# same cluster instead of the user's default kubeconfig.
export KUBECONFIG
KCTL="kubectl --kubeconfig ${KUBECONFIG}"
# ANSI colour escapes used with `echo -e` throughout the script.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m'
echo -e "${GREEN}==============================================${NC}"
echo -e "${GREEN} k3s GitOps Bootstrap${NC}"
echo -e "${GREEN}==============================================${NC}"
echo ""
# -----------------------------------------------------------------------------
# Step 1: Deploy Gitea
# -----------------------------------------------------------------------------
echo -e "${YELLOW}[1/6] Deploying Gitea...${NC}"
# Create the namespace first. Applying a directory submits files in lexical
# order, so namespaced objects could otherwise be sent before the Namespace
# manifest and fail with "namespace not found", aborting the script (set -e).
${KCTL} create namespace gitea --dry-run=client -o yaml | ${KCTL} apply -f -
${KCTL} apply -f "${REPO_DIR}/bootstrap/gitea/"
# NOTE(review): the Gitea PVC requests storageClassName "longhorn" and the
# Ingress references a cert-manager ClusterIssuer, yet both are only installed
# by Argo CD in later steps -- confirm they already exist on the cluster,
# otherwise the pod stays Pending and the wait below times out.
echo " Waiting for Gitea pod to be ready..."
${KCTL} wait --for=condition=ready pod -l app=gitea -n gitea --timeout=120s 2>/dev/null || {
    echo -e "${RED} Gitea pod not ready after 120s. Checking status...${NC}"
    ${KCTL} get pod -n gitea
    exit 1
}
echo -e "${GREEN} Gitea deployed!${NC}"
echo ""
# -----------------------------------------------------------------------------
# Step 2: Gitea initial setup (manual)
# -----------------------------------------------------------------------------
# Prints the manual install-wizard instructions, then collects the admin
# password chosen by the operator and defines the Gitea URL variables used by
# the following steps.
echo -e "${YELLOW}[2/6] Gitea setup${NC}"
echo ""
echo " Gitea is running. Please open the install page in your browser:"
echo ""
echo -e " ${GREEN}https://git.mrt0rtikize.ru/${NC}"
echo ""
echo " Complete the install wizard with these settings:"
echo " - Database: SQLite3"
echo " - Admin Username: gitea"
echo " - Admin Password: <choose a strong password>"
echo " - Confirm Password: <same>"
echo " - Admin Email: admin@mrt0rtikize.ru"
echo ""
echo " After install, create a repository named:"
echo ""
echo -e " ${GREEN}k3s-manifests${NC}"
echo ""
echo " Make it PUBLIC (so ArgoCD can read it without auth)."
echo ""
GITEA_PASSWORD=""
# -s: do not echo the password to the terminal.
# -r: keep backslashes in the password literal instead of treating them as
#     escape characters.
read -r -s -p " Gitea admin password (from install wizard): " GITEA_PASSWORD
# The silent read swallows the user's newline; emit one to keep output tidy.
echo ""
if [ -z "$GITEA_PASSWORD" ]; then
    echo -e "${RED} Password is required. Exiting.${NC}"
    exit 1
fi
# Gitea endpoints: the external URL is used for pushing from this machine,
# the internal (cluster DNS) URL is what ArgoCD reads from.
GITEA_EXTERNAL="https://git.mrt0rtikize.ru"
GITEA_INTERNAL="http://gitea.gitea.svc.cluster.local:3000"
GITEA_USER="gitea"
GITEA_REPO="k3s-manifests"
GITEA_REPO_URL="${GITEA_EXTERNAL}/${GITEA_USER}/${GITEA_REPO}.git"
GITEA_INTERNAL_REPO="${GITEA_INTERNAL}/${GITEA_USER}/${GITEA_REPO}.git"
echo ""
# -----------------------------------------------------------------------------
# Step 3: Initialize git repo and push manifests
# -----------------------------------------------------------------------------
echo -e "${YELLOW}[3/6] Initializing git repo...${NC}"
# Create .gitignore (keeps the kubeconfig file "config" out of the repo)
cat > "${REPO_DIR}/.gitignore" << 'GITIGNORE'
# Sensitive files
config
GITIGNORE
cd "${REPO_DIR}"
if [ ! -d ".git" ]; then
    git init
    git checkout -b main
fi
git add .
# Only commit when something is staged, so that a genuine commit failure
# (e.g. missing user.name/user.email) stops the script instead of being
# reported as "nothing to commit".
if git diff --cached --quiet; then
    echo " Nothing to commit (already up to date)"
else
    git commit -m "Initial commit: k3s GitOps manifests"
fi
echo " Pushing to Gitea..."
# Prompts are disabled, so hand git the admin credentials collected in step 2
# through a one-shot credential helper. The values travel via the environment
# of this single command and are not written to the URL or to git config.
# The empty `credential.helper=` entry clears any previously configured helper.
GITEA_USER="${GITEA_USER}" GITEA_PASSWORD="${GITEA_PASSWORD}" GIT_TERMINAL_PROMPT=0 \
    git -c credential.helper= \
        -c 'credential.helper=!f() { test "$1" = get || exit 0; printf "username=%s\npassword=%s\n" "$GITEA_USER" "$GITEA_PASSWORD"; }; f' \
        push -u "${GITEA_REPO_URL}" main 2>/dev/null || {
    echo ""
    echo -e " ${RED}Push failed.${NC} Did you create the '${GITEA_REPO}' repo in Gitea?"
    echo " You can retry manually:"
    echo " cd ${REPO_DIR}"
    echo " git push -u ${GITEA_REPO_URL} main"
    echo ""
    read -p " Press Enter after pushing... " -r
}
echo -e "${GREEN} Manifests pushed to Gitea!${NC}"
echo ""
# -----------------------------------------------------------------------------
# Step 4: Install ArgoCD
# -----------------------------------------------------------------------------
echo -e "${YELLOW}[4/6] Installing ArgoCD...${NC}"
helm repo add argo https://argoproj.github.io/argo-helm 2>/dev/null || true
helm repo update
# --kubeconfig: install into the same cluster ${KCTL} talks to, rather than
#   whatever helm's default kubeconfig points at.
# --set values are single-quoted so the shell neither globs "[0]" nor eats the
#   backslashes; "\." keeps a dot inside a single key name (server.insecure,
#   timeout.reconciliation) instead of creating a nested map.
helm upgrade --install argocd argo/argo-cd \
    --kubeconfig "${KUBECONFIG}" \
    --namespace argocd \
    --create-namespace \
    --set 'server.extraArgs[0]=--insecure' \
    --set 'configs.params.server\.insecure=true' \
    --set 'configs.cm.timeout\.reconciliation=180s' \
    --wait \
    --timeout 300s
# Initial admin password generated by the chart; stays empty if the secret
# cannot be read.
ARGOCD_PASSWORD=$(${KCTL} -n argocd get secret argocd-initial-admin-secret -o jsonpath="{.data.password}" 2>/dev/null | base64 -d)
echo -e "${GREEN} ArgoCD installed!${NC}"
echo ""
echo " ArgoCD UI (port-forward):"
echo " kubectl --kubeconfig ${KUBECONFIG} port-forward svc/argocd-server -n argocd 8080:80"
echo " Username: admin"
echo ""
if [ -n "$ARGOCD_PASSWORD" ]; then
    echo " Password: ${ARGOCD_PASSWORD}"
fi
echo ""
# -----------------------------------------------------------------------------
# Step 5: Configure ArgoCD → Gitea connection
# -----------------------------------------------------------------------------
echo -e "${YELLOW}[5/6] Configuring ArgoCD → Gitea connection...${NC}"
# Add Gitea as a repository in ArgoCD
# Using argocd CLI if available, otherwise a labelled repository secret
if command -v argocd &> /dev/null; then
    echo " Using argocd CLI..."
    ARGOCD_SERVER="localhost:8080"
    echo " Please port-forward ArgoCD in another terminal:"
    echo " kubectl --kubeconfig ${KUBECONFIG} port-forward svc/argocd-server -n argocd 8080:80"
    echo ""
    read -p " Press Enter when ready..." -r
    # The server was installed with --insecure (plain HTTP behind port 80), so
    # --plaintext is passed to skip the CLI's interactive "no TLS" prompt.
    argocd login "${ARGOCD_SERVER}" --username admin --password "${ARGOCD_PASSWORD}" --insecure --plaintext
    argocd repo add "${GITEA_INTERNAL_REPO}" --name gitea-k3s --type git
else
    # Fallback: create repository secret manually
    echo " Creating repository secret manually..."
    # The secret is rendered client-side, labelled with `kubectl label --local`
    # (no whitespace-sensitive text patching of the YAML) and then applied.
    # Errors from the apply are left visible on purpose.
    ${KCTL} -n argocd create secret generic gitea-k3s-repo \
        --from-literal=url="${GITEA_INTERNAL_REPO}" \
        --from-literal=type=git \
        --from-literal=name=gitea-k3s \
        --dry-run=client -o yaml | \
        ${KCTL} label --local -f - \
            argocd.argoproj.io/secret-type=repository -o yaml | \
        ${KCTL} apply -f -
    # For a public repo, ArgoCD can access it without credentials
    # If the repo is private, uncomment and configure:
    # ${KCTL} -n argocd create secret generic gitea-k3s-repo \
    #     --from-literal=url="${GITEA_INTERNAL_REPO}" \
    #     --from-literal=type=git \
    #     --from-literal=name=gitea-k3s \
    #     --from-literal=username="${GITEA_USER}" \
    #     --from-literal=password="${GITEA_PASSWORD}" \
    #     --dry-run=client -o yaml | \
    #     ${KCTL} label --local -f - \
    #         argocd.argoproj.io/secret-type=repository -o yaml | \
    #     ${KCTL} apply -f -
fi
echo -e "${GREEN} Repository configured!${NC}"
echo ""
# -----------------------------------------------------------------------------
# Step 6: Apply the root app
# -----------------------------------------------------------------------------
# Creates the root Application and prints a closing summary with the commands
# needed to watch the child applications sync.
echo -e "${YELLOW}[6/6] Applying root App-of-Apps...${NC}"
${KCTL} apply -f "${REPO_DIR}/argocd/app-of-apps.yaml"
echo ""
for banner_line in \
    "==============================================" \
    " Bootstrap Complete!" \
    "=============================================="; do
    echo -e "${GREEN}${banner_line}${NC}"
done
echo ""
echo " Root app created. ArgoCD will now sync all child apps:"
echo ""
# One bullet per child application listed in the summary.
for child_app in \
    "cert-manager" \
    "metallb" \
    "longhorn" \
    "metrics (prometheus + victoria-metrics)" \
    "llama" \
    "sillytavern"; do
    echo " - ${child_app}"
done
# Unquoted heredoc: variables are expanded, text is otherwise printed verbatim.
cat << SUMMARY

 Monitor progress:
 kubectl --kubeconfig ${KUBECONFIG} port-forward svc/argocd-server -n argocd 8080:80
 Open http://localhost:8080
 Login: admin / ${ARGOCD_PASSWORD}

 Check sync status:
 kubectl --kubeconfig ${KUBECONFIG} get applications -n argocd

SUMMARY

36
bootstrap/argocd/install.sh Executable file
View File

@@ -0,0 +1,36 @@
#!/bin/bash
set -e
# Bootstrap ArgoCD on the k3s cluster
# This is a one-time manual step before GitOps takes over
KUBECONFIG="/home/mrt0rtikize/infra/k3s/config"
# Exported so helm (which is not run through ${KCTL}) targets the same cluster.
export KUBECONFIG
KCTL="kubectl --kubeconfig ${KUBECONFIG}"
echo "=== Installing ArgoCD ==="
# Add ArgoCD Helm repo
helm repo add argo https://argoproj.github.io/argo-helm 2>/dev/null || true
helm repo update
# Install ArgoCD
# --set values are single-quoted so the shell neither globs "[0]" nor eats the
# backslashes; "\." keeps a dot inside a single key name (server.insecure,
# timeout.reconciliation) instead of creating a nested map.
helm upgrade --install argocd argo/argo-cd \
    --kubeconfig "${KUBECONFIG}" \
    --namespace argocd \
    --create-namespace \
    --set 'server.extraArgs[0]=--insecure' \
    --set 'configs.params.server\.insecure=true' \
    --set 'configs.cm.timeout\.reconciliation=180s' \
    --wait \
    --timeout 300s
echo ""
echo "=== ArgoCD installed ==="
echo ""
echo "To access ArgoCD UI:"
echo " kubectl --kubeconfig ${KUBECONFIG} port-forward svc/argocd-server -n argocd 8080:80"
echo ""
echo "Admin password:"
# The secret stores the password base64-encoded; decode it for display.
${KCTL} -n argocd get secret argocd-initial-admin-secret -o jsonpath="{.data.password}" | base64 -d
echo ""
echo ""
echo "Login with username: admin"

View File

@@ -0,0 +1,62 @@
---
# Single-replica Gitea server. Repository data lives on the gitea-data PVC
# mounted at /data; HTTP is served on 3000 and SSH on 22.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: gitea
  namespace: gitea
spec:
  replicas: 1
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app: gitea
  template:
    metadata:
      labels:
        app: gitea
    spec:
      volumes:
        - name: data
          persistentVolumeClaim:
            claimName: gitea-data
      containers:
        - name: gitea
          image: gitea/gitea:1.24
          ports:
            - name: http
              containerPort: 3000
            - name: ssh
              containerPort: 22
          # GITEA__<section>__<KEY> variables configure Gitea.
          env:
            - name: GITEA__database__DB_TYPE
              value: sqlite3
            - name: GITEA__server__DOMAIN
              value: git.mrt0rtikize.ru
            - name: GITEA__server__ROOT_URL
              value: https://git.mrt0rtikize.ru
            - name: GITEA__server__HTTP_PORT
              value: '3000'
            - name: GITEA__server__SSH_PORT
              value: '22'
          volumeMounts:
            - name: data
              mountPath: /data
          resources:
            requests:
              cpu: 100m
              memory: 128Mi
            limits:
              cpu: 500m
              memory: 512Mi
          # Both probes issue an HTTP GET for "/" on the web port.
          readinessProbe:
            httpGet:
              path: /
              port: 3000
            initialDelaySeconds: 5
            periodSeconds: 5
          livenessProbe:
            httpGet:
              path: /
              port: 3000
            initialDelaySeconds: 10
            periodSeconds: 10

View File

@@ -0,0 +1,24 @@
---
# Exposes the gitea Service at git.mrt0rtikize.ru through Traefik, with the
# TLS certificate stored in the gitea-tls secret.
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: gitea
  namespace: gitea
  annotations:
    cert-manager.io/cluster-issuer: letsencrypt-production
spec:
  ingressClassName: traefik
  rules:
    - host: git.mrt0rtikize.ru
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: gitea
                port:
                  number: 3000
  tls:
    - secretName: gitea-tls
      hosts:
        - git.mrt0rtikize.ru

View File

@@ -0,0 +1,4 @@
---
# Namespace that holds all Gitea resources.
kind: Namespace
apiVersion: v1
metadata:
  name: gitea

12
bootstrap/gitea/pvc.yaml Normal file
View File

@@ -0,0 +1,12 @@
---
# 20Gi ReadWriteOnce volume for Gitea data, provisioned from the "longhorn"
# StorageClass.
kind: PersistentVolumeClaim
apiVersion: v1
metadata:
  name: gitea-data
  namespace: gitea
spec:
  accessModes:
    - ReadWriteOnce
  storageClassName: longhorn
  resources:
    requests:
      storage: 20Gi

View File

@@ -0,0 +1,15 @@
---
# Service for pods labelled app=gitea, exposing HTTP (3000) and SSH (22).
kind: Service
apiVersion: v1
metadata:
  name: gitea
  namespace: gitea
spec:
  selector:
    app: gitea
  ports:
    - name: http
      port: 3000
      targetPort: 3000
    - name: ssh
      port: 22
      targetPort: 22

0
infra/argocd/README.md Normal file
View File

0
infra/longhorn/README.md Normal file
View File

View File

@@ -0,0 +1,26 @@
---
# Exposes the Longhorn dashboard at longhorn.mrt0rtikize.ru through Traefik's
# "websecure" entrypoint, with the certificate stored in longhorn-tls.
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: longhorn-ingress
  namespace: longhorn-system
  annotations:
    cert-manager.io/cluster-issuer: letsencrypt-production
    traefik.ingress.kubernetes.io/router.entrypoints: websecure
    traefik.ingress.kubernetes.io/router.tls: 'true'
spec:
  # Traefik is the ingress controller.
  ingressClassName: traefik
  rules:
    - host: longhorn.mrt0rtikize.ru
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                # Service in front of the Longhorn dashboard.
                name: longhorn-frontend
                port:
                  # Service port on which the Longhorn UI is served.
                  number: 80
  tls:
    - secretName: longhorn-tls
      hosts:
        - longhorn.mrt0rtikize.ru

View File

@@ -0,0 +1,23 @@
---
# "longhorn" StorageClass: two replicas per volume, expandable, volumes are
# retained after their claim is deleted and bound immediately.
kind: StorageClass
apiVersion: storage.k8s.io/v1
metadata:
  name: longhorn
provisioner: driver.longhorn.io
reclaimPolicy: Retain
volumeBindingMode: Immediate
allowVolumeExpansion: true
parameters:
  numberOfReplicas: "2"
  staleReplicaTimeout: "30"
---
# Small 2Gi claim against the class above. No namespace is set, so it lands
# in whichever namespace the manifest is applied to.
kind: PersistentVolumeClaim
apiVersion: v1
metadata:
  name: longhorn-pvc
spec:
  storageClassName: longhorn
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 2Gi

0
infra/metallb/README.md Normal file
View File

View File

@@ -0,0 +1,10 @@
---
# MetalLB address pool: 10.0.0.120 through 10.0.0.200, assigned automatically.
kind: IPAddressPool
apiVersion: metallb.io/v1beta1
metadata:
  name: default-address-pool
  namespace: metallb-system
spec:
  autoAssign: true
  avoidBuggyIPs: true
  addresses:
    - 10.0.0.120-10.0.0.200

View File

@@ -0,0 +1,8 @@
---
# Announces addresses from default-address-pool in layer-2 mode.
kind: L2Advertisement
apiVersion: metallb.io/v1beta1
metadata:
  name: default-advertisement
  namespace: metallb-system
spec:
  ipAddressPools:
    - default-address-pool

147
llama/cpu.yaml Normal file
View File

@@ -0,0 +1,147 @@
---
# llama.cpp server running the Qwen3-Coder model without GPU offload.
# An init container downloads the GGUF file into the shared models volume the
# first time; the server container then serves it on port 8080 with /metrics.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: llama-server-cpu
  namespace: llama
spec:
  replicas: 1
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app: llama-server-cpu
  template:
    metadata:
      labels:
        app: llama-server-cpu
      annotations:
        prometheus.io/scrape: "true"
        prometheus.io/port: "8080"
        prometheus.io/path: "/metrics"
    spec:
      nodeSelector:
        gpu: amd
      initContainers:
        - name: download-model
          image: python:3.11-slim
          env:
            - name: HF_HOME
              value: /models/.hf
            - name: MODEL_REPO
              value: "byteshape/Qwen3-Coder-30B-A3B-Instruct-GGUF"
            - name: MODEL_FILE
              value: "Qwen3-Coder-30B-A3B-Instruct-IQ4_XS-4.20bpw.gguf"
          # Skips the download when the model file is already on the volume.
          # huggingface_hub is installed unpinned, so the deprecated
          # `local_dir_use_symlinks` argument is not passed to
          # hf_hub_download: recent releases ignore or reject it, and files
          # are written straight into local_dir either way.
          command:
            - /bin/sh
            - -c
            - |
              set -eux
              MODEL_PATH="/models/${MODEL_FILE}"
              if [ -f "${MODEL_PATH}" ]; then
                echo "Model already exists at ${MODEL_PATH}, skipping download"
                exit 0
              fi
              echo "Installing Hugging Face Hub downloader"
              pip install --no-cache-dir huggingface_hub
              echo "Downloading ${MODEL_REPO}/${MODEL_FILE}"
              python - <<'PY'
              import os
              from huggingface_hub import hf_hub_download
              repo_id = os.environ["MODEL_REPO"]
              filename = os.environ["MODEL_FILE"]
              token = os.environ.get("HUGGING_FACE_HUB_TOKEN")
              path = hf_hub_download(
                  repo_id=repo_id,
                  filename=filename,
                  local_dir="/models",
                  token=token,
              )
              print(f"Downloaded to: {path}")
              PY
              ls -lah /models
          volumeMounts:
            - name: models
              mountPath: /models
      containers:
        - name: llama
          image: ghcr.io/ggml-org/llama.cpp:server
          args:
            - "--model"
            - "/models/Qwen3-Coder-30B-A3B-Instruct-IQ4_XS-4.20bpw.gguf"
            - "--host"
            - "0.0.0.0"
            - "--port"
            - "8080"
            - "--metrics"
            - "--ctx-size"
            - "32768"
            - "--parallel"
            - "1"
            - "--cache-type-k"
            - "q8_0"
            - "--cache-type-v"
            - "q8_0"
          ports:
            - name: http
              containerPort: 8080
          volumeMounts:
            - name: models
              mountPath: /models
          resources:
            requests:
              cpu: "8"
              memory: "24Gi"
            limits:
              cpu: "12"
              memory: "24Gi"
      volumes:
        - name: models
          persistentVolumeClaim:
            claimName: llama-cpu-models-pvc
---
# In-cluster endpoint for the CPU server.
apiVersion: v1
kind: Service
metadata:
  name: llama-server-cpu
  namespace: llama
spec:
  selector:
    app: llama-server-cpu
  ports:
    - name: http
      port: 8080
      targetPort: http
  type: ClusterIP
---
# Scrapes /metrics from the server pods every 15s.
apiVersion: monitoring.coreos.com/v1
kind: PodMonitor
metadata:
  name: llama-server-cpu
  namespace: llama
  labels:
    app: llama-server-cpu
spec:
  namespaceSelector:
    matchNames:
      - llama
  selector:
    matchLabels:
      app: llama-server-cpu
  podMetricsEndpoints:
    - port: http
      path: /metrics
      interval: 15s

62
llama/gpu-exporter.yaml Normal file
View File

@@ -0,0 +1,62 @@
---
# Radeon GPU metrics exporter, one pod per node labelled gpu=amd. The
# container runs privileged with the host's /sys and /dev/dri mounted
# read-only and exposes metrics on port 9200.
kind: DaemonSet
apiVersion: apps/v1
metadata:
  name: radeon-exporter
  namespace: llama
  labels:
    app: radeon-exporter
spec:
  selector:
    matchLabels:
      app: radeon-exporter
  template:
    metadata:
      labels:
        app: radeon-exporter
    spec:
      nodeSelector:
        gpu: amd
      volumes:
        - name: sys
          hostPath:
            path: /sys
            type: Directory
        - name: dri
          hostPath:
            path: /dev/dri
            type: Directory
      containers:
        - name: radeon-exporter
          image: kmulvey/radeon_exporter:latest
          imagePullPolicy: IfNotPresent
          securityContext:
            privileged: true
          ports:
            - name: metrics
              containerPort: 9200
          volumeMounts:
            - name: sys
              mountPath: /sys
              readOnly: true
            - name: dri
              mountPath: /dev/dri
              readOnly: true
---
# Scrapes /metrics from the exporter pods every 15s.
kind: PodMonitor
apiVersion: monitoring.coreos.com/v1
metadata:
  name: radeon-exporter
  namespace: llama
  labels:
    monitoring: primary
spec:
  selector:
    matchLabels:
      app: radeon-exporter
  namespaceSelector:
    matchNames:
      - llama
  podMetricsEndpoints:
    - port: metrics
      path: /metrics
      interval: 15s

116
llama/litellm-db.yaml Normal file
View File

@@ -0,0 +1,116 @@
---
# Credentials for the LiteLLM PostgreSQL database. The same secret is read by
# the postgres container below and by the LiteLLM gateway.
# NOTE(review): the password is committed in clear text; consider moving it to
# a sealed/external secret and rotating it.
apiVersion: v1
kind: Secret
metadata:
  name: litellm-postgres
  namespace: llama
type: Opaque
stringData:
  POSTGRES_DB: litellm
  POSTGRES_USER: litellm
  POSTGRES_PASSWORD: "7792e47efbc7348155f54a15ed34dc1d06716b2b1848711d0ee90e3461883c0d"
---
# 10Gi volume for the database files (default StorageClass).
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: litellm-postgres
  namespace: llama
  labels:
    app.kubernetes.io/name: litellm-postgres
spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 10Gi
---
# Single-instance PostgreSQL 16 for LiteLLM.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: litellm-postgres
  namespace: llama
  labels:
    app.kubernetes.io/name: litellm-postgres
    app.kubernetes.io/component: database
spec:
  replicas: 1
  # Recreate: never run two postgres instances against the same
  # ReadWriteOnce data volume during a rollout.
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app.kubernetes.io/name: litellm-postgres
      app.kubernetes.io/component: database
  template:
    metadata:
      labels:
        app.kubernetes.io/name: litellm-postgres
        app.kubernetes.io/component: database
    spec:
      containers:
        - name: postgres
          image: postgres:16
          imagePullPolicy: IfNotPresent
          ports:
            - name: postgres
              containerPort: 5432
          env:
            - name: POSTGRES_DB
              valueFrom:
                secretKeyRef:
                  name: litellm-postgres
                  key: POSTGRES_DB
            - name: POSTGRES_USER
              valueFrom:
                secretKeyRef:
                  name: litellm-postgres
                  key: POSTGRES_USER
            - name: POSTGRES_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: litellm-postgres
                  key: POSTGRES_PASSWORD
            # Keep the cluster in a sub-directory of the mount so initdb is
            # not confused by files at the volume root (e.g. lost+found).
            - name: PGDATA
              value: /var/lib/postgresql/data/pgdata
          volumeMounts:
            # The postgres:16 image keeps its data in /var/lib/postgresql/data
            # and declares that path as an image volume. Mounting the PVC one
            # level higher (/var/lib/postgresql) likely leaves the actual data
            # directory on container-local storage, so mount the PVC at the
            # data path itself.
            # NOTE(review): confirm no data worth migrating exists before
            # rolling this out.
            - name: data
              mountPath: /var/lib/postgresql/data
          readinessProbe:
            exec:
              command:
                - sh
                - -c
                - pg_isready -U "$POSTGRES_USER" -d "$POSTGRES_DB"
            initialDelaySeconds: 5
            periodSeconds: 10
          livenessProbe:
            exec:
              command:
                - sh
                - -c
                - pg_isready -U "$POSTGRES_USER" -d "$POSTGRES_DB"
            initialDelaySeconds: 20
            periodSeconds: 20
          resources:
            requests:
              cpu: 100m
              memory: 256Mi
            limits:
              cpu: 500m
              memory: 256Mi
      volumes:
        - name: data
          persistentVolumeClaim:
            claimName: litellm-postgres
---
# In-cluster endpoint used in the gateway's DATABASE_URL.
apiVersion: v1
kind: Service
metadata:
  name: litellm-postgres
  namespace: llama
  labels:
    app.kubernetes.io/name: litellm-postgres
    app.kubernetes.io/component: database
spec:
  selector:
    app.kubernetes.io/name: litellm-postgres
    app.kubernetes.io/component: database
  ports:
    - name: postgres
      port: 5432
      targetPort: postgres
  type: ClusterIP

202
llama/litellm.yaml Normal file
View File

@@ -0,0 +1,202 @@
---
# Master key for the LiteLLM gateway.
# NOTE(review): the key is committed in clear text; consider moving it to a
# sealed/external secret and rotating it.
apiVersion: v1
kind: Secret
metadata:
  name: litellm-secret
  namespace: llama
  labels:
    app.kubernetes.io/name: litellm
    app.kubernetes.io/component: gateway
type: Opaque
stringData:
  LITELLM_MASTER_KEY: "6991c7c0f02b4bcf"
---
# LiteLLM configuration: three model aliases (fast, smart, rp), each routed
# to one of the llama-server Services in this namespace.
apiVersion: v1
kind: ConfigMap
metadata:
  name: litellm-config
  namespace: llama
  labels:
    app.kubernetes.io/name: litellm
    app.kubernetes.io/component: gateway
data:
  config.yaml: |
    model_list:
      - model_name: fast
        litellm_params:
          model: openai/fast
          api_base: "http://llama-server-gpu.llama.svc.cluster.local:8080/v1"
          api_key: none
      - model_name: smart
        litellm_params:
          model: openai/smart
          api_base: "http://llama-server-cpu.llama.svc.cluster.local:8080/v1"
          api_key: none
      - model_name: rp
        litellm_params:
          model: openai/rp
          api_base: "http://llama-server-gpu-rp.llama.svc.cluster.local:8080/v1"
          api_key: none
    litellm_settings:
      callbacks:
        - prometheus
    general_settings:
      store_model_in_db: true
      store_prompts_in_spend_logs: true
---
# LiteLLM gateway. Reads config.yaml from the ConfigMap above and connects to
# the litellm-postgres database.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: litellm
  namespace: llama
  labels:
    app.kubernetes.io/name: litellm
    app.kubernetes.io/component: gateway
    app.kubernetes.io/part-of: llama-stack
    monitoring: prometheus
spec:
  replicas: 1
  selector:
    matchLabels:
      app.kubernetes.io/name: litellm
      app.kubernetes.io/component: gateway
  template:
    metadata:
      labels:
        app.kubernetes.io/name: litellm
        app.kubernetes.io/component: gateway
        app.kubernetes.io/part-of: llama-stack
        monitoring: prometheus
      annotations:
        prometheus.io/scrape: "true"
        prometheus.io/port: "4000"
        prometheus.io/path: "/metrics"
    spec:
      containers:
        - name: litellm
          image: ghcr.io/berriai/litellm:v1.82.6.rc.3
          imagePullPolicy: IfNotPresent
          args:
            - "--config"
            - "/app/config.yaml"
          env:
            - name: LITELLM_MASTER_KEY
              valueFrom:
                secretKeyRef:
                  name: litellm-secret
                  key: LITELLM_MASTER_KEY
            - name: POSTGRES_USER
              valueFrom:
                secretKeyRef:
                  name: litellm-postgres
                  key: POSTGRES_USER
            - name: POSTGRES_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: litellm-postgres
                  key: POSTGRES_PASSWORD
            - name: POSTGRES_DB
              valueFrom:
                secretKeyRef:
                  name: litellm-postgres
                  key: POSTGRES_DB
            # Must stay after the POSTGRES_* entries: $(VAR) references are
            # only expanded for variables defined earlier in this list.
            - name: DATABASE_URL
              value: "postgresql://$(POSTGRES_USER):$(POSTGRES_PASSWORD)@litellm-postgres.llama.svc.cluster.local:5432/$(POSTGRES_DB)"
          ports:
            - name: http
              containerPort: 4000
              protocol: TCP
          volumeMounts:
            # subPath mounts only the single file, not the whole directory.
            - name: litellm-config
              mountPath: /app/config.yaml
              subPath: config.yaml
          resources:
            requests:
              cpu: "500m"
              memory: "1Gi"
            limits:
              cpu: "1000m"
              memory: "2Gi"
      volumes:
        - name: litellm-config
          configMap:
            name: litellm-config
---
# In-cluster endpoint for the gateway.
apiVersion: v1
kind: Service
metadata:
  name: litellm
  namespace: llama
  labels:
    app.kubernetes.io/name: litellm
    app.kubernetes.io/component: gateway
    app.kubernetes.io/part-of: llama-stack
    monitoring: prometheus
spec:
  selector:
    app.kubernetes.io/name: litellm
    app.kubernetes.io/component: gateway
  ports:
    - name: http
      port: 4000
      targetPort: http
      protocol: TCP
  type: ClusterIP
---
# Exposes the gateway at litellm.mrt0rtikize.ru through Traefik.
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: litellm
  namespace: llama
  labels:
    app.kubernetes.io/name: litellm
    app.kubernetes.io/component: gateway
    app.kubernetes.io/part-of: llama-stack
  annotations:
    cert-manager.io/cluster-issuer: letsencrypt-production
    traefik.ingress.kubernetes.io/router.entrypoints: websecure
    traefik.ingress.kubernetes.io/router.tls: "true"
spec:
  ingressClassName: traefik
  tls:
    - hosts:
        - litellm.mrt0rtikize.ru
      # Named after this host (was "web-echo-tls", a leftover from another
      # app) so the certificate secret cannot collide with an unrelated
      # Ingress; matches the <app>-tls naming of the other Ingresses.
      secretName: litellm-tls
  rules:
    - host: litellm.mrt0rtikize.ru
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: litellm
                port:
                  number: 4000
---
# Scrapes /metrics from the gateway pods every 30s.
# NOTE(review): the `release` label presumably has to match the Prometheus
# PodMonitor selector -- verify against the kube-prometheus-stack values.
apiVersion: monitoring.coreos.com/v1
kind: PodMonitor
metadata:
  name: litellm
  namespace: llama
  labels:
    app.kubernetes.io/name: litellm
    app.kubernetes.io/component: gateway
    app.kubernetes.io/part-of: llama-stack
    release: kube-prometheus-stack
spec:
  namespaceSelector:
    matchNames:
      - llama
  selector:
    matchLabels:
      app.kubernetes.io/name: litellm
      app.kubernetes.io/component: gateway
  podMetricsEndpoints:
    - port: http
      path: /metrics
      interval: 30s

166
llama/main.yaml Normal file
View File

@@ -0,0 +1,166 @@
---
# llama.cpp (Vulkan build) server running the Devstral model with all layers
# offloaded to the GPU. An init container downloads the GGUF file into the
# shared models volume the first time; the server listens on 8080.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: llama-server-gpu
  namespace: llama
spec:
  replicas: 1
  # Recreate (same as llama-server-cpu): stop the old pod before starting the
  # new one so two copies never compete for the GPU and the RWO model volume.
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app: llama-server-gpu
  template:
    metadata:
      labels:
        app: llama-server-gpu
      annotations:
        prometheus.io/scrape: "true"
        prometheus.io/port: "8080"
        prometheus.io/path: "/metrics"
    spec:
      nodeSelector:
        gpu: amd
      initContainers:
        - name: download-model
          image: python:3.11-slim
          env:
            - name: HF_HOME
              value: /models/.hf
            - name: MODEL_REPO
              value: "byteshape/Devstral-Small-2-24B-Instruct-2512-GGUF"
            - name: MODEL_FILE
              value: "Devstral-Small-2-24B-Instruct-2512-IQ4_XS-4.04bpw.gguf"
            # optional, only if you need gated/private models
            # - name: HUGGING_FACE_HUB_TOKEN
            #   valueFrom:
            #     secretKeyRef:
            #       name: hf-token
            #       key: token
          # Skips the download when the model file is already on the volume.
          # huggingface_hub is installed unpinned, so the deprecated
          # `local_dir_use_symlinks` argument is not passed to
          # hf_hub_download: recent releases ignore or reject it, and files
          # are written straight into local_dir either way.
          command:
            - /bin/sh
            - -c
            - |
              set -eux
              MODEL_PATH="/models/${MODEL_FILE}"
              if [ -f "${MODEL_PATH}" ]; then
                echo "Model already exists at ${MODEL_PATH}, skipping download"
                exit 0
              fi
              echo "Installing Hugging Face Hub downloader"
              pip install --no-cache-dir huggingface_hub
              echo "Downloading ${MODEL_REPO}/${MODEL_FILE}"
              python - <<'PY'
              import os
              from huggingface_hub import hf_hub_download
              repo_id = os.environ["MODEL_REPO"]
              filename = os.environ["MODEL_FILE"]
              token = os.environ.get("HUGGING_FACE_HUB_TOKEN")
              path = hf_hub_download(
                  repo_id=repo_id,
                  filename=filename,
                  local_dir="/models",
                  token=token,
              )
              print(f"Downloaded to: {path}")
              PY
              ls -lah /models
          volumeMounts:
            - name: models
              mountPath: /models
      containers:
        - name: llama
          image: ghcr.io/ggml-org/llama.cpp:server-vulkan
          args:
            - "--model"
            - "/models/Devstral-Small-2-24B-Instruct-2512-IQ4_XS-4.04bpw.gguf"
            - "--host"
            - "0.0.0.0"
            - "--port"
            - "8080"
            - "--n-gpu-layers"
            - "999"
            - "--metrics"
            # performance tuning
            - "--ctx-size"
            - "32768"
            - "--parallel"
            - "4"
            # KV cache quantization
            - "--cache-type-k"
            - "q8_0"
            - "--cache-type-v"
            - "q8_0"
          ports:
            - name: http
              containerPort: 8080
          # Privileged so the container can use the host GPU device nodes
          # mounted from /dev/dri below.
          securityContext:
            privileged: true
          volumeMounts:
            - name: models
              mountPath: /models
            - name: dri
              mountPath: /dev/dri
          resources:
            requests:
              cpu: "2"
              memory: "4Gi"
            limits:
              cpu: "2"
              memory: "4Gi"
      volumes:
        - name: models
          persistentVolumeClaim:
            claimName: llama-gpu-models-pvc
        - name: dri
          hostPath:
            path: /dev/dri
            type: Directory
---
# In-cluster endpoint for the GPU server.
apiVersion: v1
kind: Service
metadata:
  name: llama-server-gpu
  namespace: llama
spec:
  selector:
    app: llama-server-gpu
  ports:
    - name: http
      port: 8080
      targetPort: http
  type: ClusterIP
---
# Scrapes /metrics from the server pods every 15s.
apiVersion: monitoring.coreos.com/v1
kind: PodMonitor
metadata:
  name: llama-server-gpu
  namespace: llama
  labels:
    app: llama-server-gpu
spec:
  namespaceSelector:
    matchNames:
      - llama
  selector:
    matchLabels:
      app: llama-server-gpu
  podMetricsEndpoints:
    - port: http
      path: /metrics
      interval: 15s

42
llama/namespace.yaml Normal file
View File

@@ -0,0 +1,42 @@
---
# Namespace for the llama stack.
kind: Namespace
apiVersion: v1
metadata:
  name: llama
---
# Disabled: dedicated StorageClass for the model volumes. The claims below
# therefore use the cluster's default StorageClass.
# apiVersion: storage.k8s.io/v1
# kind: StorageClass
# metadata:
#   name: longhorn-llama
# provisioner: driver.longhorn.io
# parameters:
#   numberOfReplicas: "2"
#   staleReplicaTimeout: "30"
# allowVolumeExpansion: true
# reclaimPolicy: Retain
# volumeBindingMode: Immediate
# ---
# 50Gi model volume mounted by the GPU llama servers.
kind: PersistentVolumeClaim
apiVersion: v1
metadata:
  name: llama-gpu-models-pvc
  namespace: llama
spec:
  # storageClassName: longhorn-llama
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 50Gi
---
# 100Gi model volume mounted by the CPU llama server.
kind: PersistentVolumeClaim
apiVersion: v1
metadata:
  name: llama-cpu-models-pvc
  namespace: llama
spec:
  # storageClassName: longhorn-llama
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 100Gi

166
llama/rp.yaml Normal file
View File

@@ -0,0 +1,166 @@
# llama.cpp server (Vulkan image) for the role-play model, scheduled onto
# nodes labelled gpu=amd. The download-model init container fetches the GGUF
# file into the models volume when it is not there yet; the llama container
# then serves it on port 8080 with the /metrics endpoint enabled.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: llama-server-gpu-rp
  namespace: llama
spec:
  replicas: 1
  # Recreate, matching the llama-server-cpu Deployment. With the default
  # RollingUpdate the replacement pod starts while the old one is still
  # running; it then either competes for the same GPU or, on another node,
  # cannot attach the ReadWriteOnce models volume.
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app: llama-server-gpu-rp
  template:
    metadata:
      labels:
        app: llama-server-gpu-rp
      annotations:
        prometheus.io/scrape: "true"
        prometheus.io/port: "8080"
        prometheus.io/path: "/metrics"
    spec:
      nodeSelector:
        gpu: amd
      initContainers:
        - name: download-model
          image: python:3.11-slim
          env:
            - name: HF_HOME
              value: /models/.hf
            - name: MODEL_REPO
              value: "mradermacher/Omega-Darker-Gaslight_The-Final-Forgotten-Fever-Dream-24B-GGUF"
            - name: MODEL_FILE
              value: "Omega-Darker-Gaslight_The-Final-Forgotten-Fever-Dream-24B.Q4_K_S.gguf"
            # optional, only if you need gated/private models
            # - name: HUGGING_FACE_HUB_TOKEN
            #   valueFrom:
            #     secretKeyRef:
            #       name: hf-token
            #       key: token
          # The script installs an unpinned huggingface_hub, so it must only
          # use arguments the current release accepts: the deprecated
          # local_dir_use_symlinks keyword is intentionally not passed.
          command:
            - /bin/sh
            - -c
            - |
              set -eux
              MODEL_PATH="/models/${MODEL_FILE}"
              if [ -f "${MODEL_PATH}" ]; then
                echo "Model already exists at ${MODEL_PATH}, skipping download"
                exit 0
              fi
              echo "Installing Hugging Face Hub downloader"
              pip install --no-cache-dir huggingface_hub
              echo "Downloading ${MODEL_REPO}/${MODEL_FILE}"
              python - <<'PY'
              import os
              from huggingface_hub import hf_hub_download
              repo_id = os.environ["MODEL_REPO"]
              filename = os.environ["MODEL_FILE"]
              token = os.environ.get("HUGGING_FACE_HUB_TOKEN")
              path = hf_hub_download(
                  repo_id=repo_id,
                  filename=filename,
                  local_dir="/models",
                  token=token,
              )
              print(f"Downloaded to: {path}")
              PY
              ls -lah /models
          volumeMounts:
            - name: models
              mountPath: /models
      containers:
        - name: llama
          image: ghcr.io/ggml-org/llama.cpp:server-vulkan
          args:
            - "--model"
            - "/models/Omega-Darker-Gaslight_The-Final-Forgotten-Fever-Dream-24B.Q4_K_S.gguf"
            - "--host"
            - "0.0.0.0"
            - "--port"
            - "8080"
            - "--n-gpu-layers"
            - "999"
            - "--metrics"
            # performance tuning
            - "--ctx-size"
            - "32768"
            - "--parallel"
            - "1"
            # KV cache quantization
            - "--cache-type-k"
            - "q8_0"
            - "--cache-type-v"
            - "q8_0"
          ports:
            - name: http
              containerPort: 8080
          # NOTE(review): privileged is presumably only needed to open the
          # host's /dev/dri devices mounted below — confirm whether a
          # narrower securityContext would do.
          securityContext:
            privileged: true
          volumeMounts:
            - name: models
              mountPath: /models
            - name: dri
              mountPath: /dev/dri
          resources:
            requests:
              cpu: "2"
              memory: "4Gi"
            limits:
              cpu: "2"
              memory: "4Gi"
      volumes:
        - name: models
          persistentVolumeClaim:
            claimName: llama-gpu-models-pvc
        - name: dri
          hostPath:
            path: /dev/dri
            type: Directory
---
# Cluster-internal Service for the llama-server-gpu-rp pods; port 8080 is
# forwarded to the container port named "http".
apiVersion: v1
kind: Service
metadata:
  namespace: llama
  name: llama-server-gpu-rp
spec:
  type: ClusterIP
  selector:
    app: llama-server-gpu-rp
  ports:
    - name: http
      targetPort: http
      port: 8080
---
# Scrapes /metrics on the "http" port of the llama-server-gpu-rp pods every
# 15 seconds.
apiVersion: monitoring.coreos.com/v1
kind: PodMonitor
metadata:
  namespace: llama
  name: llama-server-gpu-rp
  labels:
    app: llama-server-gpu-rp
spec:
  selector:
    matchLabels:
      app: llama-server-gpu-rp
  namespaceSelector:
    matchNames:
      - llama
  podMetricsEndpoints:
    - path: /metrics
      port: http
      interval: 15s

147
manifests/llama/cpu.yaml Normal file
View File

@@ -0,0 +1,147 @@
# llama.cpp server (plain "server" image, no GPU device mounted) for the
# Qwen3-Coder model. The download-model init container fetches the GGUF file
# into the models volume when it is missing; the llama container serves it on
# port 8080 with the /metrics endpoint enabled.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: llama-server-cpu
  namespace: llama
spec:
  replicas: 1
  # Old pod is stopped before the new one starts.
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app: llama-server-cpu
  template:
    metadata:
      labels:
        app: llama-server-cpu
      annotations:
        prometheus.io/scrape: "true"
        prometheus.io/port: "8080"
        prometheus.io/path: "/metrics"
    spec:
      # NOTE(review): this CPU server is also pinned to the gpu=amd nodes —
      # confirm that is intentional.
      nodeSelector:
        gpu: amd
      initContainers:
        - name: download-model
          image: python:3.11-slim
          env:
            - name: HF_HOME
              value: /models/.hf
            - name: MODEL_REPO
              value: "byteshape/Qwen3-Coder-30B-A3B-Instruct-GGUF"
            - name: MODEL_FILE
              value: "Qwen3-Coder-30B-A3B-Instruct-IQ4_XS-4.20bpw.gguf"
          # The script installs an unpinned huggingface_hub, so it must only
          # use arguments the current release accepts: the deprecated
          # local_dir_use_symlinks keyword is intentionally not passed.
          command:
            - /bin/sh
            - -c
            - |
              set -eux
              MODEL_PATH="/models/${MODEL_FILE}"
              if [ -f "${MODEL_PATH}" ]; then
                echo "Model already exists at ${MODEL_PATH}, skipping download"
                exit 0
              fi
              echo "Installing Hugging Face Hub downloader"
              pip install --no-cache-dir huggingface_hub
              echo "Downloading ${MODEL_REPO}/${MODEL_FILE}"
              python - <<'PY'
              import os
              from huggingface_hub import hf_hub_download
              repo_id = os.environ["MODEL_REPO"]
              filename = os.environ["MODEL_FILE"]
              token = os.environ.get("HUGGING_FACE_HUB_TOKEN")
              path = hf_hub_download(
                  repo_id=repo_id,
                  filename=filename,
                  local_dir="/models",
                  token=token,
              )
              print(f"Downloaded to: {path}")
              PY
              ls -lah /models
          volumeMounts:
            - name: models
              mountPath: /models
      containers:
        - name: llama
          image: ghcr.io/ggml-org/llama.cpp:server
          args:
            - "--model"
            - "/models/Qwen3-Coder-30B-A3B-Instruct-IQ4_XS-4.20bpw.gguf"
            - "--host"
            - "0.0.0.0"
            - "--port"
            - "8080"
            - "--metrics"
            - "--ctx-size"
            - "32768"
            - "--parallel"
            - "1"
            - "--cache-type-k"
            - "q8_0"
            - "--cache-type-v"
            - "q8_0"
          ports:
            - name: http
              containerPort: 8080
          volumeMounts:
            - name: models
              mountPath: /models
          resources:
            requests:
              cpu: "8"
              memory: "24Gi"
            limits:
              cpu: "12"
              memory: "24Gi"
      volumes:
        - name: models
          persistentVolumeClaim:
            claimName: llama-cpu-models-pvc
---
# Cluster-internal Service for the llama-server-cpu pods; port 8080 is
# forwarded to the container port named "http".
apiVersion: v1
kind: Service
metadata:
  namespace: llama
  name: llama-server-cpu
spec:
  type: ClusterIP
  selector:
    app: llama-server-cpu
  ports:
    - name: http
      targetPort: http
      port: 8080
---
# Scrapes /metrics on the "http" port of the llama-server-cpu pods every
# 15 seconds.
apiVersion: monitoring.coreos.com/v1
kind: PodMonitor
metadata:
  namespace: llama
  name: llama-server-cpu
  labels:
    app: llama-server-cpu
spec:
  selector:
    matchLabels:
      app: llama-server-cpu
  namespaceSelector:
    matchNames:
      - llama
  podMetricsEndpoints:
    - path: /metrics
      port: http
      interval: 15s

View File

@@ -0,0 +1,62 @@
# Runs one radeon_exporter pod on every node labelled gpu=amd. The container
# gets read-only views of the host's /sys and /dev/dri and exposes its
# metrics port 9200.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  namespace: llama
  name: radeon-exporter
  labels:
    app: radeon-exporter
spec:
  selector:
    matchLabels:
      app: radeon-exporter
  template:
    metadata:
      labels:
        app: radeon-exporter
    spec:
      nodeSelector:
        gpu: amd
      volumes:
        - name: sys
          hostPath:
            type: Directory
            path: /sys
        - name: dri
          hostPath:
            type: Directory
            path: /dev/dri
      containers:
        - name: radeon-exporter
          # NOTE(review): ":latest" combined with IfNotPresent means a node
          # keeps whatever image it pulled first; consider pinning a tag.
          image: kmulvey/radeon_exporter:latest
          imagePullPolicy: IfNotPresent
          securityContext:
            privileged: true
          ports:
            - containerPort: 9200
              name: metrics
          volumeMounts:
            - mountPath: /sys
              name: sys
              readOnly: true
            - mountPath: /dev/dri
              name: dri
              readOnly: true
---
# Scrapes /metrics on the "metrics" port of the radeon-exporter pods every
# 15 seconds.
apiVersion: monitoring.coreos.com/v1
kind: PodMonitor
metadata:
  namespace: llama
  name: radeon-exporter
  labels:
    monitoring: primary
spec:
  selector:
    matchLabels:
      app: radeon-exporter
  namespaceSelector:
    matchNames:
      - llama
  podMetricsEndpoints:
    - path: /metrics
      port: metrics
      interval: 15s

View File

@@ -0,0 +1,116 @@
# Database name, user and password for the LiteLLM PostgreSQL instance. The
# litellm-postgres and litellm Deployments read these keys via secretKeyRef.
# NOTE(review): the password is committed to Git in plain text, so anyone
# with read access to the repository can read it. Rotate it and supply the
# Secret from a secret manager (e.g. Sealed Secrets or External Secrets)
# instead of a literal manifest.
apiVersion: v1
kind: Secret
metadata:
name: litellm-postgres
namespace: llama
type: Opaque
stringData:
POSTGRES_DB: litellm
POSTGRES_USER: litellm
POSTGRES_PASSWORD: 7792e47efbc7348155f54a15ed34dc1d06716b2b1848711d0ee90e3461883c0d
---
# 10Gi volume claimed by the litellm-postgres Deployment for its data.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  namespace: llama
  name: litellm-postgres
  labels:
    app.kubernetes.io/name: litellm-postgres
spec:
  resources:
    requests:
      storage: 10Gi
  accessModes:
    - ReadWriteOnce
---
# Single-replica PostgreSQL 16 backing LiteLLM. Credentials come from the
# litellm-postgres Secret; data lives on the litellm-postgres PVC.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: litellm-postgres
  namespace: llama
  labels:
    app.kubernetes.io/name: litellm-postgres
    app.kubernetes.io/component: database
spec:
  replicas: 1
  # Recreate: the data volume is ReadWriteOnce and two PostgreSQL servers
  # must never run against the same data directory, so the old pod has to be
  # gone before its replacement starts.
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app.kubernetes.io/name: litellm-postgres
      app.kubernetes.io/component: database
  template:
    metadata:
      labels:
        app.kubernetes.io/name: litellm-postgres
        app.kubernetes.io/component: database
    spec:
      containers:
        - name: postgres
          image: postgres:16
          imagePullPolicy: IfNotPresent
          ports:
            - name: postgres
              containerPort: 5432
          env:
            - name: POSTGRES_DB
              valueFrom:
                secretKeyRef:
                  name: litellm-postgres
                  key: POSTGRES_DB
            - name: POSTGRES_USER
              valueFrom:
                secretKeyRef:
                  name: litellm-postgres
                  key: POSTGRES_USER
            - name: POSTGRES_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: litellm-postgres
                  key: POSTGRES_PASSWORD
            # Keep the cluster in a subdirectory of the mount point: initdb
            # refuses a non-empty directory, and a freshly formatted volume
            # may already contain lost+found.
            - name: PGDATA
              value: /var/lib/postgresql/data/pgdata
          volumeMounts:
            # For PostgreSQL 17 and below the image stores its data under
            # /var/lib/postgresql/data. A volume mounted one level up at
            # /var/lib/postgresql is not where the database is written, so
            # the data would be lost when the container is re-created.
            - name: data
              mountPath: /var/lib/postgresql/data
          readinessProbe:
            exec:
              command:
                - sh
                - -c
                - pg_isready -U "$POSTGRES_USER" -d "$POSTGRES_DB"
            initialDelaySeconds: 5
            periodSeconds: 10
          livenessProbe:
            exec:
              command:
                - sh
                - -c
                - pg_isready -U "$POSTGRES_USER" -d "$POSTGRES_DB"
            initialDelaySeconds: 20
            periodSeconds: 20
          resources:
            requests:
              cpu: 100m
              memory: 256Mi
            limits:
              cpu: 500m
              memory: 256Mi
      volumes:
        - name: data
          persistentVolumeClaim:
            claimName: litellm-postgres
---
# Cluster-internal Service exposing PostgreSQL on 5432; this is the host name
# used in LiteLLM's DATABASE_URL.
apiVersion: v1
kind: Service
metadata:
  namespace: llama
  name: litellm-postgres
  labels:
    app.kubernetes.io/name: litellm-postgres
    app.kubernetes.io/component: database
spec:
  type: ClusterIP
  selector:
    app.kubernetes.io/name: litellm-postgres
    app.kubernetes.io/component: database
  ports:
    - name: postgres
      targetPort: postgres
      port: 5432

View File

@@ -0,0 +1,202 @@
# Holds LITELLM_MASTER_KEY, which the litellm Deployment injects as an
# environment variable via secretKeyRef.
# NOTE(review): the key is committed to Git in plain text and the gateway is
# published through an Ingress. Rotate it and supply the Secret from a secret
# manager (e.g. Sealed Secrets or External Secrets) instead.
apiVersion: v1
kind: Secret
metadata:
name: litellm-secret
namespace: llama
labels:
app.kubernetes.io/name: litellm
app.kubernetes.io/component: gateway
type: Opaque
stringData:
LITELLM_MASTER_KEY: "6991c7c0f02b4bcf"
---
# LiteLLM proxy configuration, mounted by the litellm Deployment at
# /app/config.yaml. It maps three model aliases to the in-cluster llama.cpp
# Services: fast -> llama-server-gpu, smart -> llama-server-cpu,
# rp -> llama-server-gpu-rp.
# NOTE(review): store_prompts_in_spend_logs is enabled — confirm that keeping
# prompt text in the database is intended.
apiVersion: v1
kind: ConfigMap
metadata:
  namespace: llama
  name: litellm-config
  labels:
    app.kubernetes.io/name: litellm
    app.kubernetes.io/component: gateway
data:
  config.yaml: |
    model_list:
      - model_name: fast
        litellm_params:
          model: openai/fast
          api_base: "http://llama-server-gpu.llama.svc.cluster.local:8080/v1"
          api_key: none
      - model_name: smart
        litellm_params:
          model: openai/smart
          api_base: "http://llama-server-cpu.llama.svc.cluster.local:8080/v1"
          api_key: none
      - model_name: rp
        litellm_params:
          model: openai/rp
          api_base: "http://llama-server-gpu-rp.llama.svc.cluster.local:8080/v1"
          api_key: none
    litellm_settings:
      callbacks:
        - prometheus
    general_settings:
      store_model_in_db: true
      store_prompts_in_spend_logs: true
---
# LiteLLM gateway. It starts with the config file from the litellm-config
# ConfigMap, takes its master key from litellm-secret and builds DATABASE_URL
# from the litellm-postgres credentials.
apiVersion: apps/v1
kind: Deployment
metadata:
  namespace: llama
  name: litellm
  labels:
    app.kubernetes.io/name: litellm
    app.kubernetes.io/component: gateway
    app.kubernetes.io/part-of: llama-stack
    monitoring: prometheus
spec:
  replicas: 1
  selector:
    matchLabels:
      app.kubernetes.io/name: litellm
      app.kubernetes.io/component: gateway
  template:
    metadata:
      annotations:
        prometheus.io/scrape: "true"
        prometheus.io/port: "4000"
        prometheus.io/path: "/metrics"
      labels:
        app.kubernetes.io/name: litellm
        app.kubernetes.io/component: gateway
        app.kubernetes.io/part-of: llama-stack
        monitoring: prometheus
    spec:
      volumes:
        - name: litellm-config
          configMap:
            name: litellm-config
      containers:
        - name: litellm
          image: ghcr.io/berriai/litellm:v1.82.6.rc.3
          imagePullPolicy: IfNotPresent
          args:
            - "--config"
            - "/app/config.yaml"
          ports:
            - name: http
              protocol: TCP
              containerPort: 4000
          env:
            - name: LITELLM_MASTER_KEY
              valueFrom:
                secretKeyRef:
                  name: litellm-secret
                  key: LITELLM_MASTER_KEY
            # The three POSTGRES_* variables must stay above DATABASE_URL:
            # $(VAR) references only expand variables defined earlier in
            # this list.
            - name: POSTGRES_USER
              valueFrom:
                secretKeyRef:
                  name: litellm-postgres
                  key: POSTGRES_USER
            - name: POSTGRES_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: litellm-postgres
                  key: POSTGRES_PASSWORD
            - name: POSTGRES_DB
              valueFrom:
                secretKeyRef:
                  name: litellm-postgres
                  key: POSTGRES_DB
            - name: DATABASE_URL
              value: "postgresql://$(POSTGRES_USER):$(POSTGRES_PASSWORD)@litellm-postgres.llama.svc.cluster.local:5432/$(POSTGRES_DB)"
          volumeMounts:
            # subPath mounts just the one file instead of shadowing /app.
            - name: litellm-config
              mountPath: /app/config.yaml
              subPath: config.yaml
          resources:
            requests:
              cpu: "500m"
              memory: "1Gi"
            limits:
              cpu: "1000m"
              memory: "2Gi"
---
# Cluster-internal Service for the LiteLLM gateway; port 4000 is forwarded to
# the container port named "http".
apiVersion: v1
kind: Service
metadata:
  namespace: llama
  name: litellm
  labels:
    app.kubernetes.io/name: litellm
    app.kubernetes.io/component: gateway
    app.kubernetes.io/part-of: llama-stack
    monitoring: prometheus
spec:
  type: ClusterIP
  selector:
    app.kubernetes.io/name: litellm
    app.kubernetes.io/component: gateway
  ports:
    - name: http
      protocol: TCP
      targetPort: http
      port: 4000
---
# Publishes the LiteLLM gateway at litellm.mrt0rtikize.ru through Traefik's
# websecure entrypoint, with a certificate requested from the
# letsencrypt-production ClusterIssuer.
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: litellm
  namespace: llama
  labels:
    app.kubernetes.io/name: litellm
    app.kubernetes.io/component: gateway
    app.kubernetes.io/part-of: llama-stack
  annotations:
    cert-manager.io/cluster-issuer: letsencrypt-production
    traefik.ingress.kubernetes.io/router.entrypoints: websecure
    traefik.ingress.kubernetes.io/router.tls: "true"
spec:
  ingressClassName: traefik
  tls:
    - hosts:
        - litellm.mrt0rtikize.ru
      # Named after this ingress, like longhorn-tls, grafana-tls and
      # sillytavern-tls. The previous "web-echo-tls" was a leftover from
      # another app and would share a Secret with any ingress of that name
      # in this namespace.
      secretName: litellm-tls
  rules:
    - host: litellm.mrt0rtikize.ru
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: litellm
                port:
                  number: 4000
---
# Scrapes /metrics on the "http" port of the LiteLLM gateway pods every
# 30 seconds.
apiVersion: monitoring.coreos.com/v1
kind: PodMonitor
metadata:
  namespace: llama
  name: litellm
  labels:
    app.kubernetes.io/name: litellm
    app.kubernetes.io/component: gateway
    app.kubernetes.io/part-of: llama-stack
    release: kube-prometheus-stack
spec:
  selector:
    matchLabels:
      app.kubernetes.io/name: litellm
      app.kubernetes.io/component: gateway
  namespaceSelector:
    matchNames:
      - llama
  podMetricsEndpoints:
    - path: /metrics
      port: http
      interval: 30s

166
manifests/llama/main.yaml Normal file
View File

@@ -0,0 +1,166 @@
# llama.cpp server (Vulkan image) for the Devstral model, scheduled onto
# nodes labelled gpu=amd. The download-model init container fetches the GGUF
# file into the models volume when it is not there yet; the llama container
# then serves it on port 8080 with the /metrics endpoint enabled.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: llama-server-gpu
  namespace: llama
spec:
  replicas: 1
  # Recreate, matching the llama-server-cpu Deployment. With the default
  # RollingUpdate the replacement pod starts while the old one is still
  # running; it then either competes for the same GPU or, on another node,
  # cannot attach the ReadWriteOnce models volume.
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app: llama-server-gpu
  template:
    metadata:
      labels:
        app: llama-server-gpu
      annotations:
        prometheus.io/scrape: "true"
        prometheus.io/port: "8080"
        prometheus.io/path: "/metrics"
    spec:
      nodeSelector:
        gpu: amd
      initContainers:
        - name: download-model
          image: python:3.11-slim
          env:
            - name: HF_HOME
              value: /models/.hf
            - name: MODEL_REPO
              value: "byteshape/Devstral-Small-2-24B-Instruct-2512-GGUF"
            - name: MODEL_FILE
              value: "Devstral-Small-2-24B-Instruct-2512-IQ4_XS-4.04bpw.gguf"
            # optional, only if you need gated/private models
            # - name: HUGGING_FACE_HUB_TOKEN
            #   valueFrom:
            #     secretKeyRef:
            #       name: hf-token
            #       key: token
          # The script installs an unpinned huggingface_hub, so it must only
          # use arguments the current release accepts: the deprecated
          # local_dir_use_symlinks keyword is intentionally not passed.
          command:
            - /bin/sh
            - -c
            - |
              set -eux
              MODEL_PATH="/models/${MODEL_FILE}"
              if [ -f "${MODEL_PATH}" ]; then
                echo "Model already exists at ${MODEL_PATH}, skipping download"
                exit 0
              fi
              echo "Installing Hugging Face Hub downloader"
              pip install --no-cache-dir huggingface_hub
              echo "Downloading ${MODEL_REPO}/${MODEL_FILE}"
              python - <<'PY'
              import os
              from huggingface_hub import hf_hub_download
              repo_id = os.environ["MODEL_REPO"]
              filename = os.environ["MODEL_FILE"]
              token = os.environ.get("HUGGING_FACE_HUB_TOKEN")
              path = hf_hub_download(
                  repo_id=repo_id,
                  filename=filename,
                  local_dir="/models",
                  token=token,
              )
              print(f"Downloaded to: {path}")
              PY
              ls -lah /models
          volumeMounts:
            - name: models
              mountPath: /models
      containers:
        - name: llama
          image: ghcr.io/ggml-org/llama.cpp:server-vulkan
          args:
            - "--model"
            - "/models/Devstral-Small-2-24B-Instruct-2512-IQ4_XS-4.04bpw.gguf"
            - "--host"
            - "0.0.0.0"
            - "--port"
            - "8080"
            - "--n-gpu-layers"
            - "999"
            - "--metrics"
            # performance tuning
            - "--ctx-size"
            - "32768"
            - "--parallel"
            - "4"
            # KV cache quantization
            - "--cache-type-k"
            - "q8_0"
            - "--cache-type-v"
            - "q8_0"
          ports:
            - name: http
              containerPort: 8080
          # NOTE(review): privileged is presumably only needed to open the
          # host's /dev/dri devices mounted below — confirm whether a
          # narrower securityContext would do.
          securityContext:
            privileged: true
          volumeMounts:
            - name: models
              mountPath: /models
            - name: dri
              mountPath: /dev/dri
          resources:
            requests:
              cpu: "2"
              memory: "4Gi"
            limits:
              cpu: "2"
              memory: "4Gi"
      volumes:
        - name: models
          persistentVolumeClaim:
            claimName: llama-gpu-models-pvc
        - name: dri
          hostPath:
            path: /dev/dri
            type: Directory
---
# Cluster-internal Service for the llama-server-gpu pods; port 8080 is
# forwarded to the container port named "http".
apiVersion: v1
kind: Service
metadata:
  namespace: llama
  name: llama-server-gpu
spec:
  type: ClusterIP
  selector:
    app: llama-server-gpu
  ports:
    - name: http
      targetPort: http
      port: 8080
---
# Scrapes /metrics on the "http" port of the llama-server-gpu pods every
# 15 seconds.
apiVersion: monitoring.coreos.com/v1
kind: PodMonitor
metadata:
  namespace: llama
  name: llama-server-gpu
  labels:
    app: llama-server-gpu
spec:
  selector:
    matchLabels:
      app: llama-server-gpu
  namespaceSelector:
    matchNames:
      - llama
  podMetricsEndpoints:
    - path: /metrics
      port: http
      interval: 15s

View File

@@ -0,0 +1,42 @@
# Namespace that holds the llama.cpp servers and their supporting workloads.
apiVersion: v1
kind: Namespace
metadata:
  name: llama
---
# Optional dedicated StorageClass, currently disabled. While it stays
# commented out the claims below carry no storageClassName.
# apiVersion: storage.k8s.io/v1
# kind: StorageClass
# metadata:
#   name: longhorn-llama
# provisioner: driver.longhorn.io
# parameters:
#   numberOfReplicas: "2"
#   staleReplicaTimeout: "30"
# allowVolumeExpansion: true
# reclaimPolicy: Retain
# volumeBindingMode: Immediate
# ---
# Model store referenced by the GPU llama-server Deployments.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  namespace: llama
  name: llama-gpu-models-pvc
spec:
  # storageClassName: longhorn-llama
  resources:
    requests:
      storage: 50Gi
  accessModes:
    - ReadWriteOnce
---
# Model store referenced by the CPU llama-server Deployment.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  namespace: llama
  name: llama-cpu-models-pvc
spec:
  # storageClassName: longhorn-llama
  resources:
    requests:
      storage: 100Gi
  accessModes:
    - ReadWriteOnce

166
manifests/llama/rp.yaml Normal file
View File

@@ -0,0 +1,166 @@
# llama.cpp server (Vulkan image) for the role-play model, scheduled onto
# nodes labelled gpu=amd. The download-model init container fetches the GGUF
# file into the models volume when it is not there yet; the llama container
# then serves it on port 8080 with the /metrics endpoint enabled.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: llama-server-gpu-rp
  namespace: llama
spec:
  replicas: 1
  # Recreate, matching the llama-server-cpu Deployment. With the default
  # RollingUpdate the replacement pod starts while the old one is still
  # running; it then either competes for the same GPU or, on another node,
  # cannot attach the ReadWriteOnce models volume.
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app: llama-server-gpu-rp
  template:
    metadata:
      labels:
        app: llama-server-gpu-rp
      annotations:
        prometheus.io/scrape: "true"
        prometheus.io/port: "8080"
        prometheus.io/path: "/metrics"
    spec:
      nodeSelector:
        gpu: amd
      initContainers:
        - name: download-model
          image: python:3.11-slim
          env:
            - name: HF_HOME
              value: /models/.hf
            - name: MODEL_REPO
              value: "mradermacher/Omega-Darker-Gaslight_The-Final-Forgotten-Fever-Dream-24B-GGUF"
            - name: MODEL_FILE
              value: "Omega-Darker-Gaslight_The-Final-Forgotten-Fever-Dream-24B.Q4_K_S.gguf"
            # optional, only if you need gated/private models
            # - name: HUGGING_FACE_HUB_TOKEN
            #   valueFrom:
            #     secretKeyRef:
            #       name: hf-token
            #       key: token
          # The script installs an unpinned huggingface_hub, so it must only
          # use arguments the current release accepts: the deprecated
          # local_dir_use_symlinks keyword is intentionally not passed.
          command:
            - /bin/sh
            - -c
            - |
              set -eux
              MODEL_PATH="/models/${MODEL_FILE}"
              if [ -f "${MODEL_PATH}" ]; then
                echo "Model already exists at ${MODEL_PATH}, skipping download"
                exit 0
              fi
              echo "Installing Hugging Face Hub downloader"
              pip install --no-cache-dir huggingface_hub
              echo "Downloading ${MODEL_REPO}/${MODEL_FILE}"
              python - <<'PY'
              import os
              from huggingface_hub import hf_hub_download
              repo_id = os.environ["MODEL_REPO"]
              filename = os.environ["MODEL_FILE"]
              token = os.environ.get("HUGGING_FACE_HUB_TOKEN")
              path = hf_hub_download(
                  repo_id=repo_id,
                  filename=filename,
                  local_dir="/models",
                  token=token,
              )
              print(f"Downloaded to: {path}")
              PY
              ls -lah /models
          volumeMounts:
            - name: models
              mountPath: /models
      containers:
        - name: llama
          image: ghcr.io/ggml-org/llama.cpp:server-vulkan
          args:
            - "--model"
            - "/models/Omega-Darker-Gaslight_The-Final-Forgotten-Fever-Dream-24B.Q4_K_S.gguf"
            - "--host"
            - "0.0.0.0"
            - "--port"
            - "8080"
            - "--n-gpu-layers"
            - "999"
            - "--metrics"
            # performance tuning
            - "--ctx-size"
            - "32768"
            - "--parallel"
            - "1"
            # KV cache quantization
            - "--cache-type-k"
            - "q8_0"
            - "--cache-type-v"
            - "q8_0"
          ports:
            - name: http
              containerPort: 8080
          # NOTE(review): privileged is presumably only needed to open the
          # host's /dev/dri devices mounted below — confirm whether a
          # narrower securityContext would do.
          securityContext:
            privileged: true
          volumeMounts:
            - name: models
              mountPath: /models
            - name: dri
              mountPath: /dev/dri
          resources:
            requests:
              cpu: "2"
              memory: "4Gi"
            limits:
              cpu: "2"
              memory: "4Gi"
      volumes:
        - name: models
          persistentVolumeClaim:
            claimName: llama-gpu-models-pvc
        - name: dri
          hostPath:
            path: /dev/dri
            type: Directory
---
# Cluster-internal Service for the llama-server-gpu-rp pods; port 8080 is
# forwarded to the container port named "http".
apiVersion: v1
kind: Service
metadata:
  namespace: llama
  name: llama-server-gpu-rp
spec:
  type: ClusterIP
  selector:
    app: llama-server-gpu-rp
  ports:
    - name: http
      targetPort: http
      port: 8080
---
# Scrapes /metrics on the "http" port of the llama-server-gpu-rp pods every
# 15 seconds.
apiVersion: monitoring.coreos.com/v1
kind: PodMonitor
metadata:
  namespace: llama
  name: llama-server-gpu-rp
  labels:
    app: llama-server-gpu-rp
spec:
  selector:
    matchLabels:
      app: llama-server-gpu-rp
  namespaceSelector:
    matchNames:
      - llama
  podMetricsEndpoints:
    - path: /metrics
      port: http
      interval: 15s

View File

@@ -0,0 +1,26 @@
# Publishes the Longhorn dashboard at longhorn.mrt0rtikize.ru through Traefik,
# with a certificate requested from the letsencrypt-production ClusterIssuer.
# NOTE(review): no authentication middleware is attached to this route —
# confirm the dashboard is protected some other way before exposing it.
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  namespace: longhorn-system
  name: longhorn-ingress
  annotations:
    cert-manager.io/cluster-issuer: letsencrypt-production
    traefik.ingress.kubernetes.io/router.entrypoints: websecure
    traefik.ingress.kubernetes.io/router.tls: "true"
spec:
  # Traefik is the ingress controller.
  ingressClassName: traefik
  tls:
    - secretName: longhorn-tls
      hosts:
        - longhorn.mrt0rtikize.ru
  rules:
    - host: longhorn.mrt0rtikize.ru
      http:
        paths:
          - pathType: Prefix
            path: /
            backend:
              service:
                # Service that fronts the Longhorn dashboard, on its port 80.
                name: longhorn-frontend
                port:
                  number: 80

View File

@@ -0,0 +1,23 @@
# Longhorn-backed StorageClass: two replicas per volume, expandable, bound
# immediately, and retained after the claim is deleted.
# NOTE(review): the Longhorn chart may already install a StorageClass with
# this same name — verify the two definitions do not conflict.
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: longhorn
provisioner: driver.longhorn.io
reclaimPolicy: Retain
volumeBindingMode: Immediate
allowVolumeExpansion: true
parameters:
  numberOfReplicas: "2"
  staleReplicaTimeout: "30"
---
# 2Gi claim against the class above. No namespace is set here, so it lands in
# whatever namespace the manifest is applied to.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: longhorn-pvc
spec:
  storageClassName: longhorn
  resources:
    requests:
      storage: 2Gi
  accessModes:
    - ReadWriteOnce

View File

@@ -0,0 +1,10 @@
# Address range MetalLB may hand out to LoadBalancer Services.
apiVersion: metallb.io/v1beta1
kind: IPAddressPool
metadata:
  namespace: metallb-system
  name: default-address-pool
spec:
  autoAssign: true
  avoidBuggyIPs: true
  addresses:
    - 10.0.0.120-10.0.0.200

View File

@@ -0,0 +1,8 @@
# Announces addresses from default-address-pool in layer-2 mode.
apiVersion: metallb.io/v1beta1
kind: L2Advertisement
metadata:
  namespace: metallb-system
  name: default-advertisement
spec:
  ipAddressPools:
    - default-address-pool

View File

@@ -0,0 +1,26 @@
# Publishes Grafana at grafana.mrt0rtikize.ru through Traefik's websecure
# entrypoint, with a certificate requested from the letsencrypt-production
# ClusterIssuer.
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  namespace: metrics
  name: grafana
  annotations:
    cert-manager.io/cluster-issuer: letsencrypt-production
    traefik.ingress.kubernetes.io/router.entrypoints: websecure
    traefik.ingress.kubernetes.io/router.tls: "true"
spec:
  ingressClassName: traefik
  tls:
    - secretName: grafana-tls
      hosts:
        - grafana.mrt0rtikize.ru
  rules:
    - host: grafana.mrt0rtikize.ru
      http:
        paths:
          - pathType: Prefix
            path: /
            backend:
              service:
                name: kube-prometheus-stack-grafana
                port:
                  number: 80

View File

@@ -0,0 +1,90 @@
# Helm values for the kube-prometheus-stack chart, deployed into the
# "metrics" namespace.
fullnameOverride: kube-prometheus
namespaceOverride: metrics

prometheusOperator:
  namespace: metrics
  admissionWebhooks:
    failurePolicy: Ignore

alertmanager:
  enabled: true
  alertmanagerSpec:
    resources:
      requests:
        cpu: 50m
        memory: 128Mi
      limits:
        cpu: 200m
        memory: 512Mi
    storage:
      volumeClaimTemplate:
        spec:
          accessModes:
            - ReadWriteOnce
          resources:
            requests:
              storage: 10Gi

prometheus:
  enabled: true
  prometheusSpec:
    replicas: 1
    retention: 15d
    walCompression: true
    # false: the ServiceMonitor/PodMonitor selectors are not narrowed to this
    # Helm release's own labels.
    serviceMonitorSelectorNilUsesHelmValues: false
    podMonitorSelectorNilUsesHelmValues: false
    resources:
      requests:
        cpu: 100m
        memory: 512Mi
      limits:
        cpu: 1000m
        memory: 1Gi
    storageSpec:
      volumeClaimTemplate:
        spec:
          accessModes:
            - ReadWriteOnce
          resources:
            requests:
              storage: 50Gi
    # Samples are also shipped to VictoriaMetrics.
    remoteWrite:
      - url: http://victoria-metrics.metrics.svc.cluster.local:8428/api/v1/write
        # NOTE(review): capacity is smaller than maxSamplesPerSend; confirm
        # this queue tuning against the Prometheus remote-write guidance.
        queueConfig:
          maxSamplesPerSend: 10000
          capacity: 5000
          maxShards: 30

# Scrape targets for these control-plane components are switched off.
kubeEtcd:
  enabled: false
kubeControllerManager:
  enabled: false
kubeScheduler:
  enabled: false
kubeProxy:
  enabled: false

grafana:
  enabled: true
  adminUser: admin
  # NOTE(review): placeholder password committed to Git — replace it before
  # Grafana is reachable through the Ingress.
  adminPassword: change-me
  defaultDashboardsEnabled: true
  resources:
    requests:
      cpu: 50m
      memory: 256Mi
    limits:
      cpu: 200m
      memory: 512Mi
  persistence:
    enabled: true
    size: 10Gi
  additionalDataSources:
    - name: victoria-metrics
      type: prometheus
      access: proxy
      url: http://victoria-metrics.metrics.svc.cluster.local:8428
      isDefault: false

View File

@@ -0,0 +1,4 @@
# Namespace for the monitoring stack (Prometheus, Grafana, VictoriaMetrics).
apiVersion: v1
kind: Namespace
metadata:
  name: metrics

View File

@@ -0,0 +1,19 @@
# Stable "victoria-metrics" Service name on port 8428. It is the host used by
# the Prometheus remoteWrite URL and the Grafana data source in the
# kube-prometheus-stack values.
apiVersion: v1
kind: Service
metadata:
  namespace: metrics
  name: victoria-metrics
  labels:
    app.kubernetes.io/name: victoria-metrics-single
    app.kubernetes.io/instance: victoria-metrics-single
    app: server
spec:
  type: ClusterIP
  selector:
    app.kubernetes.io/name: victoria-metrics-single
    app.kubernetes.io/instance: victoria-metrics-single
    app: server
  ports:
    - name: http
      targetPort: 8428
      port: 8428

View File

@@ -0,0 +1,23 @@
# Helm values for the victoria-metrics-single chart, deployed into the
# "metrics" namespace: one server replica, 30d retention, 200Gi volume.
fullnameOverride: victoria-metrics-single
namespaceOverride: metrics

server:
  replicaCount: 1
  retentionPeriod: 30d
  scrapeInterval: 30s
  persistentVolume:
    enabled: true
    size: 200Gi
  resources:
    requests:
      cpu: 100m
      memory: 256Mi
    limits:
      cpu: 500m
      memory: 1Gi
  service:
    type: ClusterIP
    port: 8428

serviceAccount:
  create: true

View File

@@ -0,0 +1,122 @@
# ConfigMap carrying SillyTavern's config.yaml. The sillytavern Deployment
# mounts this single key at /home/node/app/config/config.yaml via subPath.
# NOTE(review): basicAuthMode is false and basicAuthUser holds placeholder
# credentials here, while the sillytavern-auth Secret injects
# SILLYTAVERN_BASICAUTH* environment variables — presumably to override these
# values; confirm the override takes effect.
# NOTE(review): whitelistMode is false and listen is true, so access control
# appears to rest on that basic-auth override alone — verify.
apiVersion: v1
kind: ConfigMap
metadata:
name: sillytavern-config
namespace: sillytavern
data:
config.yaml: |
dataRoot: ./data
listen: true
listenAddress:
ipv4: 0.0.0.0
ipv6: '[::]'
protocol:
ipv4: true
ipv6: false
dnsPreferIPv6: false
browserLaunch:
enabled: false
browser: 'default'
hostname: 'auto'
port: -1
avoidLocalhost: false
port: 8000
ssl:
enabled: false
certPath: "./certs/cert.pem"
keyPath: "./certs/privkey.pem"
keyPassphrase: ""
whitelistMode: false
enableForwardedWhitelist: false
whitelist:
- ::1
- 127.0.0.1
whitelistDockerHosts: false
basicAuthMode: false
basicAuthUser:
username: "user"
password: "password"
enableCorsProxy: false
requestProxy:
enabled: false
url: "socks5://username:password@example.com:1080"
bypass:
- localhost
- 127.0.0.1
enableUserAccounts: false
enableDiscreetLogin: false
perUserBasicAuth: false
sso:
autheliaAuth: false
authentikAuth: false
hostWhitelist:
enabled: false
scan: true
hosts: []
sessionTimeout: -1
disableCsrfProtection: false
securityOverride: false
logging:
enableAccessLog: true
minLogLevel: 0
rateLimiting:
preferRealIpHeader: false
backups:
common:
numberOfBackups: 50
chat:
enabled: true
checkIntegrity: true
maxTotalBackups: -1
throttleInterval: 10000
thumbnails:
enabled: true
format: "jpg"
quality: 95
dimensions: { 'bg': [160, 90], 'avatar': [96, 144], 'persona': [96, 144] }
performance:
lazyLoadCharacters: false
memoryCacheCapacity: '100mb'
useDiskCache: true
cacheBuster:
enabled: false
userAgentPattern: ''
allowKeysExposure: false
skipContentCheck: false
whitelistImportDomains:
- localhost
- cdn.discordapp.com
- files.catbox.moe
- raw.githubusercontent.com
- char-archive.evulid.cc
requestOverrides: []
extensions:
enabled: true
autoUpdate: true
models:
autoDownload: true
classification: Cohee/distilbert-base-uncased-go-emotions-onnx
captioning: Xenova/vit-gpt2-image-captioning
embedding: Cohee/jina-embeddings-v2-base-en
speechToText: Xenova/whisper-small
textToSpeech: Xenova/speecht5_tts
enableDownloadableTokenizers: true
promptPlaceholder: "[Start a new chat]"
openai:
randomizeUserId: false
captionSystemPrompt: ""
deepl:
formality: default
mistral:
enablePrefix: false
ollama:
keepAlive: -1
batchSize: -1
claude:
enableSystemPromptCache: false
cachingAtDepth: -1
extendedTTL: false
gemini:
apiVersion: 'v1beta'
enableServerPlugins: false
enableServerPluginsAutoUpdate: true

View File

@@ -0,0 +1,61 @@
# SillyTavern web UI. Its config file comes from the sillytavern-config
# ConfigMap, basic-auth settings from the sillytavern-auth Secret, and data,
# plugins and third-party extensions each live on their own PVC.
apiVersion: apps/v1
kind: Deployment
metadata:
  namespace: sillytavern
  name: sillytavern
spec:
  replicas: 1
  # Old pod is stopped before the new one starts.
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app: sillytavern
  template:
    metadata:
      labels:
        app: sillytavern
    spec:
      volumes:
        - name: config
          configMap:
            name: sillytavern-config
        - name: data
          persistentVolumeClaim:
            claimName: sillytavern-data
        - name: plugins
          persistentVolumeClaim:
            claimName: sillytavern-plugins
        - name: extensions
          persistentVolumeClaim:
            claimName: sillytavern-extensions
      containers:
        - name: sillytavern
          # NOTE(review): ":latest" is not reproducible; consider pinning a tag.
          image: ghcr.io/sillytavern/sillytavern:latest
          ports:
            - protocol: TCP
              containerPort: 8000
          env:
            - name: NODE_ENV
              value: production
            - name: FORCE_COLOR
              value: "1"
          envFrom:
            - secretRef:
                name: sillytavern-auth
          volumeMounts:
            # subPath mounts just the one file into the config directory.
            - name: config
              mountPath: /home/node/app/config/config.yaml
              subPath: config.yaml
            - name: data
              mountPath: /home/node/app/data
            - name: plugins
              mountPath: /home/node/app/plugins
            - name: extensions
              mountPath: /home/node/app/public/scripts/extensions/third-party
          resources:
            requests:
              cpu: "1"
              memory: 1Gi
            limits:
              cpu: "4"
              memory: 4Gi

View File

@@ -0,0 +1,30 @@
# Publishes SillyTavern at sillytavern.mrt0rtikize.ru through Traefik's
# websecure entrypoint, with a certificate requested from the
# letsencrypt-production ClusterIssuer.
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  namespace: sillytavern
  name: sillytavern
  labels:
    app.kubernetes.io/name: sillytavern
    app.kubernetes.io/component: frontend
    app.kubernetes.io/part-of: sillytavern
  annotations:
    cert-manager.io/cluster-issuer: letsencrypt-production
    traefik.ingress.kubernetes.io/router.entrypoints: websecure
    traefik.ingress.kubernetes.io/router.tls: "true"
spec:
  ingressClassName: traefik
  tls:
    - secretName: sillytavern-tls
      hosts:
        - sillytavern.mrt0rtikize.ru
  rules:
    - host: sillytavern.mrt0rtikize.ru
      http:
        paths:
          - pathType: Prefix
            path: /
            backend:
              service:
                name: sillytavern
                port:
                  number: 8000

View File

@@ -0,0 +1,4 @@
# Namespace for the SillyTavern deployment and its volumes.
apiVersion: v1
kind: Namespace
metadata:
  name: sillytavern

View File

@@ -0,0 +1,35 @@
# Mounted by the sillytavern Deployment at /home/node/app/data.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  namespace: sillytavern
  name: sillytavern-data
spec:
  resources:
    requests:
      storage: 5Gi
  accessModes:
    - ReadWriteOnce
---
# Mounted by the sillytavern Deployment at /home/node/app/plugins.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  namespace: sillytavern
  name: sillytavern-plugins
spec:
  resources:
    requests:
      storage: 1Gi
  accessModes:
    - ReadWriteOnce
---
# Mounted by the sillytavern Deployment at its third-party extensions path.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  namespace: sillytavern
  name: sillytavern-extensions
spec:
  resources:
    requests:
      storage: 1Gi
  accessModes:
    - ReadWriteOnce

View File

@@ -0,0 +1,10 @@
# Basic-auth settings injected into the sillytavern container as environment
# variables through envFrom.
# NOTE(review): the password is committed to Git in plain text and protects
# an Ingress-exposed app. Rotate it and supply the Secret from a secret
# manager (e.g. Sealed Secrets or External Secrets) instead.
apiVersion: v1
kind: Secret
metadata:
name: sillytavern-auth
namespace: sillytavern
type: Opaque
stringData:
SILLYTAVERN_BASICAUTHMODE: "true"
SILLYTAVERN_BASICAUTHUSER_USERNAME: admin
SILLYTAVERN_BASICAUTHUSER_PASSWORD: 0cdaa30c396dae77

View File

@@ -0,0 +1,13 @@
# Cluster-internal Service for the SillyTavern pods on port 8000; it is the
# backend of the sillytavern Ingress.
apiVersion: v1
kind: Service
metadata:
  namespace: sillytavern
  name: sillytavern
spec:
  type: ClusterIP
  selector:
    app: sillytavern
  ports:
    - protocol: TCP
      targetPort: 8000
      port: 8000

62
metrics/README.md Normal file
View File

@@ -0,0 +1,62 @@
# metrics stack
Opinionated manifests for deploying kube-prometheus-stack (Prometheus Operator + Grafana) together with a VictoriaMetrics single-node database in the `metrics` namespace.
## Install / upgrade
```sh
kubectl apply -f metrics/namespace.yaml
# kube-prometheus-stack (chart from the prometheus-community repo)
helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
helm repo update
helm upgrade --install kube-prometheus-stack prometheus-community/kube-prometheus-stack \
--namespace metrics \
--values metrics/kube-prometheus-stack-values.yaml
kubectl --namespace metrics get secret kube-prometheus-stack-grafana \
-o jsonpath="{.data.admin-password}" | base64 -d
echo
# expose grafana via Traefik
kubectl apply -f metrics/grafana-ingress.yaml
kubectl -n metrics get ingress grafana
# victoria metrics for long-term storage
helm repo add victoria-metrics https://victoriametrics.github.io/helm-charts
helm upgrade --install victoria-metrics-single victoria-metrics/victoria-metrics-single \
--namespace metrics \
--values metrics/victoria-metrics-single-values.yaml
# expose victoria metrics via ClusterIP for Prometheus/Grafana
kubectl apply -f metrics/victoria-metrics-service.yaml
```
The manifests do not set a storage class, so every PersistentVolumeClaim is provisioned by the cluster's default StorageClass; add `storageClassName`/`storageClass` fields and adjust the capacities if you prefer something else.
Before applying `metrics/grafana-ingress.yaml`, update the host (`grafana.mrt0rtikize.ru`) to your own domain and, if needed, change the `cert-manager.io/cluster-issuer` annotation to match your staging/production workflow. The ingress uses the `traefik` ingress class.
## Components
- **Prometheus Operator** provisions Prometheus, Alertmanager and related CRDs. Remote write targets VictoriaMetrics for durable retention.
- **Grafana** is pre-provisioned with persistence enabled and a secondary data source pointing at VictoriaMetrics.
- **VictoriaMetrics** stores metrics for long-term retention while also serving query traffic for Grafana. A dedicated ClusterIP service (`metrics/victoria-metrics-service.yaml`) exposes port 8428 for Prometheus remote write and Grafana queries.
## Database choices
Prometheus ships with an embedded TSDB. For longer retention, clustering or multi-tenant needs you can offload data to:
- **VictoriaMetrics** (single, clustered, or managed) — cost-efficient, Prometheus-compatible, supports multi-year retention.
- **Thanos / Cortex / Grafana Mimir** — horizontally scalable, object-storage-backed TSDBs with multi-cluster federation.
- **ClickHouse / TimescaleDB / PostgreSQL** — SQL stores for advanced analytics (requires Promscale or a similar adapter).
- **Graphite / InfluxDB** — legacy or streaming-friendly stores; integrate via remote write adapters.
Pick the backend that matches your retention and query latency requirements. Remote write configuration lives under `prometheus.prometheusSpec.remoteWrite` in `kube-prometheus-stack-values.yaml`.
## Post-install checks
```sh
kubectl -n metrics get pods
kubectl -n metrics get svc
kubectl -n metrics get prometheus,prometheusrules,servicemonitors -A
```

View File

@@ -0,0 +1,26 @@
# Publishes Grafana at grafana.mrt0rtikize.ru through Traefik's websecure
# entrypoint, with a certificate requested from the letsencrypt-production
# ClusterIssuer.
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  namespace: metrics
  name: grafana
  annotations:
    cert-manager.io/cluster-issuer: letsencrypt-production
    traefik.ingress.kubernetes.io/router.entrypoints: websecure
    traefik.ingress.kubernetes.io/router.tls: "true"
spec:
  ingressClassName: traefik
  tls:
    - secretName: grafana-tls
      hosts:
        - grafana.mrt0rtikize.ru
  rules:
    - host: grafana.mrt0rtikize.ru
      http:
        paths:
          - pathType: Prefix
            path: /
            backend:
              service:
                name: kube-prometheus-stack-grafana
                port:
                  number: 80

View File

@@ -0,0 +1,90 @@
# Helm values for the kube-prometheus-stack chart, deployed into the
# "metrics" namespace.
fullnameOverride: kube-prometheus
namespaceOverride: metrics

prometheusOperator:
  namespace: metrics
  admissionWebhooks:
    failurePolicy: Ignore

alertmanager:
  enabled: true
  alertmanagerSpec:
    resources:
      requests:
        cpu: 50m
        memory: 128Mi
      limits:
        cpu: 200m
        memory: 512Mi
    storage:
      volumeClaimTemplate:
        spec:
          accessModes:
            - ReadWriteOnce
          resources:
            requests:
              storage: 10Gi

prometheus:
  enabled: true
  prometheusSpec:
    replicas: 1
    retention: 15d
    walCompression: true
    # false: the ServiceMonitor/PodMonitor selectors are not narrowed to this
    # Helm release's own labels.
    serviceMonitorSelectorNilUsesHelmValues: false
    podMonitorSelectorNilUsesHelmValues: false
    resources:
      requests:
        cpu: 100m
        memory: 512Mi
      limits:
        cpu: 1000m
        memory: 1Gi
    storageSpec:
      volumeClaimTemplate:
        spec:
          accessModes:
            - ReadWriteOnce
          resources:
            requests:
              storage: 50Gi
    # Samples are also shipped to VictoriaMetrics.
    remoteWrite:
      - url: http://victoria-metrics.metrics.svc.cluster.local:8428/api/v1/write
        # NOTE(review): capacity is smaller than maxSamplesPerSend; confirm
        # this queue tuning against the Prometheus remote-write guidance.
        queueConfig:
          maxSamplesPerSend: 10000
          capacity: 5000
          maxShards: 30

# Scrape targets for these control-plane components are switched off.
kubeEtcd:
  enabled: false
kubeControllerManager:
  enabled: false
kubeScheduler:
  enabled: false
kubeProxy:
  enabled: false

grafana:
  enabled: true
  adminUser: admin
  # NOTE(review): placeholder password committed to Git — replace it before
  # Grafana is reachable through the Ingress.
  adminPassword: change-me
  defaultDashboardsEnabled: true
  resources:
    requests:
      cpu: 50m
      memory: 256Mi
    limits:
      cpu: 200m
      memory: 512Mi
  persistence:
    enabled: true
    size: 10Gi
  additionalDataSources:
    - name: victoria-metrics
      type: prometheus
      access: proxy
      url: http://victoria-metrics.metrics.svc.cluster.local:8428
      isDefault: false

4
metrics/namespace.yaml Normal file
View File

@@ -0,0 +1,4 @@
---
# Namespace holding the monitoring resources (`metrics`).
kind: Namespace
apiVersion: v1
metadata:
  name: metrics

View File

@@ -0,0 +1,19 @@
---
# ClusterIP Service named `victoria-metrics` that forwards port 8428 to the
# pods matched by the selector below.
kind: Service
apiVersion: v1
metadata:
  name: victoria-metrics
  namespace: metrics
  labels:
    app: server
    app.kubernetes.io/name: victoria-metrics-single
    app.kubernetes.io/instance: victoria-metrics-single
spec:
  type: ClusterIP
  # NOTE(review): presumably these mirror the pod labels produced by the
  # victoria-metrics-single chart - verify against the rendered pods,
  # otherwise the Service has no endpoints.
  selector:
    app: server
    app.kubernetes.io/name: victoria-metrics-single
    app.kubernetes.io/instance: victoria-metrics-single
  ports:
    - name: http
      port: 8428
      targetPort: 8428

View File

@@ -0,0 +1,23 @@
---
# Helm values for a single-replica VictoriaMetrics server in the `metrics`
# namespace.
namespaceOverride: metrics
fullnameOverride: victoria-metrics-single

server:
  replicaCount: 1
  retentionPeriod: 30d
  scrapeInterval: 30s
  # On-disk storage for the time-series data.
  persistentVolume:
    enabled: true
    size: 200Gi
  resources:
    limits:
      cpu: 500m
      memory: 1Gi
    requests:
      cpu: 100m
      memory: 256Mi
  service:
    type: ClusterIP
    port: 8428

serviceAccount:
  create: true

122
sillytavern/configmap.yaml Normal file
View File

@@ -0,0 +1,122 @@
---
# ConfigMap carrying the SillyTavern server configuration as a single
# `config.yaml` key. The block scalar below is delivered to the application
# verbatim, so no comments are placed inside it.
kind: ConfigMap
apiVersion: v1
metadata:
  name: sillytavern-config
  namespace: sillytavern
data:
  config.yaml: |
    dataRoot: ./data
    listen: true
    listenAddress:
      ipv4: 0.0.0.0
      ipv6: '[::]'
    protocol:
      ipv4: true
      ipv6: false
    dnsPreferIPv6: false
    browserLaunch:
      enabled: false
      browser: 'default'
      hostname: 'auto'
      port: -1
      avoidLocalhost: false
    port: 8000
    ssl:
      enabled: false
      certPath: "./certs/cert.pem"
      keyPath: "./certs/privkey.pem"
      keyPassphrase: ""
    whitelistMode: false
    enableForwardedWhitelist: false
    whitelist:
      - ::1
      - 127.0.0.1
    whitelistDockerHosts: false
    basicAuthMode: false
    basicAuthUser:
      username: "user"
      password: "password"
    enableCorsProxy: false
    requestProxy:
      enabled: false
      url: "socks5://username:password@example.com:1080"
      bypass:
        - localhost
        - 127.0.0.1
    enableUserAccounts: false
    enableDiscreetLogin: false
    perUserBasicAuth: false
    sso:
      autheliaAuth: false
      authentikAuth: false
    hostWhitelist:
      enabled: false
      scan: true
      hosts: []
    sessionTimeout: -1
    disableCsrfProtection: false
    securityOverride: false
    logging:
      enableAccessLog: true
      minLogLevel: 0
    rateLimiting:
      preferRealIpHeader: false
    backups:
      common:
        numberOfBackups: 50
      chat:
        enabled: true
        checkIntegrity: true
        maxTotalBackups: -1
        throttleInterval: 10000
    thumbnails:
      enabled: true
      format: "jpg"
      quality: 95
      dimensions: { 'bg': [160, 90], 'avatar': [96, 144], 'persona': [96, 144] }
    performance:
      lazyLoadCharacters: false
      memoryCacheCapacity: '100mb'
      useDiskCache: true
    cacheBuster:
      enabled: false
      userAgentPattern: ''
    allowKeysExposure: false
    skipContentCheck: false
    whitelistImportDomains:
      - localhost
      - cdn.discordapp.com
      - files.catbox.moe
      - raw.githubusercontent.com
      - char-archive.evulid.cc
    requestOverrides: []
    extensions:
      enabled: true
      autoUpdate: true
      models:
        autoDownload: true
        classification: Cohee/distilbert-base-uncased-go-emotions-onnx
        captioning: Xenova/vit-gpt2-image-captioning
        embedding: Cohee/jina-embeddings-v2-base-en
        speechToText: Xenova/whisper-small
        textToSpeech: Xenova/speecht5_tts
    enableDownloadableTokenizers: true
    promptPlaceholder: "[Start a new chat]"
    openai:
      randomizeUserId: false
      captionSystemPrompt: ""
    deepl:
      formality: default
    mistral:
      enablePrefix: false
    ollama:
      keepAlive: -1
      batchSize: -1
    claude:
      enableSystemPromptCache: false
      cachingAtDepth: -1
      extendedTTL: false
    gemini:
      apiVersion: 'v1beta'
    enableServerPlugins: false
    enableServerPluginsAutoUpdate: true

View File

@@ -0,0 +1,61 @@
---
# Single-replica SillyTavern Deployment. Configuration comes from the
# `sillytavern-config` ConfigMap, credentials from the `sillytavern-auth`
# Secret, and state lives on three PersistentVolumeClaims.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: sillytavern
  namespace: sillytavern
spec:
  replicas: 1
  # Recreate: the old pod is stopped before the new one starts, so the
  # ReadWriteOnce claims are never mounted by two pods at once.
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app: sillytavern
  template:
    metadata:
      labels:
        app: sillytavern
    spec:
      containers:
        - name: sillytavern
          # NOTE(review): `latest` is a moving tag, so a pod restart can pull
          # a different build than the one reviewed in Git - consider pinning
          # a release tag or digest.
          image: ghcr.io/sillytavern/sillytavern:latest
          ports:
            - name: http
              containerPort: 8000
              protocol: TCP
          # Keep the pod out of the Service endpoints until the server is
          # actually accepting connections. A TCP check is used rather than
          # an HTTP GET because basic auth is expected to be switched on via
          # the Secret below, in which case an unauthenticated request would
          # presumably not return 2xx.
          readinessProbe:
            tcpSocket:
              port: http
            periodSeconds: 10
            failureThreshold: 3
          env:
            - name: NODE_ENV
              value: production
            - name: FORCE_COLOR
              value: "1"
          # Every key of the Secret becomes an environment variable.
          envFrom:
            - secretRef:
                name: sillytavern-auth
          volumeMounts:
            # subPath mounts only the single file, leaving the rest of the
            # image's config directory in place.
            - name: config
              mountPath: /home/node/app/config/config.yaml
              subPath: config.yaml
            - name: data
              mountPath: /home/node/app/data
            - name: plugins
              mountPath: /home/node/app/plugins
            - name: extensions
              mountPath: /home/node/app/public/scripts/extensions/third-party
          resources:
            requests:
              cpu: "1"
              memory: 1Gi
            limits:
              cpu: "4"
              memory: 4Gi
      volumes:
        - name: config
          configMap:
            name: sillytavern-config
        - name: data
          persistentVolumeClaim:
            claimName: sillytavern-data
        - name: plugins
          persistentVolumeClaim:
            claimName: sillytavern-plugins
        - name: extensions
          persistentVolumeClaim:
            claimName: sillytavern-extensions

30
sillytavern/ingress.yaml Normal file
View File

@@ -0,0 +1,30 @@
---
# Ingress that publishes SillyTavern at sillytavern.mrt0rtikize.ru through
# Traefik, forwarding to the `sillytavern` Service on port 8000.
kind: Ingress
apiVersion: networking.k8s.io/v1
metadata:
  name: sillytavern
  namespace: sillytavern
  labels:
    app.kubernetes.io/name: sillytavern
    app.kubernetes.io/part-of: sillytavern
    app.kubernetes.io/component: frontend
  annotations:
    # Certificate for the `sillytavern-tls` secret is requested from this
    # issuer.
    cert-manager.io/cluster-issuer: letsencrypt-production
    # Attach the router to Traefik's `websecure` entrypoint with TLS enabled.
    traefik.ingress.kubernetes.io/router.entrypoints: websecure
    traefik.ingress.kubernetes.io/router.tls: "true"
spec:
  ingressClassName: traefik
  rules:
    - host: sillytavern.mrt0rtikize.ru
      http:
        paths:
          - pathType: Prefix
            path: /
            backend:
              service:
                name: sillytavern
                port:
                  number: 8000
  tls:
    - secretName: sillytavern-tls
      hosts:
        - sillytavern.mrt0rtikize.ru

View File

@@ -0,0 +1,4 @@
---
# Namespace holding all SillyTavern resources.
kind: Namespace
apiVersion: v1
metadata:
  name: sillytavern

35
sillytavern/pvc.yaml Normal file
View File

@@ -0,0 +1,35 @@
---
# Claim backing the volume named `data` (5Gi). No storageClassName is set,
# so the cluster's default StorageClass applies.
kind: PersistentVolumeClaim
apiVersion: v1
metadata:
  name: sillytavern-data
  namespace: sillytavern
spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 5Gi
---
# Claim for server plugins (1Gi).
kind: PersistentVolumeClaim
apiVersion: v1
metadata:
  name: sillytavern-plugins
  namespace: sillytavern
spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 1Gi
---
# Claim for third-party extensions (1Gi).
kind: PersistentVolumeClaim
apiVersion: v1
metadata:
  name: sillytavern-extensions
  namespace: sillytavern
spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 1Gi

10
sillytavern/secret.yaml Normal file
View File

@@ -0,0 +1,10 @@
---
# Basic-auth settings handed to the SillyTavern container as environment
# variables.
# NOTE(review): the credential below is stored in plaintext in Git. Rotate it
# and consider an encrypted or externally managed secret instead.
kind: Secret
apiVersion: v1
metadata:
  name: sillytavern-auth
  namespace: sillytavern
type: Opaque
# stringData values must be strings, so each one is quoted explicitly.
stringData:
  SILLYTAVERN_BASICAUTHMODE: "true"
  SILLYTAVERN_BASICAUTHUSER_USERNAME: "admin"
  SILLYTAVERN_BASICAUTHUSER_PASSWORD: "0cdaa30c396dae77"

13
sillytavern/service.yaml Normal file
View File

@@ -0,0 +1,13 @@
---
# ClusterIP Service in front of the pods labelled `app: sillytavern`,
# forwarding TCP port 8000 to container port 8000.
kind: Service
apiVersion: v1
metadata:
  name: sillytavern
  namespace: sillytavern
spec:
  type: ClusterIP
  selector:
    app: sillytavern
  ports:
    - protocol: TCP
      port: 8000
      targetPort: 8000