# Viewer metadata for the original file (not part of the manifest): 203 lines, 5.1 KiB, YAML
---
# Secret holding the LiteLLM proxy master key. The `litellm` Deployment in
# this file reads it through a secretKeyRef and exposes it to the container
# as the LITELLM_MASTER_KEY environment variable.
#
# NOTE(review): the key is stored in plain text in this manifest. Consider
# sourcing it from a secret manager and rotating this value.
# NOTE(review): LiteLLM's documentation says the master key must start with
# `sk-`; this value does not. Verify against the deployed LiteLLM version
# before changing it, since existing clients authenticate with this string.
apiVersion: v1
kind: Secret
metadata:
  name: litellm-secret
  namespace: llama
  labels:
    app.kubernetes.io/name: litellm
    app.kubernetes.io/component: gateway
type: Opaque
# stringData accepts the value as-is; the API server base64-encodes it.
stringData:
  LITELLM_MASTER_KEY: "6991c7c0f02b4bcf"
---
# ConfigMap carrying the LiteLLM proxy configuration. The `litellm`
# Deployment in this file mounts the `config.yaml` key at /app/config.yaml
# and passes that path to the proxy via `--config`.
apiVersion: v1
kind: ConfigMap
metadata:
  name: litellm-config
  namespace: llama
  labels:
    app.kubernetes.io/name: litellm
    app.kubernetes.io/component: gateway
data:
  # Literal block scalar: everything indented below is stored verbatim as the
  # file content. It defines three model aliases (fast, smart, rp), each
  # pointing at an in-cluster llama-server endpoint on port 8080, enables the
  # prometheus callback, and turns on DB-backed model storage.
  # NOTE(review): `store_prompts_in_spend_logs: true` presumably persists
  # prompt content to the database; confirm this is acceptable for the data
  # handled by this gateway.
  config.yaml: |
    model_list:
      - model_name: fast
        litellm_params:
          model: openai/fast
          api_base: "http://llama-server-gpu.llama.svc.cluster.local:8080/v1"
          api_key: none

      - model_name: smart
        litellm_params:
          model: openai/smart
          api_base: "http://llama-server-cpu.llama.svc.cluster.local:8080/v1"
          api_key: none

      - model_name: rp
        litellm_params:
          model: openai/rp
          api_base: "http://llama-server-gpu-rp.llama.svc.cluster.local:8080/v1"
          api_key: none
    litellm_settings:
      callbacks:
        - prometheus
    general_settings:
      store_model_in_db: true
      store_prompts_in_spend_logs: true
---
# Deployment running a single replica of the LiteLLM proxy (gateway).
# It reads the master key from the `litellm-secret` Secret, the Postgres
# credentials from the `litellm-postgres` Secret, and the proxy configuration
# from the `litellm-config` ConfigMap.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: litellm
  namespace: llama
  labels:
    app.kubernetes.io/name: litellm
    app.kubernetes.io/component: gateway
    app.kubernetes.io/part-of: llama-stack
    monitoring: prometheus
spec:
  replicas: 1
  selector:
    matchLabels:
      app.kubernetes.io/name: litellm
      app.kubernetes.io/component: gateway
  template:
    metadata:
      labels:
        app.kubernetes.io/name: litellm
        app.kubernetes.io/component: gateway
        app.kubernetes.io/part-of: llama-stack
        monitoring: prometheus
      # NOTE(review): a PodMonitor for these pods also exists in this file;
      # if the cluster additionally honours prometheus.io/* annotations the
      # pod may be scraped twice. Confirm which mechanism is in use.
      annotations:
        prometheus.io/scrape: "true"
        prometheus.io/port: "4000"
        prometheus.io/path: "/metrics"
    spec:
      containers:
        - name: litellm
          image: ghcr.io/berriai/litellm:v1.82.6.rc.3
          imagePullPolicy: IfNotPresent
          args:
            - "--config"
            - "/app/config.yaml"
          env:
            - name: LITELLM_MASTER_KEY
              valueFrom:
                secretKeyRef:
                  name: litellm-secret
                  key: LITELLM_MASTER_KEY
            - name: POSTGRES_USER
              valueFrom:
                secretKeyRef:
                  name: litellm-postgres
                  key: POSTGRES_USER

            - name: POSTGRES_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: litellm-postgres
                  key: POSTGRES_PASSWORD

            - name: POSTGRES_DB
              valueFrom:
                secretKeyRef:
                  name: litellm-postgres
                  key: POSTGRES_DB

            # Must stay AFTER the three POSTGRES_* entries: Kubernetes only
            # expands $(VAR) references to variables defined earlier in this
            # list.
            # NOTE(review): the password is interpolated verbatim; characters
            # that are special in URLs would need percent-encoding in the
            # Secret value. Verify.
            - name: DATABASE_URL
              value: "postgresql://$(POSTGRES_USER):$(POSTGRES_PASSWORD)@litellm-postgres.llama.svc.cluster.local:5432/$(POSTGRES_DB)"
          ports:
            - name: http
              containerPort: 4000
              protocol: TCP
          # Health probes on LiteLLM's health endpoints so that the Service
          # only routes to a ready proxy and a hung process is restarted.
          # The startup probe allows up to 5 minutes (30 x 10s) for startup
          # before the liveness probe takes over.
          startupProbe:
            httpGet:
              path: /health/readiness
              port: http
            periodSeconds: 10
            failureThreshold: 30
          readinessProbe:
            httpGet:
              path: /health/readiness
              port: http
            periodSeconds: 10
            timeoutSeconds: 5
            failureThreshold: 3
          livenessProbe:
            httpGet:
              path: /health/liveliness
              port: http
            periodSeconds: 15
            timeoutSeconds: 5
            failureThreshold: 3
          volumeMounts:
            # subPath mounts do not receive ConfigMap updates; restart the
            # pod after changing `litellm-config`.
            - name: litellm-config
              mountPath: /app/config.yaml
              subPath: config.yaml
          resources:
            requests:
              cpu: "500m"
              memory: "1Gi"
            limits:
              cpu: "1000m"
              memory: "2Gi"
      volumes:
        - name: litellm-config
          configMap:
            name: litellm-config
---
# ClusterIP Service exposing the LiteLLM gateway pods inside the cluster on
# port 4000. `targetPort: http` refers to the container port named `http`
# in the `litellm` Deployment.
apiVersion: v1
kind: Service
metadata:
  name: litellm
  namespace: llama
  labels:
    app.kubernetes.io/name: litellm
    app.kubernetes.io/component: gateway
    app.kubernetes.io/part-of: llama-stack
    monitoring: prometheus
spec:
  # Same label pair as the Deployment's pod selector.
  selector:
    app.kubernetes.io/name: litellm
    app.kubernetes.io/component: gateway
  ports:
    - name: http
      port: 4000
      targetPort: http
      protocol: TCP
  type: ClusterIP
---
# Ingress publishing the `litellm` Service at litellm.mrt0rtikize.ru through
# the traefik ingress class on the `websecure` entrypoint, with a certificate
# requested from the `letsencrypt-production` cluster issuer.
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: litellm
  namespace: llama
  labels:
    app.kubernetes.io/name: litellm
    app.kubernetes.io/component: gateway
    app.kubernetes.io/part-of: llama-stack
  annotations:
    cert-manager.io/cluster-issuer: letsencrypt-production
    traefik.ingress.kubernetes.io/router.entrypoints: websecure
    traefik.ingress.kubernetes.io/router.tls: "true"
spec:
  ingressClassName: traefik
  tls:
    - hosts:
        - litellm.mrt0rtikize.ru
      # NOTE(review): `web-echo-tls` looks like a secret name belonging to a
      # different application. If another Ingress in this namespace uses the
      # same secretName for a different host, the two certificates will
      # conflict. Confirm, and consider a dedicated name for this host.
      secretName: web-echo-tls
  rules:
    - host: litellm.mrt0rtikize.ru
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: litellm
                port:
                  number: 4000
---
# PodMonitor (Prometheus Operator CRD) scraping /metrics every 30s from pods
# in the `llama` namespace labelled as the litellm gateway. `port: http`
# refers to the container port named `http` in the `litellm` Deployment.
apiVersion: monitoring.coreos.com/v1
kind: PodMonitor
metadata:
  name: litellm
  namespace: llama
  labels:
    app.kubernetes.io/name: litellm
    app.kubernetes.io/component: gateway
    app.kubernetes.io/part-of: llama-stack
    # Presumably the label the Prometheus instance uses to discover
    # monitors; verify against the operator's podMonitorSelector.
    release: kube-prometheus-stack
spec:
  namespaceSelector:
    matchNames:
      - llama
  selector:
    matchLabels:
      app.kubernetes.io/name: litellm
      app.kubernetes.io/component: gateway
  podMetricsEndpoints:
    - port: http
      path: /metrics
      interval: 30s