---
# LiteLLM gateway manifests for the `llama` namespace.
#
# Documents in this file, in order:
#   1. Secret      litellm-secret  - master key consumed by the Deployment
#   2. ConfigMap   litellm-config  - LiteLLM config.yaml (model routing)
#   3. Deployment  litellm         - the gateway pod
#   4. Service     litellm         - ClusterIP in front of the pod
#   5. Ingress     litellm         - Traefik route with cert-manager TLS
#   6. PodMonitor  litellm         - Prometheus Operator scrape config

# Master key injected into the container as LITELLM_MASTER_KEY.
# NOTE(review): the key is stored in plain text in this manifest. If this
# file lives in version control, rotate the key and source it from a secret
# manager (External Secrets, Sealed Secrets, SOPS, ...) instead.
# NOTE(review): LiteLLM's docs say the master key should start with `sk-`;
# the value is left unchanged here so existing clients keep working - confirm.
apiVersion: v1
kind: Secret
metadata:
  name: litellm-secret
  namespace: llama
  labels:
    app.kubernetes.io/name: litellm
    app.kubernetes.io/component: gateway
type: Opaque
stringData:
  LITELLM_MASTER_KEY: "6991c7c0f02b4bcf"
---
# LiteLLM configuration, mounted into the pod at /app/config.yaml.
# Three model aliases (fast, smart, rp) are each routed to an in-cluster
# OpenAI-compatible endpoint on port 8080. The `prometheus` callback is
# enabled, and models and prompts are stored in the database.
# Comments are kept outside the block scalar so the ConfigMap payload
# itself is not altered.
apiVersion: v1
kind: ConfigMap
metadata:
  name: litellm-config
  namespace: llama
  labels:
    app.kubernetes.io/name: litellm
    app.kubernetes.io/component: gateway
data:
  config.yaml: |
    model_list:
      - model_name: fast
        litellm_params:
          model: openai/fast
          api_base: "http://llama-server-gpu.llama.svc.cluster.local:8080/v1"
          api_key: none
      - model_name: smart
        litellm_params:
          model: openai/smart
          api_base: "http://llama-server-cpu.llama.svc.cluster.local:8080/v1"
          api_key: none
      - model_name: rp
        litellm_params:
          model: openai/rp
          api_base: "http://llama-server-gpu-rp.llama.svc.cluster.local:8080/v1"
          api_key: none
    litellm_settings:
      callbacks:
        - prometheus
    general_settings:
      store_model_in_db: true
      store_prompts_in_spend_logs: true
---
# Single-replica LiteLLM gateway.
# Reads its master key from `litellm-secret` and its Postgres credentials
# from the `litellm-postgres` Secret (not defined in this file).
apiVersion: apps/v1
kind: Deployment
metadata:
  name: litellm
  namespace: llama
  labels:
    app.kubernetes.io/name: litellm
    app.kubernetes.io/component: gateway
    app.kubernetes.io/part-of: llama-stack
    monitoring: prometheus
spec:
  replicas: 1
  selector:
    matchLabels:
      app.kubernetes.io/name: litellm
      app.kubernetes.io/component: gateway
  template:
    metadata:
      labels:
        app.kubernetes.io/name: litellm
        app.kubernetes.io/component: gateway
        app.kubernetes.io/part-of: llama-stack
        monitoring: prometheus
      # NOTE(review): this pod is also selected by the PodMonitor below. If
      # the cluster's Prometheus additionally does annotation-based
      # discovery, /metrics will be scraped twice - confirm and drop one.
      annotations:
        prometheus.io/scrape: "true"
        prometheus.io/port: "4000"
        prometheus.io/path: "/metrics"
    spec:
      containers:
        - name: litellm
          image: ghcr.io/berriai/litellm:v1.82.6.rc.3
          imagePullPolicy: IfNotPresent
          args:
            - "--config"
            - "/app/config.yaml"
          env:
            - name: LITELLM_MASTER_KEY
              valueFrom:
                secretKeyRef:
                  name: litellm-secret
                  key: LITELLM_MASTER_KEY
            # The POSTGRES_* variables must stay above DATABASE_URL:
            # Kubernetes only expands $(VAR) references to variables
            # defined earlier in this list.
            - name: POSTGRES_USER
              valueFrom:
                secretKeyRef:
                  name: litellm-postgres
                  key: POSTGRES_USER
            - name: POSTGRES_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: litellm-postgres
                  key: POSTGRES_PASSWORD
            - name: POSTGRES_DB
              valueFrom:
                secretKeyRef:
                  name: litellm-postgres
                  key: POSTGRES_DB
            # NOTE(review): the password is substituted verbatim; a value
            # containing URL-reserved characters (@ : / ? #) would yield
            # an invalid connection string - confirm it is URL-safe.
            - name: DATABASE_URL
              value: "postgresql://$(POSTGRES_USER):$(POSTGRES_PASSWORD)@litellm-postgres.llama.svc.cluster.local:5432/$(POSTGRES_DB)"
          ports:
            - name: http
              containerPort: 4000
              protocol: TCP
          # Health probes use LiteLLM's health endpoints so the Service
          # and Ingress only send traffic once the proxy is ready, and a
          # hung process gets restarted.
          # The startup probe allows up to 30 x 10s = 5 minutes for the
          # first start before the liveness probe takes over.
          startupProbe:
            httpGet:
              path: /health/readiness
              port: http
            periodSeconds: 10
            timeoutSeconds: 5
            failureThreshold: 30
          readinessProbe:
            httpGet:
              path: /health/readiness
              port: http
            periodSeconds: 10
            timeoutSeconds: 5
            failureThreshold: 3
          livenessProbe:
            httpGet:
              path: /health/liveliness
              port: http
            periodSeconds: 15
            timeoutSeconds: 5
            failureThreshold: 3
          volumeMounts:
            # subPath mounts only the single file. Files mounted via
            # subPath do not receive ConfigMap updates, so restart the
            # pod after changing litellm-config.
            - name: litellm-config
              mountPath: /app/config.yaml
              subPath: config.yaml
          resources:
            requests:
              cpu: "500m"
              memory: "1Gi"
            limits:
              cpu: "1000m"
              memory: "2Gi"
      volumes:
        - name: litellm-config
          configMap:
            name: litellm-config
---
# Cluster-internal Service exposing the gateway's `http` port on 4000.
apiVersion: v1
kind: Service
metadata:
  name: litellm
  namespace: llama
  labels:
    app.kubernetes.io/name: litellm
    app.kubernetes.io/component: gateway
    app.kubernetes.io/part-of: llama-stack
    monitoring: prometheus
spec:
  selector:
    app.kubernetes.io/name: litellm
    app.kubernetes.io/component: gateway
  ports:
    - name: http
      port: 4000
      targetPort: http
      protocol: TCP
  type: ClusterIP
---
# Public HTTPS entry point via Traefik (websecure entrypoint); the
# certificate is requested from the letsencrypt-production ClusterIssuer.
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: litellm
  namespace: llama
  labels:
    app.kubernetes.io/name: litellm
    app.kubernetes.io/component: gateway
    app.kubernetes.io/part-of: llama-stack
  annotations:
    cert-manager.io/cluster-issuer: letsencrypt-production
    traefik.ingress.kubernetes.io/router.entrypoints: websecure
    traefik.ingress.kubernetes.io/router.tls: "true"
spec:
  ingressClassName: traefik
  tls:
    - hosts:
        - litellm.mrt0rtikize.ru
      # NOTE(review): `web-echo-tls` looks copied from another
      # application. If another Ingress in this namespace uses the same
      # secret name for a different host, the certificates will
      # conflict - consider renaming (e.g. litellm-tls). Left unchanged
      # here to avoid triggering a re-issue.
      secretName: web-echo-tls
  rules:
    - host: litellm.mrt0rtikize.ru
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: litellm
                port:
                  number: 4000
---
# Prometheus Operator scrape configuration: scrapes /metrics on the pod's
# `http` port every 30s.
# NOTE(review): the `release: kube-prometheus-stack` label is presumably
# what the Prometheus instance's podMonitorSelector matches - confirm.
apiVersion: monitoring.coreos.com/v1
kind: PodMonitor
metadata:
  name: litellm
  namespace: llama
  labels:
    app.kubernetes.io/name: litellm
    app.kubernetes.io/component: gateway
    app.kubernetes.io/part-of: llama-stack
    release: kube-prometheus-stack
spec:
  namespaceSelector:
    matchNames:
      - llama
  selector:
    matchLabels:
      app.kubernetes.io/name: litellm
      app.kubernetes.io/component: gateway
  podMetricsEndpoints:
    - port: http
      path: /metrics
      interval: 30s