# - Add Loki/Prometheus/Grafana stack in logging-stack/
# - Add log-ingest service for receiving events from AI stacks
# - Add Grafana dashboard with stack_name filtering
# - Update Dokploy client with setApplicationEnv method
# - Configure STACK_NAME env var for deployed stacks
# - Add alerting rules for stack health monitoring
63 lines
1.7 KiB
YAML
63 lines
1.7 KiB
YAML
# Server-wide defaults; individual scrape jobs may override the intervals.
global:
  # How often targets are scraped.
  scrape_interval: 15s
  # How often rule files are evaluated.
  evaluation_interval: 15s
  # Labels attached to series and alerts when this server communicates
  # with external systems (federation, remote storage, Alertmanager).
  external_labels:
    monitor: 'ai-stack-monitor'
# Alertmanager wiring. The target list is empty, so no Alertmanager
# endpoint is configured in this file.
# NOTE(review): confirm whether alert delivery is expected to be wired up
# elsewhere; with an empty list nothing is listed here to receive alerts.
alerting:
  alertmanagers:
  - static_configs:
    - targets: []
# Rule definitions are read from every .yml file matching this glob.
rule_files:
- /etc/prometheus/alerting/*.yml
scrape_configs:
# Fixed target on localhost:9090 (presumably this Prometheus server
# scraping its own metrics - confirm the listen port).
- job_name: 'prometheus'
  static_configs:
  - targets: ['localhost:9090']
# Fixed target: the host named `loki` on port 3100.
- job_name: 'loki'
  static_configs:
  - targets: ['loki:3100']
# Fixed target: the host named `log-ingest` on port 3000.
- job_name: 'log-ingest'
  static_configs:
  - targets: ['log-ingest:3000']
# Discovers containers through the local Docker socket and keeps only
# those that look like AI stacks exposing port 9090.
- job_name: 'ai-stacks'
  docker_sd_configs:
  - host: unix:///var/run/docker.sock
    refresh_interval: 30s
  relabel_configs:
  # Keep only containers whose name matches a stack naming pattern.
  # The pattern expects the name to start with a slash.
  - source_labels: [__meta_docker_container_name]
    regex: '/(ai-stack-.*|app-.*opencode.*)'
    action: keep
  # Copy the container name, minus an optional leading slash, to `container`.
  - source_labels: [__meta_docker_container_name]
    regex: '/?(.*)'
    target_label: container
  # Keep only discovered targets whose private port is 9090.
  - source_labels: [__meta_docker_port_private]
    regex: '9090'
    action: keep
  # Expose the Swarm service name label as `service`.
  - source_labels: [__meta_docker_container_label_com_docker_swarm_service_name]
    target_label: service
  # Take `stack_name` from the container's `stack_name` label.
  - source_labels: [__meta_docker_container_label_stack_name]
    target_label: stack_name
  # NOTE(review): the two rules below run after the label-based rule, so
  # when the container name matches they overwrite the label-derived
  # value. Confirm this precedence is intended.
  # Derive `stack_name` from the segment following `opencode-` in the name.
  - source_labels: [__meta_docker_container_name]
    regex: '.*opencode-([a-z0-9-]+).*'
    replacement: '${1}'
    target_label: stack_name
  # Derive `stack_name` from the segment following `ai-stack-` in the name.
  - source_labels: [__meta_docker_container_name]
    regex: '.*ai-stack-([a-z0-9-]+).*'
    replacement: '${1}'
    target_label: stack_name
  # Scrape the container on its network IP, always on port 9090.
  - source_labels: [__meta_docker_container_network_ip]
    replacement: '${1}:9090'
    target_label: __address__
# Targets read from JSON files matching the glob below; the files are
# re-read every 30s.
- job_name: 'ai-stacks-static'
  file_sd_configs:
  - files:
    - /etc/prometheus/targets/*.json
    refresh_interval: 30s