feat: add comprehensive logging infrastructure
- Add Loki/Prometheus/Grafana stack in logging-stack/
- Add log-ingest service for receiving events from AI stacks
- Add Grafana dashboard with stack_name filtering
- Update Dokploy client with setApplicationEnv method
- Configure STACK_NAME env var for deployed stacks
- Add alerting rules for stack health monitoring
This commit is contained in:
@@ -0,0 +1,11 @@
|
||||
# Dashboard provider definition (appears to be a Grafana provisioning file —
# the file name is not visible in this view).
apiVersion: 1

providers:
  # Single file-based provider; dashboards are read from the path under
  # `options` and placed in the "AI Stacks" folder.
  - name: "AI Stack Dashboards"
    orgId: 1
    folder: "AI Stacks"
    type: file
    disableDeletion: false
    editable: true
    options:
      path: /var/lib/grafana/dashboards
|
||||
@@ -0,0 +1,17 @@
|
||||
# Datasource definitions (appears to be a Grafana provisioning file —
# the file name is not visible in this view).
apiVersion: 1

datasources:
  # Metrics backend; marked as the default datasource.
  - name: Prometheus
    type: prometheus
    access: proxy
    url: http://prometheus:9090
    isDefault: true
    editable: false

  # Log backend; maxLines is set to 1000 in jsonData.
  - name: Loki
    type: loki
    access: proxy
    url: http://loki:3100
    editable: false
    jsonData:
      maxLines: 1000
|
||||
51
logging-stack/config/loki-config.yml
Normal file
51
logging-stack/config/loki-config.yml
Normal file
@@ -0,0 +1,51 @@
|
||||
# Authentication is switched off for this Loki instance.
auth_enabled: false

# Listener ports: HTTP on 3100, gRPC on 9096.
server:
  http_listen_port: 3100
  grpc_listen_port: 9096
|
||||
|
||||
# Shared settings: everything lives on the local filesystem under /loki,
# with a replication factor of 1 and an in-memory ring key-value store.
common:
  instance_addr: 127.0.0.1
  path_prefix: /loki
  storage:
    filesystem:
      chunks_directory: /loki/chunks
      rules_directory: /loki/rules
  replication_factor: 1
  ring:
    kvstore:
      store: inmemory
|
||||
|
||||
# Query results are cached in an embedded cache capped at 100 MB.
query_range:
  results_cache:
    cache:
      embedded_cache:
        enabled: true
        max_size_mb: 100
|
||||
|
||||
# One schema period starting 2020-10-24: boltdb-shipper index on filesystem
# object storage, schema v11, index tables prefixed `index_` with a 24h period.
schema_config:
  configs:
    - from: 2020-10-24
      store: boltdb-shipper
      object_store: filesystem
      schema: v11
      index:
        prefix: index_
        period: 24h
|
||||
|
||||
# Ruler sends alerts to an Alertmanager at localhost:9093.
# NOTE(review): no Alertmanager service is visible in this view — confirm
# something is listening at this address from Loki's point of view.
ruler:
  alertmanager_url: http://localhost:9093
|
||||
|
||||
# Per-tenant limits.
limits_config:
  # 168h = 7 days.
  retention_period: 168h
  ingestion_rate_mb: 10
  ingestion_burst_size_mb: 20
  max_streams_per_user: 10000
  max_line_size: 256kb
|
||||
|
||||
# Compactor with retention enabled, working out of /loki/compactor against
# the filesystem store.
# NOTE(review): `shared_store` is version-dependent in Loki — confirm it is
# accepted by the Loki image this stack deploys.
compactor:
  working_directory: /loki/compactor
  shared_store: filesystem
  retention_enabled: true
  retention_delete_delay: 2h
  retention_delete_worker_count: 150
|
||||
62
logging-stack/config/prometheus.yml
Normal file
62
logging-stack/config/prometheus.yml
Normal file
@@ -0,0 +1,62 @@
|
||||
# Defaults applied to every job: scrape and evaluate rules every 15 seconds,
# and attach a `monitor` external label.
global:
  scrape_interval: 15s
  evaluation_interval: 15s
  external_labels:
    monitor: "ai-stack-monitor"
|
||||
|
||||
# Alertmanager wiring: the target list is intentionally empty here, so no
# Alertmanager receives alerts from this configuration.
alerting:
  alertmanagers:
    - static_configs:
        - targets: []

# Alerting rule files are loaded from this glob.
rule_files:
  - "/etc/prometheus/alerting/*.yml"
|
||||
|
||||
# Scrape jobs: the monitoring services themselves, AI stack containers
# discovered through the Docker socket, and file-based static targets.
scrape_configs:
  # Prometheus scraping its own endpoint.
  - job_name: 'prometheus'
    static_configs:
      - targets: ['localhost:9090']

  - job_name: 'loki'
    static_configs:
      - targets: ['loki:3100']

  - job_name: 'log-ingest'
    static_configs:
      - targets: ['log-ingest:3000']

  # AI stack containers discovered via the local Docker socket.
  - job_name: 'ai-stacks'
    docker_sd_configs:
      - host: unix:///var/run/docker.sock
        refresh_interval: 30s
    relabel_configs:
      # Keep only containers named ai-stack-* or app-*opencode*.
      - source_labels: [__meta_docker_container_name]
        regex: '/(ai-stack-.*|app-.*opencode.*)'
        action: keep
      # Expose the container name (without the leading '/') as `container`.
      - source_labels: [__meta_docker_container_name]
        regex: '/?(.*)'
        target_label: container
      # Keep only discovery entries whose private port is 9090.
      - source_labels: [__meta_docker_port_private]
        regex: '9090'
        action: keep
      - source_labels: [__meta_docker_container_label_com_docker_swarm_service_name]
        target_label: service
      # stack_name: start from the container's `stack_name` label, then
      # derive it from the container name when one of the patterns matches.
      # NOTE(review): the name-derived rules run after the label rule and
      # overwrite it on a match — confirm that precedence is intended.
      - source_labels: [__meta_docker_container_label_stack_name]
        target_label: stack_name
      - source_labels: [__meta_docker_container_name]
        regex: '.*opencode-([a-z0-9-]+).*'
        replacement: '${1}'
        target_label: stack_name
      - source_labels: [__meta_docker_container_name]
        regex: '.*ai-stack-([a-z0-9-]+).*'
        replacement: '${1}'
        target_label: stack_name
      # Scrape the container's network IP on port 9090.
      # docker_sd exposes the IP as `__meta_docker_network_ip`; the previous
      # source label `__meta_docker_container_network_ip` is not defined, so
      # the address was rewritten to ':9090' for every target.
      - source_labels: [__meta_docker_network_ip]
        target_label: __address__
        replacement: '${1}:9090'

  # Targets supplied as JSON files, re-read every 30 seconds.
  - job_name: 'ai-stacks-static'
    file_sd_configs:
      - files:
          - /etc/prometheus/targets/*.json
        refresh_interval: 30s
|
||||
71
logging-stack/config/promtail-config.yml
Normal file
71
logging-stack/config/promtail-config.yml
Normal file
@@ -0,0 +1,71 @@
|
||||
# Promtail HTTP listener on 9080; the gRPC port is set to 0.
server:
  http_listen_port: 9080
  grpc_listen_port: 0

# File where read offsets are recorded.
# NOTE(review): /tmp is usually not persisted across container restarts —
# confirm whether this path is backed by a volume.
positions:
  filename: /tmp/positions.yaml

# Logs are pushed to Loki's push API.
clients:
  - url: http://loki:3100/loki/api/v1/push
|
||||
|
||||
# Log sources: container logs discovered through the Docker socket, and
# JSONL event files under /var/log/ai-stack.
scrape_configs:
  - job_name: docker
    docker_sd_configs:
      - host: unix:///var/run/docker.sock
        refresh_interval: 5s
    relabel_configs:
      # Container name without the leading '/'.
      - source_labels: [__meta_docker_container_name]
        regex: '/(.*)'
        target_label: container
      - source_labels: [__meta_docker_container_label_com_docker_swarm_service_name]
        target_label: service
      - source_labels: [__meta_docker_container_label_com_docker_compose_project]
        target_label: project
      # Keep only containers named ai-stack-* or app-*opencode*.
      - source_labels: [__meta_docker_container_name]
        regex: '/?(ai-stack-.*|app-.*opencode.*)'
        action: keep
      # stack_name: container label first, then derived from the container
      # name when one of the patterns below matches (a later match overwrites).
      - source_labels: [__meta_docker_container_label_stack_name]
        target_label: stack_name
      - source_labels: [__meta_docker_container_name]
        regex: '.*opencode-([a-z0-9-]+).*'
        target_label: stack_name
      - source_labels: [__meta_docker_container_name]
        regex: '.*ai-stack-([a-z0-9-]+).*'
        target_label: stack_name
    pipeline_stages:
      # Pull `log`, `stream` and `time` out of each line when it is JSON.
      # NOTE(review): these keys look like Docker's json-file log format;
      # confirm lines delivered through docker_sd_configs actually have it.
      - json:
          expressions:
            output: log
            stream: stream
            timestamp: time
      - labels:
          stream:
      - timestamp:
          source: timestamp
          format: RFC3339Nano
      - output:
          source: output

  - job_name: ai-stack-events
    static_configs:
      - targets: [localhost]
        labels:
          job: ai-stack-events
          __path__: /var/log/ai-stack/*.jsonl
    pipeline_stages:
      # Extract event fields from each JSON line.
      - json:
          expressions:
            stack_name: stack_name
            session_id: session_id
            event_type: event_type
            model: data.model
            agent: data.agent
            tool: data.tool
      # Promote every extracted field to a Loki label.
      # NOTE(review): session_id is presumably unique per session, which
      # would create one stream per session — confirm against the
      # max_streams_per_user limit before relying on it as a label.
      - labels:
          stack_name:
          session_id:
          event_type:
          model:
          agent:
          tool:
|
||||
Reference in New Issue
Block a user