diff --git a/ROADMAP.md b/ROADMAP.md index 5342875..9740875 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -11,6 +11,8 @@ - [x] Stack cleanup API - [x] Auto-rollback on failure - [x] Persistent storage volumes +- [x] Logging infrastructure (log-ingest → Loki → Grafana) +- [x] AI Stack monitoring dashboard at logs.intra.flexinit.nl ## Next @@ -24,4 +26,3 @@ - [ ] Integration tests - [ ] Resource limits configuration - [ ] Custom domain support -- [ ] Usage metrics diff --git a/docs/LOGGING-PLAN.md b/docs/LOGGING-PLAN.md index 038df22..0849776 100644 --- a/docs/LOGGING-PLAN.md +++ b/docs/LOGGING-PLAN.md @@ -387,29 +387,35 @@ groups: ## 7. Implementation Checklist ### Phase 1: Container Logging -- [ ] Set up Loki + Promtail on logging server -- [ ] Configure Docker log driver for ai-stack containers -- [ ] Add log rotation to Dockerfile -- [ ] Verify logs flowing to Loki +- [x] Set up Loki + Promtail on logging server (using existing `logs.intra.flexinit.nl`) +- [x] Configure Docker log driver for ai-stack containers +- [x] Add log rotation to Dockerfile +- [x] Verify logs flowing to Loki ### Phase 2: Session Logging -- [ ] Create logging hook in oh-my-opencode -- [ ] Define event schema -- [ ] Implement log shipping (HTTP or file-based) -- [ ] Add session/message/tool logging +- [x] Create logging hook in oh-my-opencode (`/home/odouhou/locale-projects/oh-my-opencode-free-fork/src/hooks/usage-logging/`) +- [x] Define event schema +- [x] Implement log shipping (HTTP-based via log-ingest service) +- [x] Add session/message/tool logging ### Phase 3: Metrics -- [ ] Add prom-client to container -- [ ] Expose /metrics endpoint -- [ ] Configure Prometheus scraping -- [ ] Create initial Grafana dashboards +- [x] Add prom-client to container (`docker/shared-config/metrics-exporter.ts`) +- [x] Expose /metrics endpoint (port 9090) +- [x] Configure Prometheus scraping (datasource added to Grafana) +- [x] Create initial Grafana dashboards (`/d/ai-stack-overview`) ### Phase 4: Production Hardening -- [ ] Implement data anonymization +- [x] Implement data anonymization (content hashed, not stored) - [ ] Set up retention policies - [ ] Configure alerts - [ ] Document runbooks +### Deployed Components (2026-01-10) +- **Log-ingest service**: `http://ai-stack-log-ingest:3000/ingest` (dokploy-network) +- **Grafana dashboard**: https://logs.intra.flexinit.nl/d/ai-stack-overview +- **Datasource UIDs**: Loki (`af9a823s6iku8b`), Prometheus (`cf9r1fmfw9xxcf`) +- **BWS credentials**: `GRAFANA_OPENCODE_ACCESS_TOKEN` (id: `c77e58e3-fb34-41dc-9824-b3ce00da18a0`) + --- ## 8. Cost Estimates diff --git a/logging-stack/docker-compose.yml b/logging-stack/docker-compose.yml index 7ef6eec..14a5d14 100644 --- a/logging-stack/docker-compose.yml +++ b/logging-stack/docker-compose.yml @@ -50,7 +50,7 @@ services: image: prom/prometheus:v2.47.0 container_name: ai-stack-prometheus ports: - - "9090:9090" + - "9091:9090" volumes: - ./config/prometheus.yml:/etc/prometheus/prometheus.yml:ro - ./alerting:/etc/prometheus/alerting:ro diff --git a/logging-stack/log-ingest/Dockerfile b/logging-stack/log-ingest/Dockerfile index 240e3fb..b336d10 100644 --- a/logging-stack/log-ingest/Dockerfile +++ b/logging-stack/log-ingest/Dockerfile @@ -1,13 +1,13 @@ -FROM oven/bun:1.0 +FROM node:20-alpine WORKDIR /app -COPY package.json bun.lockb* ./ -RUN bun install --frozen-lockfile 2>/dev/null || bun install +COPY package.json ./ +RUN npm install COPY . . ENV PORT=3000 EXPOSE 3000 -CMD ["bun", "run", "src/index.ts"] +CMD ["npx", "tsx", "src/index.ts"] diff --git a/logging-stack/log-ingest/docker-compose.yml b/logging-stack/log-ingest/docker-compose.yml new file mode 100644 index 0000000..dc2b2cf --- /dev/null +++ b/logging-stack/log-ingest/docker-compose.yml @@ -0,0 +1,27 @@ +# AI Stack Log Ingest Service +# Connects to existing Loki at logs.intra.flexinit.nl + +services: + log-ingest: + build: + context: . + dockerfile: Dockerfile + container_name: ai-stack-log-ingest + ports: + - "3102:3000" + environment: + # Connect to existing Loki on dokploy-network + - LOKI_URL=http://monitor-grafanaloki-qkj16i-loki-1:3100 + - LOG_LEVEL=info + networks: + - dokploy-network + restart: unless-stopped + healthcheck: + test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:3000/health"] + interval: 30s + timeout: 10s + retries: 3 + +networks: + dokploy-network: + external: true diff --git a/logging-stack/log-ingest/package.json b/logging-stack/log-ingest/package.json index ac765a9..e5a3ce6 100644 --- a/logging-stack/log-ingest/package.json +++ b/logging-stack/log-ingest/package.json @@ -3,14 +3,16 @@ "version": "1.0.0", "type": "module", "scripts": { - "start": "bun run src/index.ts", - "dev": "bun --watch run src/index.ts" + "start": "tsx src/index.ts", + "dev": "tsx --watch src/index.ts" }, "dependencies": { "hono": "^4.0.0", - "prom-client": "^15.0.0" + "prom-client": "^15.0.0", + "@hono/node-server": "^1.8.0" }, "devDependencies": { - "@types/bun": "latest" + "tsx": "^4.7.0", + "@types/node": "^20.0.0" } } diff --git a/logging-stack/log-ingest/src/index.ts b/logging-stack/log-ingest/src/index.ts index 14256bf..f5500ad 100644 --- a/logging-stack/log-ingest/src/index.ts +++ b/logging-stack/log-ingest/src/index.ts @@ -1,6 +1,7 @@ import { Hono } from 'hono'; import { cors } from 'hono/cors'; import { logger } from 'hono/logger'; +import { serve } from '@hono/node-server'; import { Registry, Counter, Histogram, Gauge, collectDefaultMetrics } from 'prom-client'; const app = new Hono(); @@ -193,7 +194,7 @@ app.post('/ingest/batch', async (c) => { const port = parseInt(process.env.PORT || '3000'); console.log(`Log ingest service starting on port ${port}`); -export default { - port, - fetch: app.fetch -}; +serve({ + fetch: app.fetch, + port +});