Compare commits

...

1 Commits

Author SHA1 Message Date
Pradeep Kumar
f46f01f2f4 feat: adds endpoints for overview page.
Adds /api/v1/system/{source}/dashboard (GET/PUT/DELETE)
2026-04-27 02:24:57 +05:30
21 changed files with 1311 additions and 7 deletions

107
ai-o11y-overview.json Normal file
View File

@@ -0,0 +1,107 @@
{
"title": "AI Observability Overview",
"description": "AI / LLM observability overview — cost, tokens, latency, errors, RED for tool calls, and time to first token. Scoped by model, environment and service (apply via the variable bar).",
"tags": ["ai", "llm", "genai", "overview"],
"version": "v5",
"variables": {
"model": {
"id": "a1000000-0000-0000-0000-000000000001",
"name": "model",
"key": "model",
"description": "LLM model",
"type": "QUERY",
"sort": "ASC",
"multiSelect": true,
"showALLOption": true,
"allSelected": true,
"queryValue": "SELECT DISTINCT stringTagMap['gen_ai.request.model'] AS model FROM signoz_traces.distributed_signoz_index_v3 WHERE has(stringTagMap, 'gen_ai.request.model') AND timestamp >= now() - INTERVAL 1 DAY",
"customValue": "",
"textboxValue": "",
"selectedValue": [],
"order": 0,
"modificationUUID": "a1000000-0000-0000-0000-000000000011"
},
"environment": {
"id": "a1000000-0000-0000-0000-000000000002",
"name": "environment",
"key": "environment",
"description": "Deployment environment",
"type": "QUERY",
"sort": "ASC",
"multiSelect": true,
"showALLOption": true,
"allSelected": true,
"queryValue": "SELECT DISTINCT resourceTagsMap['deployment.environment'] AS environment FROM signoz_traces.distributed_signoz_index_v3 WHERE has(resourceTagsMap, 'deployment.environment') AND timestamp >= now() - INTERVAL 1 DAY",
"customValue": "",
"textboxValue": "",
"selectedValue": [],
"order": 1,
"modificationUUID": "a1000000-0000-0000-0000-000000000012"
},
"service_name": {
"id": "a1000000-0000-0000-0000-000000000003",
"name": "service_name",
"key": "service_name",
"description": "Service name",
"type": "QUERY",
"sort": "ASC",
"multiSelect": true,
"showALLOption": true,
"allSelected": true,
"queryValue": "SELECT DISTINCT serviceName FROM signoz_traces.distributed_signoz_index_v3 WHERE timestamp >= now() - INTERVAL 1 DAY",
"customValue": "",
"textboxValue": "",
"selectedValue": [],
"order": 2,
"modificationUUID": "a1000000-0000-0000-0000-000000000013"
}
},
"layout": [
{"i": "11111111-1111-1111-1111-111111111111", "x": 0, "y": 0, "w": 3, "h": 3, "moved": false, "static": false},
{"i": "22222222-2222-2222-2222-222222222222", "x": 3, "y": 0, "w": 3, "h": 3, "moved": false, "static": false},
{"i": "33333333-3333-3333-3333-333333333333", "x": 6, "y": 0, "w": 2, "h": 3, "moved": false, "static": false},
{"i": "44444444-4444-4444-4444-444444444444", "x": 8, "y": 0, "w": 2, "h": 3, "moved": false, "static": false},
{"i": "55555555-5555-5555-5555-555555555555", "x": 10, "y": 0, "w": 2, "h": 3, "moved": false, "static": false},
{"i": "66666666-6666-6666-6666-666666666666", "x": 0, "y": 3, "w": 6, "h": 4, "moved": false, "static": false},
{"i": "77777777-7777-7777-7777-777777777777", "x": 6, "y": 3, "w": 6, "h": 4, "moved": false, "static": false},
{"i": "88888888-8888-8888-8888-888888888888", "x": 0, "y": 7, "w": 6, "h": 4, "moved": false, "static": false},
{"i": "99999999-9999-9999-9999-999999999999", "x": 6, "y": 7, "w": 6, "h": 4, "moved": false, "static": false},
{"i": "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa", "x": 0, "y": 11, "w": 4, "h": 4, "moved": false, "static": false},
{"i": "bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb", "x": 4, "y": 11, "w": 4, "h": 4, "moved": false, "static": false},
{"i": "cccccccc-cccc-cccc-cccc-cccccccccccc", "x": 8, "y": 11, "w": 4, "h": 4, "moved": false, "static": false},
{"i": "dddddddd-dddd-dddd-dddd-dddddddddddd", "x": 0, "y": 15, "w": 4, "h": 4, "moved": false, "static": false},
{"i": "eeeeeeee-eeee-eeee-eeee-eeeeeeeeeeee", "x": 4, "y": 15, "w": 4, "h": 4, "moved": false, "static": false},
{"i": "ffffffff-ffff-ffff-ffff-ffffffffffff", "x": 8, "y": 15, "w": 4, "h": 4, "moved": false, "static": false}
],
"widgets": [
{"id": "11111111-1111-1111-1111-111111111111", "title": "Total cost", "description": "Total LLM cost across all calls. Requires gen_ai.usage.cost attribute.", "panelTypes": "value", "nullZeroValues": "zero", "opacity": "1", "isStacked": false, "fillSpans": false, "yAxisUnit": "none", "timePreferance": "GLOBAL_TIME", "softMax": null, "softMin": null, "thresholds": [], "selectedLogFields": [], "selectedTracesFields": [], "query": {"queryType": "builder", "promql": [{"disabled": false, "legend": "", "name": "A", "query": ""}], "clickhouse_sql": [{"disabled": false, "legend": "", "name": "A", "query": ""}], "id": "q1111111-1111-1111-1111-111111111111", "builder": {"queryData": [{"queryName": "A", "stepInterval": 60, "dataSource": "traces", "aggregations": [{"expression": "sum(gen_ai.usage.cost)"}], "filter": {"expression": ""}, "groupBy": [], "expression": "A", "orderBy": [], "legend": "", "disabled": false, "having": {"expression": ""}, "limit": null, "reduceTo": "sum"}], "queryFormulas": [], "queryTraceOperator": []}}},
{"id": "22222222-2222-2222-2222-222222222222", "title": "Total tokens", "description": "Sum of input + output tokens.", "panelTypes": "value", "nullZeroValues": "zero", "opacity": "1", "isStacked": false, "fillSpans": false, "yAxisUnit": "short", "timePreferance": "GLOBAL_TIME", "softMax": null, "softMin": null, "thresholds": [], "selectedLogFields": [], "selectedTracesFields": [], "query": {"queryType": "builder", "promql": [{"disabled": false, "legend": "", "name": "A", "query": ""}], "clickhouse_sql": [{"disabled": false, "legend": "", "name": "A", "query": ""}], "id": "q2222222-2222-2222-2222-222222222222", "builder": {"queryData": [{"queryName": "A", "stepInterval": 60, "dataSource": "traces", "aggregations": [{"expression": "sum(gen_ai.usage.input_tokens) + sum(gen_ai.usage.output_tokens)"}], "filter": {"expression": ""}, "groupBy": [], "expression": "A", "orderBy": [], "legend": "", "disabled": false, "having": {"expression": ""}, "limit": null, "reduceTo": "sum"}], "queryFormulas": [], "queryTraceOperator": []}}},
{"id": "33333333-3333-3333-3333-333333333333", "title": "Latency (p95)", "description": "p95 latency of LLM spans.", "panelTypes": "value", "nullZeroValues": "zero", "opacity": "1", "isStacked": false, "fillSpans": false, "yAxisUnit": "ns", "timePreferance": "GLOBAL_TIME", "softMax": null, "softMin": null, "thresholds": [], "selectedLogFields": [], "selectedTracesFields": [], "query": {"queryType": "builder", "promql": [{"disabled": false, "legend": "", "name": "A", "query": ""}], "clickhouse_sql": [{"disabled": false, "legend": "", "name": "A", "query": ""}], "id": "q3333333-3333-3333-3333-333333333333", "builder": {"queryData": [{"queryName": "A", "stepInterval": 60, "dataSource": "traces", "aggregations": [{"expression": "p95(duration_nano)"}], "filter": {"expression": "gen_ai.system != ''"}, "groupBy": [], "expression": "A", "orderBy": [], "legend": "", "disabled": false, "having": {"expression": ""}, "limit": null, "reduceTo": "avg"}], "queryFormulas": [], "queryTraceOperator": []}}},
{"id": "44444444-4444-4444-4444-444444444444", "title": "Error rate", "description": "Error rate as a percentage of total LLM calls.", "panelTypes": "value", "nullZeroValues": "zero", "opacity": "1", "isStacked": false, "fillSpans": false, "yAxisUnit": "percent", "timePreferance": "GLOBAL_TIME", "softMax": null, "softMin": null, "thresholds": [], "selectedLogFields": [], "selectedTracesFields": [], "query": {"queryType": "builder", "promql": [{"disabled": false, "legend": "", "name": "A", "query": ""}], "clickhouse_sql": [{"disabled": false, "legend": "", "name": "A", "query": ""}], "id": "q4444444-4444-4444-4444-444444444444", "builder": {"queryData": [{"queryName": "A", "stepInterval": 60, "dataSource": "traces", "aggregations": [{"expression": "countIf(has_error = true) * 100 / count()"}], "filter": {"expression": "gen_ai.system != ''"}, "groupBy": [], "expression": "A", "orderBy": [], "legend": "", "disabled": false, "having": {"expression": ""}, "limit": null, "reduceTo": "avg"}], "queryFormulas": [], "queryTraceOperator": []}}},
{"id": "55555555-5555-5555-5555-555555555555", "title": "TTFT (p95)", "description": "p95 time to first token.", "panelTypes": "value", "nullZeroValues": "zero", "opacity": "1", "isStacked": false, "fillSpans": false, "yAxisUnit": "ms", "timePreferance": "GLOBAL_TIME", "softMax": null, "softMin": null, "thresholds": [], "selectedLogFields": [], "selectedTracesFields": [], "query": {"queryType": "builder", "promql": [{"disabled": false, "legend": "", "name": "A", "query": ""}], "clickhouse_sql": [{"disabled": false, "legend": "", "name": "A", "query": ""}], "id": "q5555555-5555-5555-5555-555555555555", "builder": {"queryData": [{"queryName": "A", "stepInterval": 60, "dataSource": "traces", "aggregations": [{"expression": "p95(gen_ai.server.ttft)"}], "filter": {"expression": ""}, "groupBy": [], "expression": "A", "orderBy": [], "legend": "", "disabled": false, "having": {"expression": ""}, "limit": null, "reduceTo": "avg"}], "queryFormulas": [], "queryTraceOperator": []}}},
{"id": "66666666-6666-6666-6666-666666666666", "title": "Cost over time", "description": "Cost by model over time.", "panelTypes": "graph", "nullZeroValues": "zero", "opacity": "1", "isStacked": false, "fillSpans": false, "yAxisUnit": "none", "timePreferance": "GLOBAL_TIME", "softMax": null, "softMin": null, "thresholds": [], "selectedLogFields": [], "selectedTracesFields": [], "query": {"queryType": "builder", "promql": [{"disabled": false, "legend": "", "name": "A", "query": ""}], "clickhouse_sql": [{"disabled": false, "legend": "", "name": "A", "query": ""}], "id": "q6666666-6666-6666-6666-666666666666", "builder": {"queryData": [{"queryName": "A", "stepInterval": 60, "dataSource": "traces", "aggregations": [{"expression": "sum(gen_ai.usage.cost)"}], "filter": {"expression": ""}, "groupBy": [{"key": "gen_ai.request.model", "dataType": "string", "type": "tag", "isColumn": false, "isJSON": false}], "expression": "A", "orderBy": [], "legend": "{{gen_ai.request.model}}", "disabled": false, "having": {"expression": ""}, "limit": null}], "queryFormulas": [], "queryTraceOperator": []}}},
{"id": "77777777-7777-7777-7777-777777777777", "title": "Token usage over time", "description": "Input vs output tokens over time.", "panelTypes": "graph", "nullZeroValues": "zero", "opacity": "1", "isStacked": true, "fillSpans": false, "yAxisUnit": "short", "timePreferance": "GLOBAL_TIME", "softMax": null, "softMin": null, "thresholds": [], "selectedLogFields": [], "selectedTracesFields": [], "query": {"queryType": "builder", "promql": [{"disabled": false, "legend": "", "name": "A", "query": ""}], "clickhouse_sql": [{"disabled": false, "legend": "", "name": "A", "query": ""}], "id": "q7777777-7777-7777-7777-777777777777", "builder": {"queryData": [{"queryName": "A", "stepInterval": 60, "dataSource": "traces", "aggregations": [{"expression": "sum(gen_ai.usage.input_tokens)"}], "filter": {"expression": ""}, "groupBy": [], "expression": "A", "orderBy": [], "legend": "Input", "disabled": false, "having": {"expression": ""}, "limit": null}, {"queryName": "B", "stepInterval": 60, "dataSource": "traces", "aggregations": [{"expression": "sum(gen_ai.usage.output_tokens)"}], "filter": {"expression": ""}, "groupBy": [], "expression": "B", "orderBy": [], "legend": "Output", "disabled": false, "having": {"expression": ""}, "limit": null}], "queryFormulas": [], "queryTraceOperator": []}}},
{"id": "88888888-8888-8888-8888-888888888888", "title": "LLM call latency percentiles", "description": "p50, p90, p95, p99 latency by model.", "panelTypes": "table", "nullZeroValues": "zero", "opacity": "1", "isStacked": false, "fillSpans": false, "yAxisUnit": "ns", "timePreferance": "GLOBAL_TIME", "softMax": null, "softMin": null, "thresholds": [], "selectedLogFields": [], "selectedTracesFields": [], "query": {"queryType": "builder", "promql": [{"disabled": false, "legend": "", "name": "A", "query": ""}], "clickhouse_sql": [{"disabled": false, "legend": "", "name": "A", "query": ""}], "id": "q8888888-8888-8888-8888-888888888888", "builder": {"queryData": [{"queryName": "A", "stepInterval": 60, "dataSource": "traces", "aggregations": [{"expression": "p50(duration_nano)"}, {"expression": "p90(duration_nano)"}, {"expression": "p95(duration_nano)"}, {"expression": "p99(duration_nano)"}], "filter": {"expression": "gen_ai.system != ''"}, "groupBy": [{"key": "gen_ai.request.model", "dataType": "string", "type": "tag", "isColumn": false, "isJSON": false}], "expression": "A", "orderBy": [], "legend": "{{gen_ai.request.model}}", "disabled": false, "having": {"expression": ""}, "limit": null}], "queryFormulas": [], "queryTraceOperator": []}}},
{"id": "99999999-9999-9999-9999-999999999999", "title": "LLM call latency over time", "description": "p95 latency trend by model.", "panelTypes": "graph", "nullZeroValues": "zero", "opacity": "1", "isStacked": false, "fillSpans": false, "yAxisUnit": "ns", "timePreferance": "GLOBAL_TIME", "softMax": null, "softMin": null, "thresholds": [], "selectedLogFields": [], "selectedTracesFields": [], "query": {"queryType": "builder", "promql": [{"disabled": false, "legend": "", "name": "A", "query": ""}], "clickhouse_sql": [{"disabled": false, "legend": "", "name": "A", "query": ""}], "id": "q9999999-9999-9999-9999-999999999999", "builder": {"queryData": [{"queryName": "A", "stepInterval": 60, "dataSource": "traces", "aggregations": [{"expression": "p95(duration_nano)"}], "filter": {"expression": "gen_ai.system != ''"}, "groupBy": [{"key": "gen_ai.request.model", "dataType": "string", "type": "tag", "isColumn": false, "isJSON": false}], "expression": "A", "orderBy": [], "legend": "{{gen_ai.request.model}}", "disabled": false, "having": {"expression": ""}, "limit": null}], "queryFormulas": [], "queryTraceOperator": []}}},
{"id": "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa", "title": "Error count", "description": "Errors grouped by error type.", "panelTypes": "graph", "nullZeroValues": "zero", "opacity": "1", "isStacked": true, "fillSpans": false, "yAxisUnit": "short", "timePreferance": "GLOBAL_TIME", "softMax": null, "softMin": null, "thresholds": [], "selectedLogFields": [], "selectedTracesFields": [], "query": {"queryType": "builder", "promql": [{"disabled": false, "legend": "", "name": "A", "query": ""}], "clickhouse_sql": [{"disabled": false, "legend": "", "name": "A", "query": ""}], "id": "qaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa", "builder": {"queryData": [{"queryName": "A", "stepInterval": 60, "dataSource": "traces", "aggregations": [{"expression": "count()"}], "filter": {"expression": "has_error = true AND gen_ai.system != ''"}, "groupBy": [{"key": "gen_ai.error.type", "dataType": "string", "type": "tag", "isColumn": false, "isJSON": false}], "expression": "A", "orderBy": [], "legend": "{{gen_ai.error.type}}", "disabled": false, "having": {"expression": ""}, "limit": null}], "queryFormulas": [], "queryTraceOperator": []}}},
{"id": "bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb", "title": "Time to first token", "description": "p95 TTFT by model.", "panelTypes": "graph", "nullZeroValues": "zero", "opacity": "1", "isStacked": false, "fillSpans": false, "yAxisUnit": "ms", "timePreferance": "GLOBAL_TIME", "softMax": null, "softMin": null, "thresholds": [], "selectedLogFields": [], "selectedTracesFields": [], "query": {"queryType": "builder", "promql": [{"disabled": false, "legend": "", "name": "A", "query": ""}], "clickhouse_sql": [{"disabled": false, "legend": "", "name": "A", "query": ""}], "id": "qbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb", "builder": {"queryData": [{"queryName": "A", "stepInterval": 60, "dataSource": "traces", "aggregations": [{"expression": "p95(gen_ai.server.ttft)"}], "filter": {"expression": ""}, "groupBy": [{"key": "gen_ai.request.model", "dataType": "string", "type": "tag", "isColumn": false, "isJSON": false}], "expression": "A", "orderBy": [], "legend": "{{gen_ai.request.model}}", "disabled": false, "having": {"expression": ""}, "limit": null}], "queryFormulas": [], "queryTraceOperator": []}}},
{"id": "cccccccc-cccc-cccc-cccc-cccccccccccc", "title": "Top 10 span names", "description": "Top span names by count across GenAI spans.", "panelTypes": "table", "nullZeroValues": "zero", "opacity": "1", "isStacked": false, "fillSpans": false, "yAxisUnit": "none", "timePreferance": "GLOBAL_TIME", "softMax": null, "softMin": null, "thresholds": [], "selectedLogFields": [], "selectedTracesFields": [], "query": {"queryType": "builder", "promql": [{"disabled": false, "legend": "", "name": "A", "query": ""}], "clickhouse_sql": [{"disabled": false, "legend": "", "name": "A", "query": ""}], "id": "qccccccc-cccc-cccc-cccc-cccccccccccc", "builder": {"queryData": [{"queryName": "A", "stepInterval": 60, "dataSource": "traces", "aggregations": [{"expression": "count()"}], "filter": {"expression": "gen_ai.system != ''"}, "groupBy": [{"key": "name", "dataType": "string", "type": "tag", "isColumn": true, "isJSON": false}], "expression": "A", "orderBy": [{"columnName": "count()", "order": "desc"}], "legend": "{{name}}", "disabled": false, "having": {"expression": ""}, "limit": 10}], "queryFormulas": [], "queryTraceOperator": []}}},
{"id": "dddddddd-dddd-dddd-dddd-dddddddddddd", "title": "Tool call rate", "description": "Tool call rate per second.", "panelTypes": "graph", "nullZeroValues": "zero", "opacity": "1", "isStacked": false, "fillSpans": false, "yAxisUnit": "reqps", "timePreferance": "GLOBAL_TIME", "softMax": null, "softMin": null, "thresholds": [], "selectedLogFields": [], "selectedTracesFields": [], "query": {"queryType": "builder", "promql": [{"disabled": false, "legend": "", "name": "A", "query": ""}], "clickhouse_sql": [{"disabled": false, "legend": "", "name": "A", "query": ""}], "id": "qddddddd-dddd-dddd-dddd-dddddddddddd", "builder": {"queryData": [{"queryName": "A", "stepInterval": 60, "dataSource": "traces", "aggregations": [{"expression": "rate()"}], "filter": {"expression": "name = 'execute_tool'"}, "groupBy": [], "expression": "A", "orderBy": [], "legend": "req/s", "disabled": false, "having": {"expression": ""}, "limit": null}], "queryFormulas": [], "queryTraceOperator": []}}},
{"id": "eeeeeeee-eeee-eeee-eeee-eeeeeeeeeeee", "title": "Tool error rate", "description": "Percentage of tool calls that errored.", "panelTypes": "graph", "nullZeroValues": "zero", "opacity": "1", "isStacked": false, "fillSpans": false, "yAxisUnit": "percent", "timePreferance": "GLOBAL_TIME", "softMax": null, "softMin": null, "thresholds": [], "selectedLogFields": [], "selectedTracesFields": [], "query": {"queryType": "builder", "promql": [{"disabled": false, "legend": "", "name": "A", "query": ""}], "clickhouse_sql": [{"disabled": false, "legend": "", "name": "A", "query": ""}], "id": "qeeeeeee-eeee-eeee-eeee-eeeeeeeeeeee", "builder": {"queryData": [{"queryName": "A", "stepInterval": 60, "dataSource": "traces", "aggregations": [{"expression": "countIf(has_error = true) * 100 / count()"}], "filter": {"expression": "name = 'execute_tool'"}, "groupBy": [], "expression": "A", "orderBy": [], "legend": "error %", "disabled": false, "having": {"expression": ""}, "limit": null}], "queryFormulas": [], "queryTraceOperator": []}}},
{"id": "ffffffff-ffff-ffff-ffff-ffffffffffff", "title": "Tool duration (p50)", "description": "Median tool call duration.", "panelTypes": "graph", "nullZeroValues": "zero", "opacity": "1", "isStacked": false, "fillSpans": false, "yAxisUnit": "ns", "timePreferance": "GLOBAL_TIME", "softMax": null, "softMin": null, "thresholds": [], "selectedLogFields": [], "selectedTracesFields": [], "query": {"queryType": "builder", "promql": [{"disabled": false, "legend": "", "name": "A", "query": ""}], "clickhouse_sql": [{"disabled": false, "legend": "", "name": "A", "query": ""}], "id": "qfffffff-ffff-ffff-ffff-ffffffffffff", "builder": {"queryData": [{"queryName": "A", "stepInterval": 60, "dataSource": "traces", "aggregations": [{"expression": "p50(duration_nano)"}], "filter": {"expression": "name = 'execute_tool'"}, "groupBy": [], "expression": "A", "orderBy": [], "legend": "p50", "disabled": false, "having": {"expression": ""}, "limit": null}], "queryFormulas": [], "queryTraceOperator": []}}}
]
}

View File

@@ -2001,6 +2001,8 @@ components:
type: boolean
org_id:
type: string
source:
type: string
updatedAt:
format: date-time
type: string
@@ -2023,6 +2025,27 @@ components:
publicDashboard:
$ref: '#/components/schemas/DashboardtypesGettablePublicDasbhboard'
type: object
DashboardtypesGettableSystemDashboard:
properties:
createdAt:
format: date-time
type: string
createdBy:
type: string
data:
$ref: '#/components/schemas/DashboardtypesStorableDashboardData'
id:
type: string
orgId:
type: string
source:
type: string
updatedAt:
format: date-time
type: string
updatedBy:
type: string
type: object
DashboardtypesPostablePublicDashboard:
properties:
defaultTimeRange:
@@ -2040,6 +2063,11 @@ components:
timeRangeEnabled:
type: boolean
type: object
DashboardtypesUpdatableSystemDashboard:
properties:
data:
$ref: '#/components/schemas/DashboardtypesStorableDashboardData'
type: object
ErrorsJSON:
properties:
code:
@@ -8876,6 +8904,160 @@ paths:
summary: Updates my service account
tags:
- serviceaccount
/api/v1/system/{source}/dashboard:
delete:
deprecated: false
description: This endpoint drops any customisation to the system dashboard and
re-seeds the defaults.
operationId: DeleteSystemDashboard
parameters:
- in: path
name: source
required: true
schema:
type: string
responses:
"204":
content:
application/json:
schema:
type: string
description: No Content
"401":
content:
application/json:
schema:
$ref: '#/components/schemas/RenderErrorResponse'
description: Unauthorized
"403":
content:
application/json:
schema:
$ref: '#/components/schemas/RenderErrorResponse'
description: Forbidden
"500":
content:
application/json:
schema:
$ref: '#/components/schemas/RenderErrorResponse'
description: Internal Server Error
security:
- api_key:
- EDITOR
- tokenizer:
- EDITOR
summary: Reset system dashboard to defaults
tags:
- system-dashboard
get:
deprecated: false
description: This endpoint returns the system-owned dashboard for the caller's
org keyed by source (e.g. ai-o11y-overview).
operationId: GetSystemDashboard
parameters:
- in: path
name: source
required: true
schema:
type: string
responses:
"200":
content:
application/json:
schema:
properties:
data:
$ref: '#/components/schemas/DashboardtypesGettableSystemDashboard'
status:
type: string
required:
- status
- data
type: object
description: OK
"401":
content:
application/json:
schema:
$ref: '#/components/schemas/RenderErrorResponse'
description: Unauthorized
"403":
content:
application/json:
schema:
$ref: '#/components/schemas/RenderErrorResponse'
description: Forbidden
"500":
content:
application/json:
schema:
$ref: '#/components/schemas/RenderErrorResponse'
description: Internal Server Error
security:
- api_key:
- VIEWER
- tokenizer:
- VIEWER
summary: Get system dashboard
tags:
- system-dashboard
put:
deprecated: false
description: This endpoint replaces the dashboard payload for the system dashboard
keyed by source.
operationId: UpdateSystemDashboard
parameters:
- in: path
name: source
required: true
schema:
type: string
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/DashboardtypesUpdatableSystemDashboard'
responses:
"200":
content:
application/json:
schema:
properties:
data:
$ref: '#/components/schemas/DashboardtypesGettableSystemDashboard'
status:
type: string
required:
- status
- data
type: object
description: OK
"401":
content:
application/json:
schema:
$ref: '#/components/schemas/RenderErrorResponse'
description: Unauthorized
"403":
content:
application/json:
schema:
$ref: '#/components/schemas/RenderErrorResponse'
description: Forbidden
"500":
content:
application/json:
schema:
$ref: '#/components/schemas/RenderErrorResponse'
description: Internal Server Error
security:
- api_key:
- EDITOR
- tokenizer:
- EDITOR
summary: Update system dashboard
tags:
- system-dashboard
/api/v1/testChannel:
post:
deprecated: true

View File

@@ -24,6 +24,7 @@ import (
"github.com/SigNoz/signoz/pkg/modules/rulestatehistory"
"github.com/SigNoz/signoz/pkg/modules/serviceaccount"
"github.com/SigNoz/signoz/pkg/modules/session"
"github.com/SigNoz/signoz/pkg/modules/systemdashboard"
"github.com/SigNoz/signoz/pkg/modules/user"
"github.com/SigNoz/signoz/pkg/querier"
"github.com/SigNoz/signoz/pkg/ruler"
@@ -48,6 +49,7 @@ type provider struct {
flaggerHandler flagger.Handler
dashboardModule dashboard.Module
dashboardHandler dashboard.Handler
systemDashboardHandler systemdashboard.Handler
metricsExplorerHandler metricsexplorer.Handler
gatewayHandler gateway.Handler
fieldsHandler fields.Handler
@@ -76,6 +78,7 @@ func NewFactory(
flaggerHandler flagger.Handler,
dashboardModule dashboard.Module,
dashboardHandler dashboard.Handler,
systemDashboardHandler systemdashboard.Handler,
metricsExplorerHandler metricsexplorer.Handler,
gatewayHandler gateway.Handler,
fieldsHandler fields.Handler,
@@ -107,6 +110,7 @@ func NewFactory(
flaggerHandler,
dashboardModule,
dashboardHandler,
systemDashboardHandler,
metricsExplorerHandler,
gatewayHandler,
fieldsHandler,
@@ -140,6 +144,7 @@ func newProvider(
flaggerHandler flagger.Handler,
dashboardModule dashboard.Module,
dashboardHandler dashboard.Handler,
systemDashboardHandler systemdashboard.Handler,
metricsExplorerHandler metricsexplorer.Handler,
gatewayHandler gateway.Handler,
fieldsHandler fields.Handler,
@@ -171,6 +176,7 @@ func newProvider(
flaggerHandler: flaggerHandler,
dashboardModule: dashboardModule,
dashboardHandler: dashboardHandler,
systemDashboardHandler: systemDashboardHandler,
metricsExplorerHandler: metricsExplorerHandler,
gatewayHandler: gatewayHandler,
fieldsHandler: fieldsHandler,
@@ -236,6 +242,10 @@ func (provider *provider) AddToRouter(router *mux.Router) error {
return err
}
if err := provider.addSystemDashboardRoutes(router); err != nil {
return err
}
if err := provider.addMetricsExplorerRoutes(router); err != nil {
return err
}

View File

@@ -0,0 +1,66 @@
package signozapiserver
import (
"net/http"
"github.com/gorilla/mux"
"github.com/SigNoz/signoz/pkg/http/handler"
"github.com/SigNoz/signoz/pkg/types"
"github.com/SigNoz/signoz/pkg/types/dashboardtypes"
)
// addSystemDashboardRoutes registers the GET, PUT and DELETE handlers for the
// per-source system dashboard on router, attaching the OpenAPI definition for
// each operation.
//
// GET is wrapped in ViewAccess; PUT and DELETE are wrapped in EditAccess. The
// first route registration error is returned unchanged; nil means all three
// routes were registered.
func (provider *provider) addSystemDashboardRoutes(router *mux.Router) error {
	// All three operations share one path and differ only by HTTP method.
	const path = "/api/v1/system/{source}/dashboard"

	if err := router.Handle(path, handler.New(provider.authZ.ViewAccess(provider.systemDashboardHandler.Get), handler.OpenAPIDef{
		ID:                  "GetSystemDashboard",
		Tags:                []string{"system-dashboard"},
		Summary:             "Get system dashboard",
		Description:         "This endpoint returns the system-owned dashboard for the caller's org keyed by source (e.g. ai-o11y-overview).",
		Request:             nil,
		RequestContentType:  "",
		Response:            new(dashboardtypes.GettableSystemDashboard),
		ResponseContentType: "application/json",
		SuccessStatusCode:   http.StatusOK,
		ErrorStatusCodes:    []int{},
		Deprecated:          false,
		SecuritySchemes:     newSecuritySchemes(types.RoleViewer),
	})).Methods(http.MethodGet).GetError(); err != nil {
		return err
	}

	if err := router.Handle(path, handler.New(provider.authZ.EditAccess(provider.systemDashboardHandler.Update), handler.OpenAPIDef{
		ID:                  "UpdateSystemDashboard",
		Tags:                []string{"system-dashboard"},
		Summary:             "Update system dashboard",
		Description:         "This endpoint replaces the dashboard payload for the system dashboard keyed by source.",
		Request:             new(dashboardtypes.UpdatableSystemDashboard),
		RequestContentType:  "application/json",
		Response:            new(dashboardtypes.GettableSystemDashboard),
		ResponseContentType: "application/json",
		SuccessStatusCode:   http.StatusOK,
		ErrorStatusCodes:    []int{},
		Deprecated:          false,
		SecuritySchemes:     newSecuritySchemes(types.RoleEditor),
	})).Methods(http.MethodPut).GetError(); err != nil {
		return err
	}

	if err := router.Handle(path, handler.New(provider.authZ.EditAccess(provider.systemDashboardHandler.Delete), handler.OpenAPIDef{
		ID:                 "DeleteSystemDashboard",
		Tags:               []string{"system-dashboard"},
		Summary:            "Reset system dashboard to defaults",
		Description:        "This endpoint drops any customisation to the system dashboard and re-seeds the defaults.",
		Request:            nil,
		RequestContentType: "",
		Response:           nil,
		// A 204 response carries no body, so no response content type is
		// declared; advertising application/json here would document a JSON
		// payload that is never sent.
		ResponseContentType: "",
		SuccessStatusCode:   http.StatusNoContent,
		ErrorStatusCodes:    []int{},
		Deprecated:          false,
		SecuritySchemes:     newSecuritySchemes(types.RoleEditor),
	})).Methods(http.MethodDelete).GetError(); err != nil {
		return err
	}

	return nil
}

View File

@@ -21,7 +21,7 @@ func NewStore(sqlstore sqlstore.SQLStore) dashboardtypes.Store {
func (store *store) Create(ctx context.Context, storabledashboard *dashboardtypes.StorableDashboard) error {
_, err := store.
sqlstore.
BunDB().
BunDBCtx(ctx).
NewInsert().
Model(storabledashboard).
Exec(ctx)
@@ -55,6 +55,7 @@ func (store *store) Get(ctx context.Context, orgID valuer.UUID, id valuer.UUID)
Model(storableDashboard).
Where("id = ?", id).
Where("org_id = ?", orgID).
Where("source = ?", "").
Scan(ctx)
if err != nil {
return nil, store.sqlstore.WrapNotFoundErrf(err, errors.CodeNotFound, "dashboard with id %s doesn't exist", id)
@@ -63,6 +64,23 @@ func (store *store) Get(ctx context.Context, orgID valuer.UUID, id valuer.UUID)
return storableDashboard, nil
}
// GetBySource selects the dashboard row whose org_id and source columns match
// the given organisation and source value.
//
// When the scan fails, the error is passed through WrapNotFoundErrf with
// errors.CodeNotFound and a message naming the source; otherwise the populated
// row is returned.
func (store *store) GetBySource(ctx context.Context, orgID valuer.UUID, source string) (*dashboardtypes.StorableDashboard, error) {
	dashboard := new(dashboardtypes.StorableDashboard)

	// Build the query first so the filter set is readable on its own.
	query := store.sqlstore.BunDBCtx(ctx).
		NewSelect().
		Model(dashboard).
		Where("org_id = ?", orgID).
		Where("source = ?", source)

	if err := query.Scan(ctx); err != nil {
		return nil, store.sqlstore.WrapNotFoundErrf(err, errors.CodeNotFound, "system dashboard with source %s doesn't exist", source)
	}

	return dashboard, nil
}
func (store *store) GetPublic(ctx context.Context, dashboardID string) (*dashboardtypes.StorablePublicDashboard, error) {
storable := new(dashboardtypes.StorablePublicDashboard)
err := store.
@@ -124,6 +142,7 @@ func (store *store) List(ctx context.Context, orgID valuer.UUID) ([]*dashboardty
NewSelect().
Model(&storableDashboards).
Where("org_id = ?", orgID).
Where("source = ?", "").
Scan(ctx)
if err != nil {
return nil, err
@@ -158,6 +177,7 @@ func (store *store) Update(ctx context.Context, orgID valuer.UUID, storableDashb
Model(storableDashboard).
WherePK().
Where("org_id = ?", orgID).
Where("source = ?", "").
Exec(ctx)
if err != nil {
return store.sqlstore.WrapNotFoundErrf(err, errors.CodeNotFound, "dashboard with id %s doesn't exist", storableDashboard.ID)
@@ -166,6 +186,23 @@ func (store *store) Update(ctx context.Context, orgID valuer.UUID, storableDashb
return nil
}
// UpdateBySource writes storableDashboard back to the database, restricting the
// update to the row matching the model's primary key, the given org_id and the
// given source.
//
// An error from Exec is passed through WrapNotFoundErrf with
// errors.CodeNotFound; otherwise nil is returned.
//
// NOTE(review): only the Exec error is inspected, not the affected-row count.
// Whether an update that matches zero rows surfaces as an error here is not
// visible from this function; confirm before relying on the not-found path.
func (store *store) UpdateBySource(ctx context.Context, orgID valuer.UUID, source string, storableDashboard *dashboardtypes.StorableDashboard) error {
	query := store.sqlstore.BunDBCtx(ctx).
		NewUpdate().
		Model(storableDashboard).
		WherePK().
		Where("org_id = ?", orgID).
		Where("source = ?", source)

	if _, err := query.Exec(ctx); err != nil {
		return store.sqlstore.WrapNotFoundErrf(err, errors.CodeNotFound, "system dashboard with source %s doesn't exist", source)
	}

	return nil
}
func (store *store) UpdatePublic(ctx context.Context, storable *dashboardtypes.StorablePublicDashboard) error {
_, err := store.
sqlstore.
@@ -189,6 +226,7 @@ func (store *store) Delete(ctx context.Context, orgID valuer.UUID, id valuer.UUI
Model(new(dashboardtypes.StorableDashboard)).
Where("id = ?", id).
Where("org_id = ?", orgID).
Where("source = ?", "").
Exec(ctx)
if err != nil {
return store.sqlstore.WrapNotFoundErrf(err, errors.CodeNotFound, "dashboard with id %s doesn't exist", id)
@@ -197,6 +235,22 @@ func (store *store) Delete(ctx context.Context, orgID valuer.UUID, id valuer.UUI
return nil
}
// DeleteBySource removes the dashboard rows of orgID whose source column
// equals source. The statement result is discarded, so the number of deleted
// rows is not inspected here.
func (store *store) DeleteBySource(ctx context.Context, orgID valuer.UUID, source string) error {
	query := store.sqlstore.BunDBCtx(ctx).
		NewDelete().
		Model(new(dashboardtypes.StorableDashboard)).
		Where("org_id = ?", orgID).
		Where("source = ?", source)

	if _, err := query.Exec(ctx); err != nil {
		return store.sqlstore.WrapNotFoundErrf(err, errors.CodeNotFound, "system dashboard with source %s doesn't exist", source)
	}

	return nil
}
func (store *store) DeletePublic(ctx context.Context, dashboardID string) error {
_, err := store.
sqlstore.

View File

@@ -6,18 +6,20 @@ import (
"github.com/SigNoz/signoz/pkg/alertmanager"
"github.com/SigNoz/signoz/pkg/modules/organization"
"github.com/SigNoz/signoz/pkg/modules/quickfilter"
"github.com/SigNoz/signoz/pkg/modules/systemdashboard"
"github.com/SigNoz/signoz/pkg/types"
"github.com/SigNoz/signoz/pkg/valuer"
)
type setter struct {
store types.OrganizationStore
alertmanager alertmanager.Alertmanager
quickfilter quickfilter.Module
store types.OrganizationStore
alertmanager alertmanager.Alertmanager
quickfilter quickfilter.Module
systemDashboard systemdashboard.Module
}
func NewSetter(store types.OrganizationStore, alertmanager alertmanager.Alertmanager, quickfilter quickfilter.Module) organization.Setter {
return &setter{store: store, alertmanager: alertmanager, quickfilter: quickfilter}
func NewSetter(store types.OrganizationStore, alertmanager alertmanager.Alertmanager, quickfilter quickfilter.Module, systemDashboard systemdashboard.Module) organization.Setter {
return &setter{store: store, alertmanager: alertmanager, quickfilter: quickfilter, systemDashboard: systemDashboard}
}
func (module *setter) Create(ctx context.Context, organization *types.Organization, createManagedRoles func(context.Context, valuer.UUID) error) error {
@@ -33,6 +35,10 @@ func (module *setter) Create(ctx context.Context, organization *types.Organizati
return err
}
if err := module.systemDashboard.SetDefaultConfig(ctx, organization.ID); err != nil {
return err
}
if err := createManagedRoles(ctx, organization.ID); err != nil {
return err
}

View File

@@ -0,0 +1,132 @@
package implsystemdashboard
import (
"context"
"encoding/json"
"net/http"
"time"
"github.com/gorilla/mux"
"github.com/SigNoz/signoz/pkg/errors"
"github.com/SigNoz/signoz/pkg/http/render"
"github.com/SigNoz/signoz/pkg/modules/systemdashboard"
"github.com/SigNoz/signoz/pkg/types/authtypes"
"github.com/SigNoz/signoz/pkg/types/dashboardtypes"
"github.com/SigNoz/signoz/pkg/valuer"
)
// handler implements systemdashboard.Handler on top of a
// systemdashboard.Module.
type handler struct {
// module performs the Get, Update and Delete operations the HTTP methods
// delegate to.
module systemdashboard.Module
}
// NewHandler returns a systemdashboard.Handler that delegates every request
// to the given module.
func NewHandler(module systemdashboard.Module) systemdashboard.Handler {
	h := new(handler)
	h.module = module
	return h
}
// Get writes the system dashboard identified by the {source} path variable
// for the organization found in the request's auth claims. Any failure while
// resolving claims, organization id or source, or while loading the
// dashboard, is rendered as an error response. The whole request is bounded
// by a 10 second timeout.
func (handler *handler) Get(rw http.ResponseWriter, r *http.Request) {
	ctx, cancel := context.WithTimeout(r.Context(), 10*time.Second)
	defer cancel()

	claims, claimsErr := authtypes.ClaimsFromContext(ctx)
	if claimsErr != nil {
		render.Error(rw, claimsErr)
		return
	}

	orgID, orgErr := valuer.NewUUID(claims.OrgID)
	if orgErr != nil {
		render.Error(rw, orgErr)
		return
	}

	source, sourceErr := parseSource(r)
	if sourceErr != nil {
		render.Error(rw, sourceErr)
		return
	}

	dashboard, getErr := handler.module.Get(ctx, orgID, source)
	if getErr != nil {
		render.Error(rw, getErr)
		return
	}

	gettable := dashboardtypes.NewGettableSystemDashboardFromDashboard(dashboard)
	render.Success(rw, http.StatusOK, gettable)
}
// Update replaces the data of the system dashboard identified by the
// {source} path variable for the organization found in the request's auth
// claims, recording the caller's email as the updater. On success the
// updated dashboard is written with 200 OK. The whole request is bounded by
// a 10 second timeout.
//
// A request body that cannot be decoded is reported as an invalid-input
// error instead of forwarding the raw decoder error.
func (handler *handler) Update(rw http.ResponseWriter, r *http.Request) {
	ctx, cancel := context.WithTimeout(r.Context(), 10*time.Second)
	defer cancel()

	claims, err := authtypes.ClaimsFromContext(ctx)
	if err != nil {
		render.Error(rw, err)
		return
	}

	orgID, err := valuer.NewUUID(claims.OrgID)
	if err != nil {
		render.Error(rw, err)
		return
	}

	source, err := parseSource(r)
	if err != nil {
		render.Error(rw, err)
		return
	}

	req := dashboardtypes.UpdatableSystemDashboard{}
	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
		// NOTE(review): assumes render.Error derives the HTTP status from the
		// error type and treats untyped errors as internal — confirm. Typing
		// the failure keeps malformed payloads a client-side error.
		render.Error(rw, errors.Newf(errors.TypeInvalidInput, errors.CodeInvalidInput, "invalid request body: %s", err.Error()))
		return
	}

	dashboard, err := handler.module.Update(ctx, orgID, source, claims.Email, req.Data)
	if err != nil {
		render.Error(rw, err)
		return
	}

	render.Success(rw, http.StatusOK, dashboardtypes.NewGettableSystemDashboardFromDashboard(dashboard))
}
// Delete invokes the module's Delete for the system dashboard identified by
// the {source} path variable and the organization found in the request's
// auth claims, then answers with 204 No Content. The whole request is
// bounded by a 10 second timeout.
func (handler *handler) Delete(rw http.ResponseWriter, r *http.Request) {
	ctx, cancel := context.WithTimeout(r.Context(), 10*time.Second)
	defer cancel()

	claims, claimsErr := authtypes.ClaimsFromContext(ctx)
	if claimsErr != nil {
		render.Error(rw, claimsErr)
		return
	}

	orgID, orgErr := valuer.NewUUID(claims.OrgID)
	if orgErr != nil {
		render.Error(rw, orgErr)
		return
	}

	source, sourceErr := parseSource(r)
	if sourceErr != nil {
		render.Error(rw, sourceErr)
		return
	}

	deleteErr := handler.module.Delete(ctx, orgID, source)
	if deleteErr != nil {
		render.Error(rw, deleteErr)
		return
	}

	render.Success(rw, http.StatusNoContent, nil)
}
// parseSource reads the "source" path variable from the request and converts
// it with dashboardtypes.NewSource. A missing or empty variable yields an
// invalid-input error.
func parseSource(r *http.Request) (dashboardtypes.Source, error) {
	if raw := mux.Vars(r)["source"]; raw != "" {
		return dashboardtypes.NewSource(raw)
	}

	return dashboardtypes.Source{}, errors.Newf(errors.TypeInvalidInput, errors.CodeInvalidInput, "source is missing in the path")
}

View File

@@ -0,0 +1,97 @@
package implsystemdashboard
import (
"context"
"time"
"github.com/SigNoz/signoz/pkg/errors"
"github.com/SigNoz/signoz/pkg/modules/systemdashboard"
"github.com/SigNoz/signoz/pkg/types/dashboardtypes"
"github.com/SigNoz/signoz/pkg/valuer"
)
// module implements systemdashboard.Module on top of the dashboard store.
type module struct {
// store is used to read, create, update and delete dashboards by
// (org id, source).
store dashboardtypes.Store
}
// NewModule returns a systemdashboard.Module that reads and writes through
// the shared dashboard store. System dashboards are rows of the `dashboard`
// table whose source column is non-empty. There is no DB constraint on
// (org_id, source); uniqueness is enforced in the application layer with a
// check-before-insert.
func NewModule(store dashboardtypes.Store) systemdashboard.Module {
	m := new(module)
	m.store = store
	return m
}
// Get loads the stored dashboard for (orgID, source) and converts it to a
// dashboardtypes.Dashboard. Store errors are returned unchanged.
func (module *module) Get(ctx context.Context, orgID valuer.UUID, source dashboardtypes.Source) (*dashboardtypes.Dashboard, error) {
	stored, err := module.store.GetBySource(ctx, orgID, source.StringValue())
	if err == nil {
		return dashboardtypes.NewDashboardFromStorableDashboard(stored), nil
	}

	return nil, err
}
// Update loads the stored dashboard for (orgID, source), overwrites its data
// with the supplied payload, stamps updatedBy and the current time, and
// writes it back. The updated dashboard is returned; store errors are
// returned unchanged.
func (module *module) Update(ctx context.Context, orgID valuer.UUID, source dashboardtypes.Source, updatedBy string, data dashboardtypes.UpdatableDashboard) (*dashboardtypes.Dashboard, error) {
	current, loadErr := module.store.GetBySource(ctx, orgID, source.StringValue())
	if loadErr != nil {
		return nil, loadErr
	}

	current.Data = data
	current.UpdatedBy = updatedBy
	current.UpdatedAt = time.Now()

	saveErr := module.store.UpdateBySource(ctx, orgID, source.StringValue(), current)
	if saveErr != nil {
		return nil, saveErr
	}

	return dashboardtypes.NewDashboardFromStorableDashboard(current), nil
}
// Delete removes the stored dashboard for (orgID, source) and then seeds the
// default dashboard for that source again. Both steps run inside the store's
// RunInTx callback.
func (module *module) Delete(ctx context.Context, orgID valuer.UUID, source dashboardtypes.Source) error {
	resetToDefault := func(ctx context.Context) error {
		deleteErr := module.store.DeleteBySource(ctx, orgID, source.StringValue())
		if deleteErr != nil {
			return deleteErr
		}

		return module.setDefaultForSource(ctx, orgID, source)
	}

	return module.store.RunInTx(ctx, resetToDefault)
}
// SetDefaultConfig seeds the default system dashboard of every source listed
// in dashboardtypes.AllSources for the given organization, stopping at the
// first error.
func (module *module) SetDefaultConfig(ctx context.Context, orgID valuer.UUID) error {
	for i := range dashboardtypes.AllSources {
		err := module.setDefaultForSource(ctx, orgID, dashboardtypes.AllSources[i])
		if err != nil {
			return err
		}
	}

	return nil
}
// setDefaultForSource creates the default system dashboard of one source for
// the given org. It is idempotent: when a row already exists nothing is
// written and nil is returned. This lookup is what guarantees uniqueness on
// (org_id, source); there is no DB constraint. Concurrent seeders (e.g. the
// org setter on one replica and the migration loop on another, for a
// brand-new org) can theoretically race; the window is narrow and accepted
// for v1, revisit if observed.
func (module *module) setDefaultForSource(ctx context.Context, orgID valuer.UUID, source dashboardtypes.Source) error {
	current, lookupErr := module.store.GetBySource(ctx, orgID, source.StringValue())
	switch {
	case lookupErr != nil && !errors.Ast(lookupErr, errors.TypeNotFound):
		// Anything other than "not found" is a real failure.
		return lookupErr
	case current != nil:
		// Already seeded; nothing to do.
		return nil
	}

	defaultDashboard, buildErr := dashboardtypes.NewSystemDashboardDefault(orgID, source)
	if buildErr != nil {
		return buildErr
	}

	storable, convertErr := dashboardtypes.NewStorableDashboardFromDashboard(defaultDashboard)
	if convertErr != nil {
		return convertErr
	}

	return module.store.Create(ctx, storable)
}

View File

@@ -0,0 +1,24 @@
package systemdashboard
import (
"context"
"net/http"
"github.com/SigNoz/signoz/pkg/types/dashboardtypes"
"github.com/SigNoz/signoz/pkg/valuer"
)
// Module defines the operations available on system dashboards, which are
// addressed by organization id and source.
type Module interface {
// Get returns the system dashboard of the given org and source.
Get(ctx context.Context, orgID valuer.UUID, source dashboardtypes.Source) (*dashboardtypes.Dashboard, error)
// Update replaces the data of the system dashboard of the given org and
// source, attributing the change to updatedBy, and returns the result.
Update(ctx context.Context, orgID valuer.UUID, source dashboardtypes.Source, updatedBy string, data dashboardtypes.UpdatableDashboard) (*dashboardtypes.Dashboard, error)
// Delete removes the system dashboard of the given org and source.
// NOTE(review): the implsystemdashboard implementation re-seeds the
// default dashboard afterwards — confirm this is the intended contract.
Delete(ctx context.Context, orgID valuer.UUID, source dashboardtypes.Source) error
// SetDefaultConfig seeds the default system dashboards for the given org.
SetDefaultConfig(ctx context.Context, orgID valuer.UUID) error
}
// Handler defines the HTTP handler interface for system dashboard endpoints.
// /api/v1/system/{source}/dashboard.
type Handler interface {
// Get serves a read of the system dashboard for the requested source.
Get(http.ResponseWriter, *http.Request)
// Update serves an update of the system dashboard for the requested source.
Update(http.ResponseWriter, *http.Request)
// Delete serves a delete of the system dashboard for the requested source.
Delete(http.ResponseWriter, *http.Request)
}

View File

@@ -38,6 +38,8 @@ import (
"github.com/SigNoz/signoz/pkg/modules/services/implservices"
"github.com/SigNoz/signoz/pkg/modules/spanpercentile"
"github.com/SigNoz/signoz/pkg/modules/spanpercentile/implspanpercentile"
"github.com/SigNoz/signoz/pkg/modules/systemdashboard"
"github.com/SigNoz/signoz/pkg/modules/systemdashboard/implsystemdashboard"
"github.com/SigNoz/signoz/pkg/modules/tracefunnel"
"github.com/SigNoz/signoz/pkg/modules/tracefunnel/impltracefunnel"
"github.com/SigNoz/signoz/pkg/querier"
@@ -49,6 +51,7 @@ type Handlers struct {
SavedView savedview.Handler
Apdex apdex.Handler
Dashboard dashboard.Handler
SystemDashboard systemdashboard.Handler
QuickFilter quickfilter.Handler
TraceFunnel tracefunnel.Handler
RawDataExport rawdataexport.Handler
@@ -90,6 +93,7 @@ func NewHandlers(
SavedView: implsavedview.NewHandler(modules.SavedView),
Apdex: implapdex.NewHandler(modules.Apdex),
Dashboard: impldashboard.NewHandler(modules.Dashboard, providerSettings, authz),
SystemDashboard: implsystemdashboard.NewHandler(modules.SystemDashboard),
QuickFilter: implquickfilter.NewHandler(modules.QuickFilter),
TraceFunnel: impltracefunnel.NewHandler(modules.TraceFunnel),
RawDataExport: implrawdataexport.NewHandler(modules.RawDataExport),

View File

@@ -14,6 +14,7 @@ import (
"github.com/SigNoz/signoz/pkg/modules/authdomain/implauthdomain"
"github.com/SigNoz/signoz/pkg/modules/cloudintegration"
"github.com/SigNoz/signoz/pkg/modules/dashboard"
"github.com/SigNoz/signoz/pkg/modules/dashboard/impldashboard"
"github.com/SigNoz/signoz/pkg/modules/metricsexplorer"
"github.com/SigNoz/signoz/pkg/modules/metricsexplorer/implmetricsexplorer"
"github.com/SigNoz/signoz/pkg/modules/organization"
@@ -37,6 +38,8 @@ import (
"github.com/SigNoz/signoz/pkg/modules/session/implsession"
"github.com/SigNoz/signoz/pkg/modules/spanpercentile"
"github.com/SigNoz/signoz/pkg/modules/spanpercentile/implspanpercentile"
"github.com/SigNoz/signoz/pkg/modules/systemdashboard"
"github.com/SigNoz/signoz/pkg/modules/systemdashboard/implsystemdashboard"
"github.com/SigNoz/signoz/pkg/modules/tracefunnel"
"github.com/SigNoz/signoz/pkg/modules/tracefunnel/impltracefunnel"
"github.com/SigNoz/signoz/pkg/modules/user"
@@ -61,6 +64,7 @@ type Modules struct {
SavedView savedview.Module
Apdex apdex.Module
Dashboard dashboard.Module
SystemDashboard systemdashboard.Module
QuickFilter quickfilter.Module
TraceFunnel tracefunnel.Module
RawDataExport rawdataexport.Module
@@ -98,7 +102,8 @@ func NewModules(
cloudIntegrationModule cloudintegration.Module,
) Modules {
quickfilter := implquickfilter.NewModule(implquickfilter.NewStore(sqlstore))
orgSetter := implorganization.NewSetter(implorganization.NewStore(sqlstore), alertmanager, quickfilter)
systemDashboard := implsystemdashboard.NewModule(impldashboard.NewStore(sqlstore))
orgSetter := implorganization.NewSetter(implorganization.NewStore(sqlstore), alertmanager, quickfilter, systemDashboard)
userSetter := impluser.NewSetter(impluser.NewStore(sqlstore, providerSettings), tokenizer, emailing, providerSettings, orgSetter, authz, analytics, config.User, userRoleStore, userGetter)
ruleStore := sqlrulestore.NewRuleStore(sqlstore, queryParser, providerSettings)
@@ -109,6 +114,7 @@ func NewModules(
SavedView: implsavedview.NewModule(sqlstore),
Apdex: implapdex.NewModule(sqlstore),
Dashboard: dashboard,
SystemDashboard: systemDashboard,
UserSetter: userSetter,
UserGetter: userGetter,
QuickFilter: quickfilter,

View File

@@ -29,6 +29,7 @@ import (
"github.com/SigNoz/signoz/pkg/modules/rulestatehistory"
"github.com/SigNoz/signoz/pkg/modules/serviceaccount"
"github.com/SigNoz/signoz/pkg/modules/session"
"github.com/SigNoz/signoz/pkg/modules/systemdashboard"
"github.com/SigNoz/signoz/pkg/modules/user"
"github.com/SigNoz/signoz/pkg/querier"
"github.com/SigNoz/signoz/pkg/ruler"
@@ -60,6 +61,7 @@ func NewOpenAPI(ctx context.Context, instrumentation instrumentation.Instrumenta
struct{ flagger.Handler }{},
struct{ dashboard.Module }{},
struct{ dashboard.Handler }{},
struct{ systemdashboard.Handler }{},
struct{ metricsexplorer.Handler }{},
struct{ gateway.Handler }{},
struct{ fields.Handler }{},

View File

@@ -194,6 +194,7 @@ func NewSQLMigrationProviderFactories(
sqlmigration.NewDeprecateAPIKeyFactory(sqlstore, sqlschema),
sqlmigration.NewServiceAccountAuthzactory(sqlstore),
sqlmigration.NewDropUserDeletedAtFactory(sqlstore, sqlschema),
sqlmigration.NewAddSystemDashboardFactory(sqlstore, sqlschema),
)
}
@@ -270,6 +271,7 @@ func NewAPIServerProviderFactories(orgGetter organization.Getter, authz authz.Au
handlers.FlaggerHandler,
modules.Dashboard,
handlers.Dashboard,
handlers.SystemDashboard,
handlers.MetricsExplorer,
handlers.GatewayHandler,
handlers.Fields,

View File

@@ -0,0 +1,120 @@
package sqlmigration
import (
"context"
"github.com/uptrace/bun"
"github.com/uptrace/bun/migrate"
"github.com/SigNoz/signoz/pkg/factory"
"github.com/SigNoz/signoz/pkg/sqlschema"
"github.com/SigNoz/signoz/pkg/sqlstore"
"github.com/SigNoz/signoz/pkg/types"
"github.com/SigNoz/signoz/pkg/types/dashboardtypes"
"github.com/SigNoz/signoz/pkg/valuer"
)
// addSystemDashboard is the SQL migration that adds the `source` column to
// the `dashboard` table and seeds default system dashboards.
type addSystemDashboard struct {
// sqlstore provides the dialect used to check whether the column exists.
sqlstore sqlstore.SQLStore
// sqlschema provides the table definition and the statements that add the
// column.
sqlschema sqlschema.SQLSchema
}
// NewAddSystemDashboardFactory returns the provider factory, registered under
// the name "add_system_dashboard", that builds the addSystemDashboard
// migration from the given store and schema.
func NewAddSystemDashboardFactory(sqlstore sqlstore.SQLStore, sqlschema sqlschema.SQLSchema) factory.ProviderFactory[SQLMigration, Config] {
	newMigration := func(_ context.Context, _ factory.ProviderSettings, _ Config) (SQLMigration, error) {
		return &addSystemDashboard{sqlstore: sqlstore, sqlschema: sqlschema}, nil
	}

	return factory.NewProviderFactory(factory.MustNewName("add_system_dashboard"), newMigration)
}
// Register adds this migration's Up and Down functions to the given
// migration set and returns any registration error.
func (migration *addSystemDashboard) Register(migrations *migrate.Migrations) error {
	return migrations.Register(migration.Up, migration.Down)
}
// Up runs the migration inside a single transaction:
//
//  1. adds a non-nullable text column `source` to the `dashboard` table when
//     it does not exist yet, and
//  2. for every organization and every entry of dashboardtypes.AllSources,
//     inserts the default system dashboard unless a row with that
//     (org_id, source) is already present.
//
// Any error aborts the migration; the deferred rollback then discards the
// partial work.
func (migration *addSystemDashboard) Up(ctx context.Context, db *bun.DB) error {
	tx, err := db.BeginTx(ctx, nil)
	if err != nil {
		return err
	}
	// The rollback result is deliberately discarded.
	defer func() { _ = tx.Rollback() }()

	hasSourceColumn, err := migration.sqlstore.Dialect().ColumnExists(ctx, tx, "dashboard", "source")
	if err != nil {
		return err
	}

	if !hasSourceColumn {
		table, uniqueConstraints, err := migration.sqlschema.GetTable(ctx, sqlschema.TableName("dashboard"))
		if err != nil {
			return err
		}

		sourceColumn := &sqlschema.Column{
			Name:     sqlschema.ColumnName("source"),
			DataType: sqlschema.DataTypeText,
			Nullable: false,
		}

		statements := migration.sqlschema.Operator().AddColumn(table, uniqueConstraints, sourceColumn, "")
		for _, statement := range statements {
			if _, err := tx.ExecContext(ctx, string(statement)); err != nil {
				return err
			}
		}
	}

	var rawOrgIDs []string
	err = tx.NewSelect().Model((*types.Organization)(nil)).Column("id").Scan(ctx, &rawOrgIDs)
	if err != nil {
		return err
	}

	for _, rawOrgID := range rawOrgIDs {
		orgID, err := valuer.NewUUID(rawOrgID)
		if err != nil {
			return err
		}

		for _, source := range dashboardtypes.AllSources {
			existingCount, err := tx.NewSelect().
				Model((*dashboardtypes.StorableDashboard)(nil)).
				Where("org_id = ?", orgID).
				Where("source = ?", source.StringValue()).
				Count(ctx)
			if err != nil {
				return err
			}

			// Skip sources that already have a dashboard for this org.
			if existingCount > 0 {
				continue
			}

			defaultDashboard, err := dashboardtypes.NewSystemDashboardDefault(orgID, source)
			if err != nil {
				return err
			}

			storable, err := dashboardtypes.NewStorableDashboardFromDashboard(defaultDashboard)
			if err != nil {
				return err
			}

			if _, err := tx.NewInsert().Model(storable).Exec(ctx); err != nil {
				return err
			}
		}
	}

	return tx.Commit()
}
// Down is a no-op: it performs no schema or data changes and always returns
// nil.
func (migration *addSystemDashboard) Down(_ context.Context, _ *bun.DB) error {
	return nil
}

View File

@@ -0,0 +1,107 @@
{
"title": "AI Observability Overview",
"description": "AI / LLM observability overview — cost, tokens, latency, errors, RED for tool calls, and time to first token. Scoped by model, environment and service (apply via the variable bar).",
"tags": ["ai", "llm", "genai", "overview"],
"version": "v5",
"variables": {
"model": {
"id": "a1000000-0000-0000-0000-000000000001",
"name": "model",
"key": "model",
"description": "LLM model",
"type": "QUERY",
"sort": "ASC",
"multiSelect": true,
"showALLOption": true,
"allSelected": true,
"queryValue": "SELECT DISTINCT stringTagMap['gen_ai.request.model'] AS model FROM signoz_traces.distributed_signoz_index_v3 WHERE has(stringTagMap, 'gen_ai.request.model') AND timestamp >= now() - INTERVAL 1 DAY",
"customValue": "",
"textboxValue": "",
"selectedValue": [],
"order": 0,
"modificationUUID": "a1000000-0000-0000-0000-000000000011"
},
"environment": {
"id": "a1000000-0000-0000-0000-000000000002",
"name": "environment",
"key": "environment",
"description": "Deployment environment",
"type": "QUERY",
"sort": "ASC",
"multiSelect": true,
"showALLOption": true,
"allSelected": true,
"queryValue": "SELECT DISTINCT resourceTagsMap['deployment.environment'] AS environment FROM signoz_traces.distributed_signoz_index_v3 WHERE has(resourceTagsMap, 'deployment.environment') AND timestamp >= now() - INTERVAL 1 DAY",
"customValue": "",
"textboxValue": "",
"selectedValue": [],
"order": 1,
"modificationUUID": "a1000000-0000-0000-0000-000000000012"
},
"service_name": {
"id": "a1000000-0000-0000-0000-000000000003",
"name": "service_name",
"key": "service_name",
"description": "Service name",
"type": "QUERY",
"sort": "ASC",
"multiSelect": true,
"showALLOption": true,
"allSelected": true,
"queryValue": "SELECT DISTINCT serviceName FROM signoz_traces.distributed_signoz_index_v3 WHERE timestamp >= now() - INTERVAL 1 DAY",
"customValue": "",
"textboxValue": "",
"selectedValue": [],
"order": 2,
"modificationUUID": "a1000000-0000-0000-0000-000000000013"
}
},
"layout": [
{"i": "11111111-1111-1111-1111-111111111111", "x": 0, "y": 0, "w": 3, "h": 3, "moved": false, "static": false},
{"i": "22222222-2222-2222-2222-222222222222", "x": 3, "y": 0, "w": 3, "h": 3, "moved": false, "static": false},
{"i": "33333333-3333-3333-3333-333333333333", "x": 6, "y": 0, "w": 2, "h": 3, "moved": false, "static": false},
{"i": "44444444-4444-4444-4444-444444444444", "x": 8, "y": 0, "w": 2, "h": 3, "moved": false, "static": false},
{"i": "55555555-5555-5555-5555-555555555555", "x": 10, "y": 0, "w": 2, "h": 3, "moved": false, "static": false},
{"i": "66666666-6666-6666-6666-666666666666", "x": 0, "y": 3, "w": 6, "h": 4, "moved": false, "static": false},
{"i": "77777777-7777-7777-7777-777777777777", "x": 6, "y": 3, "w": 6, "h": 4, "moved": false, "static": false},
{"i": "88888888-8888-8888-8888-888888888888", "x": 0, "y": 7, "w": 6, "h": 4, "moved": false, "static": false},
{"i": "99999999-9999-9999-9999-999999999999", "x": 6, "y": 7, "w": 6, "h": 4, "moved": false, "static": false},
{"i": "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa", "x": 0, "y": 11, "w": 4, "h": 4, "moved": false, "static": false},
{"i": "bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb", "x": 4, "y": 11, "w": 4, "h": 4, "moved": false, "static": false},
{"i": "cccccccc-cccc-cccc-cccc-cccccccccccc", "x": 8, "y": 11, "w": 4, "h": 4, "moved": false, "static": false},
{"i": "dddddddd-dddd-dddd-dddd-dddddddddddd", "x": 0, "y": 15, "w": 4, "h": 4, "moved": false, "static": false},
{"i": "eeeeeeee-eeee-eeee-eeee-eeeeeeeeeeee", "x": 4, "y": 15, "w": 4, "h": 4, "moved": false, "static": false},
{"i": "ffffffff-ffff-ffff-ffff-ffffffffffff", "x": 8, "y": 15, "w": 4, "h": 4, "moved": false, "static": false}
],
"widgets": [
{"id": "11111111-1111-1111-1111-111111111111", "title": "Total cost", "description": "Total LLM cost across all calls. Requires gen_ai.usage.cost attribute.", "panelTypes": "value", "nullZeroValues": "zero", "opacity": "1", "isStacked": false, "fillSpans": false, "yAxisUnit": "none", "timePreferance": "GLOBAL_TIME", "softMax": null, "softMin": null, "thresholds": [], "selectedLogFields": [], "selectedTracesFields": [], "query": {"queryType": "builder", "promql": [{"disabled": false, "legend": "", "name": "A", "query": ""}], "clickhouse_sql": [{"disabled": false, "legend": "", "name": "A", "query": ""}], "id": "q1111111-1111-1111-1111-111111111111", "builder": {"queryData": [{"queryName": "A", "stepInterval": 60, "dataSource": "traces", "aggregations": [{"expression": "sum(gen_ai.usage.cost)"}], "filter": {"expression": ""}, "groupBy": [], "expression": "A", "orderBy": [], "legend": "", "disabled": false, "having": {"expression": ""}, "limit": null, "reduceTo": "sum"}], "queryFormulas": [], "queryTraceOperator": []}}},
{"id": "22222222-2222-2222-2222-222222222222", "title": "Total tokens", "description": "Sum of input + output tokens.", "panelTypes": "value", "nullZeroValues": "zero", "opacity": "1", "isStacked": false, "fillSpans": false, "yAxisUnit": "short", "timePreferance": "GLOBAL_TIME", "softMax": null, "softMin": null, "thresholds": [], "selectedLogFields": [], "selectedTracesFields": [], "query": {"queryType": "builder", "promql": [{"disabled": false, "legend": "", "name": "A", "query": ""}], "clickhouse_sql": [{"disabled": false, "legend": "", "name": "A", "query": ""}], "id": "q2222222-2222-2222-2222-222222222222", "builder": {"queryData": [{"queryName": "A", "stepInterval": 60, "dataSource": "traces", "aggregations": [{"expression": "sum(gen_ai.usage.input_tokens) + sum(gen_ai.usage.output_tokens)"}], "filter": {"expression": ""}, "groupBy": [], "expression": "A", "orderBy": [], "legend": "", "disabled": false, "having": {"expression": ""}, "limit": null, "reduceTo": "sum"}], "queryFormulas": [], "queryTraceOperator": []}}},
{"id": "33333333-3333-3333-3333-333333333333", "title": "Avg latency (p95)", "description": "p95 latency of LLM spans.", "panelTypes": "value", "nullZeroValues": "zero", "opacity": "1", "isStacked": false, "fillSpans": false, "yAxisUnit": "ns", "timePreferance": "GLOBAL_TIME", "softMax": null, "softMin": null, "thresholds": [], "selectedLogFields": [], "selectedTracesFields": [], "query": {"queryType": "builder", "promql": [{"disabled": false, "legend": "", "name": "A", "query": ""}], "clickhouse_sql": [{"disabled": false, "legend": "", "name": "A", "query": ""}], "id": "q3333333-3333-3333-3333-333333333333", "builder": {"queryData": [{"queryName": "A", "stepInterval": 60, "dataSource": "traces", "aggregations": [{"expression": "p95(duration_nano)"}], "filter": {"expression": "gen_ai.system != ''"}, "groupBy": [], "expression": "A", "orderBy": [], "legend": "", "disabled": false, "having": {"expression": ""}, "limit": null, "reduceTo": "avg"}], "queryFormulas": [], "queryTraceOperator": []}}},
{"id": "44444444-4444-4444-4444-444444444444", "title": "Error rate", "description": "Error rate as a percentage of total LLM calls.", "panelTypes": "value", "nullZeroValues": "zero", "opacity": "1", "isStacked": false, "fillSpans": false, "yAxisUnit": "percent", "timePreferance": "GLOBAL_TIME", "softMax": null, "softMin": null, "thresholds": [], "selectedLogFields": [], "selectedTracesFields": [], "query": {"queryType": "builder", "promql": [{"disabled": false, "legend": "", "name": "A", "query": ""}], "clickhouse_sql": [{"disabled": false, "legend": "", "name": "A", "query": ""}], "id": "q4444444-4444-4444-4444-444444444444", "builder": {"queryData": [{"queryName": "A", "stepInterval": 60, "dataSource": "traces", "aggregations": [{"expression": "countIf(has_error = true) * 100 / count()"}], "filter": {"expression": "gen_ai.system != ''"}, "groupBy": [], "expression": "A", "orderBy": [], "legend": "", "disabled": false, "having": {"expression": ""}, "limit": null, "reduceTo": "avg"}], "queryFormulas": [], "queryTraceOperator": []}}},
{"id": "55555555-5555-5555-5555-555555555555", "title": "TTFT (p95)", "description": "p95 time to first token.", "panelTypes": "value", "nullZeroValues": "zero", "opacity": "1", "isStacked": false, "fillSpans": false, "yAxisUnit": "ms", "timePreferance": "GLOBAL_TIME", "softMax": null, "softMin": null, "thresholds": [], "selectedLogFields": [], "selectedTracesFields": [], "query": {"queryType": "builder", "promql": [{"disabled": false, "legend": "", "name": "A", "query": ""}], "clickhouse_sql": [{"disabled": false, "legend": "", "name": "A", "query": ""}], "id": "q5555555-5555-5555-5555-555555555555", "builder": {"queryData": [{"queryName": "A", "stepInterval": 60, "dataSource": "traces", "aggregations": [{"expression": "p95(gen_ai.server.ttft)"}], "filter": {"expression": ""}, "groupBy": [], "expression": "A", "orderBy": [], "legend": "", "disabled": false, "having": {"expression": ""}, "limit": null, "reduceTo": "avg"}], "queryFormulas": [], "queryTraceOperator": []}}},
{"id": "66666666-6666-6666-6666-666666666666", "title": "Cost over time", "description": "Cost by model over time.", "panelTypes": "graph", "nullZeroValues": "zero", "opacity": "1", "isStacked": false, "fillSpans": false, "yAxisUnit": "none", "timePreferance": "GLOBAL_TIME", "softMax": null, "softMin": null, "thresholds": [], "selectedLogFields": [], "selectedTracesFields": [], "query": {"queryType": "builder", "promql": [{"disabled": false, "legend": "", "name": "A", "query": ""}], "clickhouse_sql": [{"disabled": false, "legend": "", "name": "A", "query": ""}], "id": "q6666666-6666-6666-6666-666666666666", "builder": {"queryData": [{"queryName": "A", "stepInterval": 60, "dataSource": "traces", "aggregations": [{"expression": "sum(gen_ai.usage.cost)"}], "filter": {"expression": ""}, "groupBy": [{"key": "gen_ai.request.model", "dataType": "string", "type": "tag", "isColumn": false, "isJSON": false}], "expression": "A", "orderBy": [], "legend": "{{gen_ai.request.model}}", "disabled": false, "having": {"expression": ""}, "limit": null}], "queryFormulas": [], "queryTraceOperator": []}}},
{"id": "77777777-7777-7777-7777-777777777777", "title": "Token usage over time", "description": "Input vs output tokens over time.", "panelTypes": "graph", "nullZeroValues": "zero", "opacity": "1", "isStacked": true, "fillSpans": false, "yAxisUnit": "short", "timePreferance": "GLOBAL_TIME", "softMax": null, "softMin": null, "thresholds": [], "selectedLogFields": [], "selectedTracesFields": [], "query": {"queryType": "builder", "promql": [{"disabled": false, "legend": "", "name": "A", "query": ""}], "clickhouse_sql": [{"disabled": false, "legend": "", "name": "A", "query": ""}], "id": "q7777777-7777-7777-7777-777777777777", "builder": {"queryData": [{"queryName": "A", "stepInterval": 60, "dataSource": "traces", "aggregations": [{"expression": "sum(gen_ai.usage.input_tokens)"}], "filter": {"expression": ""}, "groupBy": [], "expression": "A", "orderBy": [], "legend": "Input", "disabled": false, "having": {"expression": ""}, "limit": null}, {"queryName": "B", "stepInterval": 60, "dataSource": "traces", "aggregations": [{"expression": "sum(gen_ai.usage.output_tokens)"}], "filter": {"expression": ""}, "groupBy": [], "expression": "B", "orderBy": [], "legend": "Output", "disabled": false, "having": {"expression": ""}, "limit": null}], "queryFormulas": [], "queryTraceOperator": []}}},
{"id": "88888888-8888-8888-8888-888888888888", "title": "LLM call latency percentiles", "description": "p50, p90, p95, p99 latency by model.", "panelTypes": "table", "nullZeroValues": "zero", "opacity": "1", "isStacked": false, "fillSpans": false, "yAxisUnit": "ns", "timePreferance": "GLOBAL_TIME", "softMax": null, "softMin": null, "thresholds": [], "selectedLogFields": [], "selectedTracesFields": [], "query": {"queryType": "builder", "promql": [{"disabled": false, "legend": "", "name": "A", "query": ""}], "clickhouse_sql": [{"disabled": false, "legend": "", "name": "A", "query": ""}], "id": "q8888888-8888-8888-8888-888888888888", "builder": {"queryData": [{"queryName": "A", "stepInterval": 60, "dataSource": "traces", "aggregations": [{"expression": "p50(duration_nano)"}, {"expression": "p90(duration_nano)"}, {"expression": "p95(duration_nano)"}, {"expression": "p99(duration_nano)"}], "filter": {"expression": "gen_ai.system != ''"}, "groupBy": [{"key": "gen_ai.request.model", "dataType": "string", "type": "tag", "isColumn": false, "isJSON": false}], "expression": "A", "orderBy": [], "legend": "{{gen_ai.request.model}}", "disabled": false, "having": {"expression": ""}, "limit": null}], "queryFormulas": [], "queryTraceOperator": []}}},
{"id": "99999999-9999-9999-9999-999999999999", "title": "LLM call latency over time", "description": "p95 latency trend by model.", "panelTypes": "graph", "nullZeroValues": "zero", "opacity": "1", "isStacked": false, "fillSpans": false, "yAxisUnit": "ns", "timePreferance": "GLOBAL_TIME", "softMax": null, "softMin": null, "thresholds": [], "selectedLogFields": [], "selectedTracesFields": [], "query": {"queryType": "builder", "promql": [{"disabled": false, "legend": "", "name": "A", "query": ""}], "clickhouse_sql": [{"disabled": false, "legend": "", "name": "A", "query": ""}], "id": "q9999999-9999-9999-9999-999999999999", "builder": {"queryData": [{"queryName": "A", "stepInterval": 60, "dataSource": "traces", "aggregations": [{"expression": "p95(duration_nano)"}], "filter": {"expression": "gen_ai.system != ''"}, "groupBy": [{"key": "gen_ai.request.model", "dataType": "string", "type": "tag", "isColumn": false, "isJSON": false}], "expression": "A", "orderBy": [], "legend": "{{gen_ai.request.model}}", "disabled": false, "having": {"expression": ""}, "limit": null}], "queryFormulas": [], "queryTraceOperator": []}}},
{"id": "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa", "title": "Error count", "description": "Errors grouped by error type.", "panelTypes": "graph", "nullZeroValues": "zero", "opacity": "1", "isStacked": true, "fillSpans": false, "yAxisUnit": "short", "timePreferance": "GLOBAL_TIME", "softMax": null, "softMin": null, "thresholds": [], "selectedLogFields": [], "selectedTracesFields": [], "query": {"queryType": "builder", "promql": [{"disabled": false, "legend": "", "name": "A", "query": ""}], "clickhouse_sql": [{"disabled": false, "legend": "", "name": "A", "query": ""}], "id": "qaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa", "builder": {"queryData": [{"queryName": "A", "stepInterval": 60, "dataSource": "traces", "aggregations": [{"expression": "count()"}], "filter": {"expression": "has_error = true AND gen_ai.system != ''"}, "groupBy": [{"key": "gen_ai.error.type", "dataType": "string", "type": "tag", "isColumn": false, "isJSON": false}], "expression": "A", "orderBy": [], "legend": "{{gen_ai.error.type}}", "disabled": false, "having": {"expression": ""}, "limit": null}], "queryFormulas": [], "queryTraceOperator": []}}},
{"id": "bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb", "title": "Time to first token", "description": "p95 TTFT by model.", "panelTypes": "graph", "nullZeroValues": "zero", "opacity": "1", "isStacked": false, "fillSpans": false, "yAxisUnit": "ms", "timePreferance": "GLOBAL_TIME", "softMax": null, "softMin": null, "thresholds": [], "selectedLogFields": [], "selectedTracesFields": [], "query": {"queryType": "builder", "promql": [{"disabled": false, "legend": "", "name": "A", "query": ""}], "clickhouse_sql": [{"disabled": false, "legend": "", "name": "A", "query": ""}], "id": "qbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb", "builder": {"queryData": [{"queryName": "A", "stepInterval": 60, "dataSource": "traces", "aggregations": [{"expression": "p95(gen_ai.server.ttft)"}], "filter": {"expression": ""}, "groupBy": [{"key": "gen_ai.request.model", "dataType": "string", "type": "tag", "isColumn": false, "isJSON": false}], "expression": "A", "orderBy": [], "legend": "{{gen_ai.request.model}}", "disabled": false, "having": {"expression": ""}, "limit": null}], "queryFormulas": [], "queryTraceOperator": []}}},
{"id": "cccccccc-cccc-cccc-cccc-cccccccccccc", "title": "Top 10 span names", "description": "Top span names by count across GenAI spans.", "panelTypes": "table", "nullZeroValues": "zero", "opacity": "1", "isStacked": false, "fillSpans": false, "yAxisUnit": "none", "timePreferance": "GLOBAL_TIME", "softMax": null, "softMin": null, "thresholds": [], "selectedLogFields": [], "selectedTracesFields": [], "query": {"queryType": "builder", "promql": [{"disabled": false, "legend": "", "name": "A", "query": ""}], "clickhouse_sql": [{"disabled": false, "legend": "", "name": "A", "query": ""}], "id": "qccccccc-cccc-cccc-cccc-cccccccccccc", "builder": {"queryData": [{"queryName": "A", "stepInterval": 60, "dataSource": "traces", "aggregations": [{"expression": "count()"}], "filter": {"expression": "gen_ai.system != ''"}, "groupBy": [{"key": "name", "dataType": "string", "type": "tag", "isColumn": true, "isJSON": false}], "expression": "A", "orderBy": [{"columnName": "count()", "order": "desc"}], "legend": "{{name}}", "disabled": false, "having": {"expression": ""}, "limit": 10}], "queryFormulas": [], "queryTraceOperator": []}}},
{"id": "dddddddd-dddd-dddd-dddd-dddddddddddd", "title": "Tool call rate", "description": "Tool call rate per second.", "panelTypes": "graph", "nullZeroValues": "zero", "opacity": "1", "isStacked": false, "fillSpans": false, "yAxisUnit": "reqps", "timePreferance": "GLOBAL_TIME", "softMax": null, "softMin": null, "thresholds": [], "selectedLogFields": [], "selectedTracesFields": [], "query": {"queryType": "builder", "promql": [{"disabled": false, "legend": "", "name": "A", "query": ""}], "clickhouse_sql": [{"disabled": false, "legend": "", "name": "A", "query": ""}], "id": "qddddddd-dddd-dddd-dddd-dddddddddddd", "builder": {"queryData": [{"queryName": "A", "stepInterval": 60, "dataSource": "traces", "aggregations": [{"expression": "rate()"}], "filter": {"expression": "name = 'execute_tool'"}, "groupBy": [], "expression": "A", "orderBy": [], "legend": "req/s", "disabled": false, "having": {"expression": ""}, "limit": null}], "queryFormulas": [], "queryTraceOperator": []}}},
{"id": "eeeeeeee-eeee-eeee-eeee-eeeeeeeeeeee", "title": "Tool error rate", "description": "Percentage of tool calls that errored.", "panelTypes": "graph", "nullZeroValues": "zero", "opacity": "1", "isStacked": false, "fillSpans": false, "yAxisUnit": "percent", "timePreferance": "GLOBAL_TIME", "softMax": null, "softMin": null, "thresholds": [], "selectedLogFields": [], "selectedTracesFields": [], "query": {"queryType": "builder", "promql": [{"disabled": false, "legend": "", "name": "A", "query": ""}], "clickhouse_sql": [{"disabled": false, "legend": "", "name": "A", "query": ""}], "id": "qeeeeeee-eeee-eeee-eeee-eeeeeeeeeeee", "builder": {"queryData": [{"queryName": "A", "stepInterval": 60, "dataSource": "traces", "aggregations": [{"expression": "countIf(has_error = true) * 100 / count()"}], "filter": {"expression": "name = 'execute_tool'"}, "groupBy": [], "expression": "A", "orderBy": [], "legend": "error %", "disabled": false, "having": {"expression": ""}, "limit": null}], "queryFormulas": [], "queryTraceOperator": []}}},
{"id": "ffffffff-ffff-ffff-ffff-ffffffffffff", "title": "Tool duration (p50)", "description": "Median tool call duration.", "panelTypes": "graph", "nullZeroValues": "zero", "opacity": "1", "isStacked": false, "fillSpans": false, "yAxisUnit": "ns", "timePreferance": "GLOBAL_TIME", "softMax": null, "softMin": null, "thresholds": [], "selectedLogFields": [], "selectedTracesFields": [], "query": {"queryType": "builder", "promql": [{"disabled": false, "legend": "", "name": "A", "query": ""}], "clickhouse_sql": [{"disabled": false, "legend": "", "name": "A", "query": ""}], "id": "qfffffff-ffff-ffff-ffff-ffffffffffff", "builder": {"queryData": [{"queryName": "A", "stepInterval": 60, "dataSource": "traces", "aggregations": [{"expression": "p50(duration_nano)"}], "filter": {"expression": "name = 'execute_tool'"}, "groupBy": [], "expression": "A", "orderBy": [], "legend": "p50", "disabled": false, "having": {"expression": ""}, "limit": null}], "queryFormulas": [], "queryTraceOperator": []}}}
]
}

View File

@@ -37,6 +37,7 @@ type StorableDashboard struct {
Data StorableDashboardData `bun:"data,type:text,notnull"`
Locked bool `bun:"locked,notnull,default:false"`
OrgID valuer.UUID `bun:"org_id,notnull"`
Source string `bun:"source,type:text,notnull"`
}
type Dashboard struct {
@@ -47,6 +48,7 @@ type Dashboard struct {
Data StorableDashboardData `json:"data"`
Locked bool `json:"locked"`
OrgID valuer.UUID `json:"org_id"`
Source string `json:"source"`
}
type LockUnlockDashboard struct {
@@ -86,6 +88,7 @@ func NewStorableDashboardFromDashboard(dashboard *Dashboard) (*StorableDashboard
OrgID: dashboard.OrgID,
Data: dashboard.Data,
Locked: dashboard.Locked,
Source: dashboard.Source,
}, nil
}
@@ -108,6 +111,31 @@ func NewDashboard(orgID valuer.UUID, createdBy string, storableDashboardData Sto
}, nil
}
// NewSystemDashboard constructs a Dashboard owned by the system and identified by
// source (e.g. "ai-o11y-overview").
//
// An empty source is rejected with an invalid-input error. Otherwise the returned
// dashboard gets a freshly generated ID, is unlocked, records "system" as both its
// creator and last updater, and shares one timestamp for creation and update.
func NewSystemDashboard(orgID valuer.UUID, source string, data StorableDashboardData) (*Dashboard, error) {
	if len(source) == 0 {
		return nil, errors.Newf(errors.TypeInvalidInput, errors.CodeInvalidInput, "source is required for a system dashboard")
	}

	// Audit identity recorded for rows that no user created.
	const systemUser = "system"
	now := time.Now()

	dashboard := &Dashboard{
		ID:            valuer.GenerateUUID().StringValue(),
		TimeAuditable: types.TimeAuditable{CreatedAt: now, UpdatedAt: now},
		UserAuditable: types.UserAuditable{CreatedBy: systemUser, UpdatedBy: systemUser},
		OrgID:         orgID,
		Data:          data,
		Locked:        false,
		Source:        source,
	}
	return dashboard, nil
}
func NewDashboardFromStorableDashboard(storableDashboard *StorableDashboard) *Dashboard {
return &Dashboard{
ID: storableDashboard.ID.StringValue(),
@@ -122,6 +150,7 @@ func NewDashboardFromStorableDashboard(storableDashboard *StorableDashboard) *Da
OrgID: storableDashboard.OrgID,
Data: storableDashboard.Data,
Locked: storableDashboard.Locked,
Source: storableDashboard.Source,
}
}
@@ -154,6 +183,7 @@ func NewGettableDashboardFromDashboard(dashboard *Dashboard) (*GettableDashboard
OrgID: dashboard.OrgID,
Data: dashboard.Data,
Locked: dashboard.Locked,
Source: dashboard.Source,
}, nil
}

View File

@@ -0,0 +1,30 @@
package dashboardtypes
import (
_ "embed"
"encoding/json"
"github.com/SigNoz/signoz/pkg/errors"
"github.com/SigNoz/signoz/pkg/valuer"
)
// aIO11yOverviewJSON holds the raw bytes of ai_o11y_overview.json, embedded into the
// binary at build time by the go:embed directive below.
//go:embed ai_o11y_overview.json
var aIO11yOverviewJSON []byte
// NewSystemDashboardDefault returns the default system dashboard for the given
// source, scoped to orgID. A source with no registered default yields an
// invalid-input error.
func NewSystemDashboardDefault(orgID valuer.UUID, source Source) (*Dashboard, error) {
	if source == SourceAIO11yOverview {
		return newAIO11yOverviewDefault(orgID)
	}

	return nil, errors.Newf(errors.TypeInvalidInput, errors.CodeInvalidInput, "no defaults registered for system dashboard source %s", source.StringValue())
}
// newAIO11yOverviewDefault decodes the embedded ai-o11y-overview JSON into dashboard
// data and wraps it in a system dashboard for orgID. The JSON is decoded on every
// call, so each returned dashboard carries its own data value. A decode failure is
// reported as an internal error.
func newAIO11yOverviewDefault(orgID valuer.UUID) (*Dashboard, error) {
	defaults := StorableDashboardData{}

	err := json.Unmarshal(aIO11yOverviewJSON, &defaults)
	if err != nil {
		return nil, errors.Wrapf(err, errors.TypeInternal, errors.CodeInternal, "failed to unmarshal embedded ai-o11y-overview default")
	}

	return NewSystemDashboard(orgID, SourceAIO11yOverview.StringValue(), defaults)
}

View File

@@ -0,0 +1,44 @@
package dashboardtypes
import (
"encoding/json"
"github.com/SigNoz/signoz/pkg/errors"
"github.com/SigNoz/signoz/pkg/valuer"
)
// Source is the typed identifier of a system dashboard source. It embeds
// valuer.String, so the underlying text is available through StringValue().
type Source struct {
valuer.String
}
// Known system dashboard sources.
var (
// SourceAIO11yOverview is the source whose string value is "ai-o11y-overview".
SourceAIO11yOverview = Source{valuer.NewString("ai-o11y-overview")}
)
// AllSources lists every Source declared in this file.
var AllSources = []Source{
SourceAIO11yOverview,
}
// NewSource parses s into one of the known system dashboard sources.
//
// Validation is driven by AllSources, so registering a new source there is enough
// for it to be accepted here; there is no second list to keep in sync. Matching is
// an exact comparison against each source's StringValue(). An unrecognised value
// yields the zero Source and an invalid-input error.
func NewSource(s string) (Source, error) {
	for _, known := range AllSources {
		if known.StringValue() == s {
			return known, nil
		}
	}

	return Source{}, errors.Newf(errors.TypeInvalidInput, errors.CodeInvalidInput, "invalid system dashboard source: %s", s)
}
// UnmarshalJSON implements json.Unmarshaler. The payload must be a JSON string that
// NewSource accepts; the receiver is only overwritten once both decoding and
// validation have succeeded.
func (source *Source) UnmarshalJSON(data []byte) error {
	var raw string
	err := json.Unmarshal(data, &raw)
	if err != nil {
		return err
	}

	parsed, err := NewSource(raw)
	if err != nil {
		return err
	}

	*source = parsed
	return nil
}

View File

@@ -13,6 +13,8 @@ type Store interface {
Get(context.Context, valuer.UUID, valuer.UUID) (*StorableDashboard, error)
GetBySource(context.Context, valuer.UUID, string) (*StorableDashboard, error)
GetPublic(context.Context, string) (*StorablePublicDashboard, error)
GetDashboardByOrgsAndPublicID(context.Context, []string, string) (*StorableDashboard, error)
@@ -25,10 +27,14 @@ type Store interface {
Update(context.Context, valuer.UUID, *StorableDashboard) error
UpdateBySource(context.Context, valuer.UUID, string, *StorableDashboard) error
UpdatePublic(context.Context, *StorablePublicDashboard) error
Delete(context.Context, valuer.UUID, valuer.UUID) error
DeleteBySource(context.Context, valuer.UUID, string) error
DeletePublic(context.Context, string) error
RunInTx(context.Context, func(context.Context) error) error

View File

@@ -0,0 +1,35 @@
package dashboardtypes
import (
"time"
"github.com/SigNoz/signoz/pkg/valuer"
)
// GettableSystemDashboard is the JSON-serialisable view of a system dashboard.
// It flattens the dashboard's identity, source, data and audit fields into a single
// object with camelCase keys.
type GettableSystemDashboard struct {
ID string `json:"id"`
OrgID valuer.UUID `json:"orgId"`
Source string `json:"source"`
Data StorableDashboardData `json:"data"`
CreatedAt time.Time `json:"createdAt"`
CreatedBy string `json:"createdBy"`
UpdatedAt time.Time `json:"updatedAt"`
UpdatedBy string `json:"updatedBy"`
}
// UpdatableSystemDashboard carries only the dashboard data blob.
// NOTE(review): presumably the request body for updating a system dashboard —
// confirm against the handler that decodes it.
type UpdatableSystemDashboard struct {
Data StorableDashboardData `json:"data"`
}
// NewGettableSystemDashboardFromDashboard copies the identity, source, data and
// audit fields of dashboard into a new GettableSystemDashboard. dashboard must be
// non-nil.
func NewGettableSystemDashboardFromDashboard(dashboard *Dashboard) *GettableSystemDashboard {
	gettable := new(GettableSystemDashboard)

	gettable.ID = dashboard.ID
	gettable.OrgID = dashboard.OrgID
	gettable.Source = dashboard.Source
	gettable.Data = dashboard.Data

	// Audit fields.
	gettable.CreatedAt = dashboard.CreatedAt
	gettable.CreatedBy = dashboard.CreatedBy
	gettable.UpdatedAt = dashboard.UpdatedAt
	gettable.UpdatedBy = dashboard.UpdatedBy

	return gettable
}

View File

@@ -0,0 +1,240 @@
# TRD: AI Observability Overview [Going with Option A]
Store AI observability overview page (dashboard). Chosen Option A: reuse the existing `dashboard` table with additions. No separate metadata table — PRD's `system_dashboard` table dropped (single source today; YAGNI).
- Add `is_system` and `source` columns to `dashboard`.
- New endpoints to GET / PUT / DELETE the dashboard (overview page).
- Seed on org create + migration for existing orgs (Pattern 2+3 — matches `alertmanager` / `quickfilter`).
- Alert creation needs no backend change — see Alerts section.
## Datamodel and migration
`pkg/sqlmigration/077_add_system_dashboard.go` (new). Two DDL changes plus a per-org seed, all in one migration tx.
### `dashboard` (existing) — schema changes only
```sql
ALTER TABLE dashboard ADD COLUMN is_system BOOLEAN NOT NULL DEFAULT false;
ALTER TABLE dashboard ADD COLUMN source TEXT;
CREATE UNIQUE INDEX dashboard_source_uq ON dashboard(org_id, source);
```
- `is_system = true` and `source` set ⇒ system dashboard row.
- `source IS NULL` for user dashboards. NULL-tolerant UNIQUE (standard PG/SQLite) lets many user rows coexist per org, exactly one system row per `(org_id, source)`.
- Existing rows: `is_system = false`, `source = NULL` by default. Zero data migration.
### Seed existing orgs (within the same migration)
Schema-change DDL and the per-org seed live in the same `Up()` function, same tx. Follow `063_add_public_dashboard_txn.go` shape — not `030_create_quick_filters.go` (that one only seeds the first org found). Full skeleton in **Seeding → Migration for existing orgs** below. Forward-only, no down migration (matches 063).
## Seeding
Seed on org creation for new orgs, migration loop for existing. Row **always exists** after the migration ships.
### New module entry point
`pkg/modules/systemdashboard/implsystemdashboard/module.go`:
```go
// SetDefaultConfig builds the default row for orgID with
// dashboardtypes.NewAIO11yOverviewDefault and persists it through m.store.Create.
// An error from either step is returned unchanged.
func (m *module) SetDefaultConfig(ctx context.Context, orgID valuer.UUID) error {
storable, err := dashboardtypes.NewAIO11yOverviewDefault(orgID)
if err != nil { return err }
return m.store.Create(ctx, storable)
}
```
Mirrors `quickfilter.Module.SetDefaultConfig` at `pkg/modules/quickfilter/implquickfilter/module.go:109` and `Alertmanager.SetDefaultConfig` at `pkg/alertmanager/signozalertmanager/provider.go:234`. Same two-line body: build defaults from a typed factory, persist via store.
### Hook into org creation
`pkg/modules/organization/implorganization/setter.go` `Create` gets one extra call alongside the existing alertmanager + quickfilter seeding:
```go
if err := module.alertmanager.SetDefaultConfig(ctx, organization.ID.StringValue()); err != nil { return err }
if err := module.quickfilter.SetDefaultConfig(ctx, organization.ID); err != nil { return err }
if err := module.systemDashboard.SetDefaultConfig(ctx, organization.ID); err != nil { return err } // new
if err := createManagedRoles(ctx, organization.ID); err != nil { return err }
```
Same pattern already vetted twice in this file. Reviewers will recognize it.
### Migration for existing orgs
On first deploy, every existing org gets its system-dashboard row from `pkg/sqlmigration/077_add_system_dashboard.go`'s `Up()` — same file that does the schema change. Shape mirrors `pkg/sqlmigration/063_add_public_dashboard_txn.go`:
```go
// Up adds the system-dashboard columns and unique index to `dashboard`, then seeds
// one default AI observability overview row per existing organization. Everything
// runs in a single transaction: any failure returns early and the deferred rollback
// discards the partial work.
func (m *addSystemDashboard) Up(ctx context.Context, db *bun.DB) error {
	tx, err := db.BeginTx(ctx, nil)
	if err != nil {
		return err
	}
	// Rolls back on any early return; the rollback error is deliberately ignored.
	defer func() { _ = tx.Rollback() }()

	// 1. Schema changes. Errors are NOT swallowed: the seed below relies on the new
	// columns and on the unique index backing its ON CONFLICT clause.
	schemaChanges := []string{
		`ALTER TABLE dashboard ADD COLUMN is_system BOOLEAN NOT NULL DEFAULT false`,
		`ALTER TABLE dashboard ADD COLUMN source TEXT`,
		`CREATE UNIQUE INDEX dashboard_source_uq ON dashboard(org_id, source)`,
	}
	for _, stmt := range schemaChanges {
		if _, err := tx.ExecContext(ctx, stmt); err != nil {
			return err
		}
	}

	// 2. Seed existing orgs. IDs are collected first so the result set is fully
	// consumed before any insert is issued on the same transaction.
	rows, err := tx.QueryContext(ctx, `SELECT id FROM organizations`)
	if err != nil {
		return err
	}
	defer rows.Close()

	var orgIDs []string
	for rows.Next() {
		var id string
		if err := rows.Scan(&id); err != nil {
			return err
		}
		orgIDs = append(orgIDs, id)
	}
	// rows.Next() also returns false on an iteration error; without this check a
	// failed read would silently seed only a subset of orgs.
	if err := rows.Err(); err != nil {
		return err
	}

	for _, orgID := range orgIDs {
		storable, err := dashboardtypes.NewAIO11yOverviewDefault(valuer.MustNewUUID(orgID))
		if err != nil {
			return err
		}
		_, err = tx.NewInsert().
			Model(storable).
			On("CONFLICT (org_id, source) DO NOTHING").
			Exec(ctx)
		if err != nil {
			return err
		}
	}

	return tx.Commit()
}
```
**Registration.** The migration factory is appended to the list in `pkg/signoz/provider.go` (existing dashboard and quickfilter factories sit at lines 124, 150 today):
```go
sqlmigration.NewAddSystemDashboardFactory(sqlstore),
```
Order matters — append at the bottom so it runs after all current migrations. File is numbered `077` assuming no one else lands ahead of it.
**Execution.** `pkg/signoz/signoz.go:277` calls `sqlmigrator.Migrate(ctx)` once on server startup. bun's runner tracks applied migrations in the `bun_migrations` table, so `Up()` fires exactly once per DB lifetime. After that, new orgs go through `organization.Setter.Create → systemDashboard.SetDefaultConfig` instead (above).
**Why the migration does not call `module.SetDefaultConfig`.** Migrations are frozen snapshots. If `SetDefaultConfig` later changes behaviour (side effect, new default shape), re-reading the old migration shouldn't drift. The migration reuses only `dashboardtypes.NewAIO11yOverviewDefault` (stable, schema-level) — same convention as `030_create_quick_filters.go:76` which uses `quickfiltertypes.NewDefaultQuickFilter` but does the insert itself.
**Idempotency — two layers.**
- `ON CONFLICT (org_id, source) DO NOTHING` on the per-org INSERT.
- `bun_migrations` framework tracking — the whole `Up()` only runs once per DB anyway.
Practically, the `ON CONFLICT` branch never fires. It's there because `063_add_public_dashboard_txn.go` (lines 85, 115) does it and reviewers will look for the guard.
### Why not seed on GET
Rejected explicitly:
- GET with side effects breaks HTTP read semantics — kills CDN / replica / retry assumptions.
- Race on concurrent first-GETs requires `ON CONFLICT DO NOTHING` plus tests for the race.
- Defaults freeze at an arbitrary "first click" moment per org — impossible to explain when v1.2 defaults don't land for orgs where someone clicked in v1.1.
- Background probers, health checks, smoke tests would all cause writes.
- Viewer-role GETs would cause DB inserts — RBAC weirdness.
- Bad-default rollback leaves seeded rows with no cleanup path.
- Diverges from the codebase; reviewers will push back.
Seed-on-create + migration avoids all of this: row exists deterministically, GET is a plain SELECT, PUT is a plain UPDATE, downstream features that want a real `dashboard.id` always have one.
### Defaults evolution
Once seeded, orgs own their row. If we ship improved defaults in v1.2, existing orgs keep v1.1 unless:
- The user explicitly resets via `DELETE /api/v1/system/{source}/dashboard` (drops the row; next org-create-style seed puts current defaults back — implementation option: DELETE re-runs `SetDefaultConfig` after drop, inside the same tx).
- Or a follow-up migration hash-matches the old default blob and `UPDATE`s only rows that still equal the old defaults (leaves customized rows alone).
V1 ships only the DELETE/reset path. The hash-match migration is available later if we ever need silent propagation.
## APIs
All route wiring in `pkg/query-service/app/http_handler.go`.
### `GET /api/v1/system/{source}/dashboard`
Plain SELECT on `dashboard WHERE org_id = ? AND source = ? AND is_system = true`. Always finds a row (post-seed). 404 if `{source}` is valid but somehow has no row (shouldn't happen; indicates a migration/seeding bug).
### `PUT /api/v1/system/{source}/dashboard`
Plain UPDATE of `dashboard.data`, `updated_at`, `updated_by` on the matching row. No insert path needed — row already exists.
### `DELETE /api/v1/system/{source}/dashboard` (reset to defaults)
Drop the existing row and call `SetDefaultConfig(orgID)` inside a tx. Next GET returns fresh defaults from code.
### `{source}` validation
Path param validated against a code-side enum (`ai-o11y-overview` only, today). Unknown source → 404.
### Response envelope
All responses: `{ "status": "success", "data": ... }`.
### Authz (open — raise in review)
- `GET` — `am.ViewAccess` (matches regular dashboard read).
- `PUT` / `DELETE` — `am.EditAccess` by default; decide if we want admin-only instead, given these are org-wide views.
### Existing APIs filter
- `pkg/modules/dashboard/impldashboard/store.go` `List` adds `WHERE is_system = false` (or equivalently `WHERE source IS NULL`). Filter lives on the store so every caller inherits it.
- `Module.GetByMetricNames` reads via `Module.List` → inherits the filter. System-dashboard panels don't surface in "dashboards using this metric". If we later want them to, we can add an include-system flag.
- `APIHandler.Get` (`http_handler.go:1091`) dispatches by ID pattern (cloud / installed-integration / SQL). System dashboards are only reached via `{source}` endpoint, so no change needed there.
- `APIHandler.List` (`http_handler.go:1158`) unchanged — integration and cloud-integration dashboards come from separate sources, only the SQL branch needed the filter and it's now on the store.
## Type and store models
`pkg/types/dashboardtypes/dashboard.go`:
- Add `IsSystem bool` on `StorableDashboard` (`bun:"is_system,notnull,default:false"`).
- Add `Source *string` on `StorableDashboard` (`bun:"source,nullzero"`).
- `NewAIO11yOverviewDefault(orgID) (*StorableDashboard, error)` — builds the typed default row (analogous to `quickfiltertypes.NewDefaultQuickFilter`).
`pkg/types/dashboardtypes/store.go`:
- `GetBySource(ctx, orgID, source) (*StorableDashboard, error)` — single-table `WHERE org_id = ? AND source = ? AND is_system = true`.
- Existing `Create`, `Update`, `Delete` reused. No cross-table tx.
- Existing `List` gets the `WHERE is_system = false` filter (or the equivalent `source IS NULL`).
### Default values
Live in code at `pkg/modules/systemdashboard/defaults/ai_o11y_overview.go` as a typed `dashboardtypes.StorableDashboardData`. Source of truth is the imported-and-iterated dashboard JSON at `ai-o11y-overview.json` in this repo — 15 panels in v5 format, validated against staging SigNoz. Dashboard variables (`model`, `environment`, `service_name`) are `QUERY`-type and live inside the same data blob.
Panel layout (matches the v4 mockup):
- **Row 1** — five value panels: Total cost, Total tokens, Avg latency (p95), Error rate, TTFT (p95).
- **Row 2** — Cost over time (by model); Token usage over time (input + output, stacked).
- **Row 3** — LLM call latency percentiles (p50/p90/p95/p99 by model, table); LLM call latency over time (p95 by model).
- **Row 4** — Error count (grouped by `gen_ai.error.type`); Time to first token (p95 by model); Top 10 span names.
- **Row 5** — Tool RED: tool call rate; tool error rate; tool duration (p50). Filtered by span `name = 'execute_tool'`; revisit once `gen_ai.operation.name` is indexed.
Panel widget IDs are stable UUIDs (prefix pattern `11111111-…`, `22222222-…`, …), not generated per org, so analytics correlation and alert `source` URLs stay consistent across orgs.
Known attribute gaps on current instrumentation — panels return "field not found" until these are emitted; acceptable as an interim state, will light up automatically:
- `gen_ai.usage.cost` — Total cost, Cost over time.
- `gen_ai.server.ttft` — TTFT (p95) value, Time to first token graph.
- `gen_ai.operation.name` — Tool RED panels use span `name` as a stopgap.
Variable references were removed from filter expressions to avoid "empty list" errors on first load (QUERY variables are empty until resolved). Variables still apply via the dashboard variable bar.
## Module layout
New module at `pkg/modules/systemdashboard/` with `module.go`, `impldashboard`-style `handler.go`, `store.go`. Keeps `impldashboard` focused on user dashboards. Wired through the existing factory in `cmd/signoz/...`. `organization.Setter` gains the new module as a dependency for the `SetDefaultConfig` call.
## Alerts
No backend change. `frontend/src/hooks/queryBuilder/useCreateAlerts.tsx` resolves dashboard variables client-side and opens `/alerts/new` with a substituted composite query in URL params. `ruletypes.PostableRule` has no `dashboard_id` — only a free-form `source` URL (`pkg/types/ruletypes/api_params.go`). Alerts from a system-dashboard panel are indistinguishable from any other panel-originated alert.
## Test plan
- Unit: `SetDefaultConfig` inserts expected row; module CRUD with mocked store; enum validation for `{source}`; DELETE→seed round-trip.
- Integration: migration creates columns + seeds all existing orgs; post-migration GET returns the seeded default; PUT-then-GET reflects the update; DELETE-then-GET returns fresh defaults again; `GET /api/v1/dashboards` excludes `is_system = true` rows.
- Org lifecycle: creating a new org via `organization.Setter.Create` results in exactly one system-dashboard row for each registered `{source}`.
- Migration: `is_system` defaults to `false`, `source` is NULL for pre-existing user dashboard rows; forward-only, no rollback script.
## Open questions (for review)
- **Authz on PUT / DELETE** — edit access or admin only?
- **Panel-delete guard** — `dashboardtypes.Dashboard.CanUpdate` caps non-API-key callers to one deleted widget per PUT. Inherit for system dashboards or skip?
- **Analytics** — emit a separate `system_dashboard_updated` event, reuse the existing dashboard event, or stay silent?
- **Lock / unlock on system dashboards** — expose the existing lock flow, or leave `locked = false` permanently?
- **Defaults evolution UX (product question)** — when we ship improved default panels in a future release, what does a user who has already customized see? Current v1 answer: *nothing changes until they hit Reset to defaults.* Alternatives: a "defaults updated" banner, auto-adopt additive changes (new panels) while preserving edits, or a diff/merge UI. Decide before we need it.
## Out of scope
Frontend wiring and v2/Perses schema migration — separate reviews. Default panel queries are settled in `ai-o11y-overview.json`; attribute-gap panels (cost, TTFT) light up automatically once instrumentation lands.