diff --git a/tests/fixtures/querier.py b/tests/fixtures/querier.py index e6bbb73e9f..0c4e4dd331 100644 --- a/tests/fixtures/querier.py +++ b/tests/fixtures/querier.py @@ -862,6 +862,9 @@ def generate_traces_with_corrupt_metadata() -> list[Traces]: "cloud.provider": "integration", "cloud.account.id": "000", "trace_id": "corrupt_data", + # resource keys that look like scope fields + "scope.name": "corrupt_data", + "scope.scope.name": "corrupt_data", }, attributes={ "net.transport": "IP.TCP", @@ -870,6 +873,9 @@ def generate_traces_with_corrupt_metadata() -> list[Traces]: "http.request.method": "POST", "http.response.status_code": "200", "timestamp": "corrupt_data", + # attribute keys colliding with the scope context prefix + "scope.version": "corrupt_data", + "scope.scope.version": "corrupt_data", }, ), Traces( @@ -890,12 +896,23 @@ def generate_traces_with_corrupt_metadata() -> list[Traces]: "cloud.provider": "integration", "cloud.account.id": "000", "timestamp": "corrupt_data", + "scope.attributes.name": "corrupt_data", }, attributes={ "db.name": "integration", "db.operation": "SELECT", "db.statement": "SELECT * FROM integration", "trace_d": "corrupt_data", + "scope.attributes.version": "corrupt_data", + }, + # the only span carrying real scope + the unique scope attribute + scope_name="io.opentelemetry.contrib.http", + scope_version="1.0.0", + scope_attributes={ + "telemetry.sdk.language": "cpp", + "name": "not-the-real-name", + "version": "not-the-real-version", + "attributes": "literally-a-key-named-attributes", }, ), Traces( @@ -916,11 +933,13 @@ def generate_traces_with_corrupt_metadata() -> list[Traces]: "cloud.provider": "integration", "cloud.account.id": "000", "duration_nano": "corrupt_data", + "scope.scope.attributes": "corrupt_data", }, attributes={ "http.request.method": "PATCH", "http.status_code": "404", "id": "1", + "scope.scope.scope": "corrupt_data", }, ), Traces( @@ -940,6 +959,7 @@ def generate_traces_with_corrupt_metadata() -> list[Traces]: 
"host.name": "linux-001", "cloud.provider": "integration", "cloud.account.id": "001", + "scope.scope": "corrupt_data", }, attributes={ "message.type": "SENT", @@ -947,6 +967,8 @@ def generate_traces_with_corrupt_metadata() -> list[Traces]: "messaging.message.id": "001", "duration_nano": "corrupt_data", "id": 1, + "scope": "corrupt_data", + "scope.attributes": "corrupt_data", }, ), ] diff --git a/tests/fixtures/traces.py b/tests/fixtures/traces.py index ddc4ce7422..529c66ba4a 100644 --- a/tests/fixtures/traces.py +++ b/tests/fixtures/traces.py @@ -286,6 +286,10 @@ class Traces(ABC): db_operation: str has_error: bool is_remote: str + scope_name: str + scope_version: str + scope_string: dict[str, str] + scope_json: dict[str, Any] resource: list[TracesResource] tag_attributes: list[TracesTagAttributes] @@ -311,6 +315,9 @@ class Traces(ABC): links: list[TracesLink] = [], trace_state: str = "", flags: np.uint32 = 0, + scope_name: str = "", + scope_version: str = "", + scope_attributes: dict[str, Any] = {}, resource_write_mode: Literal["legacy_only", "dual_write"] = "dual_write", ) -> None: if timestamp is None: @@ -392,6 +399,30 @@ class Traces(ABC): # Calculate resource fingerprint self.resource_fingerprint = LogsOrTracesFingerprint(self.resources_string).calculate() + # Process scope mirroring the InstrumentationScope on the OTLP span. 
+ self.scope_name = scope_name + self.scope_version = scope_version + self.scope_string = {k: str(v) for k, v in scope_attributes.items()} + self.scope_json = { + "name": self.scope_name, + "version": self.scope_version, + "attributes": self.scope_string, + } + for k, v in self.scope_string.items(): + self.tag_attributes.append( + TracesTagAttributes( + timestamp=timestamp, + tag_key=k, + tag_type="scope", + tag_data_type="string", + string_value=v, + number_value=None, + ) + ) + self.attribute_keys.append( + TracesResourceOrAttributeKeys(name=k, datatype="string", tag_type="scope") + ) + # Process attributes by type and populate custom fields self.attribute_string = {} self.attributes_number = {} @@ -644,6 +675,7 @@ class Traces(ABC): self.has_error, self.is_remote, self.resource_json, + self.scope_json, ], dtype=object, ) @@ -675,6 +707,9 @@ class Traces(ABC): attributes=data.get("attributes", {}), trace_state=data.get("trace_state", ""), flags=data.get("flags", 0), + scope_name=data.get("scope_name", ""), + scope_version=data.get("scope_version", ""), + scope_attributes=data.get("scope_attributes", {}), ) @classmethod @@ -814,6 +849,7 @@ def insert_traces_to_clickhouse(conn, traces: list[Traces]) -> None: "has_error", "is_remote", "resource", + "scope", ], data=[trace.np_arr() for trace in traces], ) diff --git a/tests/integration/tests/querier/04_traces.py b/tests/integration/tests/querier/04_traces.py index ed0f392356..b1a3c97bbb 100644 --- a/tests/integration/tests/querier/04_traces.py +++ b/tests/integration/tests/querier/04_traces.py @@ -709,6 +709,31 @@ def test_traces_list( x[1].trace_id, ], # type: Callable[[List[Traces]], List[Any]] ), + # Case 9: filter on a scope attribute. Only x[1] carries the scope + # attribute telemetry.sdk.language='cpp'. The filter must resolve + # against the scope JSON column's attributes and ignore the + # attribute/resource keys that look like scope fields (e.g. 
+ # "scope.name", "scope.scope.scope") as well as the scope attribute + # keys that collide with the JSON sub-columns (name/version/attributes). + pytest.param( + { + "type": "builder_query", + "spec": { + "name": "A", + "signal": "traces", + "disabled": False, + "selectFields": [{"name": "timestamp"}], + "filter": {"expression": "scope.telemetry.sdk.language = 'cpp'"}, + "limit": 1, + }, + }, + HTTPStatus.OK, + lambda x: [ + x[1].span_id, + format_timestamp(x[1].timestamp), + x[1].trace_id, + ], # type: Callable[[List[Traces]], List[Any]] + ), ], ) def test_traces_list_with_corrupt_data( @@ -755,6 +780,103 @@ def test_traces_list_with_corrupt_data( assert data[key] == value +@pytest.mark.parametrize( + "filter_expression,expected_index", + [ + # Filter on a scope attribute resolves against the scope JSON column's attributes object. + pytest.param("scope.telemetry.sdk.language = 'python'", 1), + pytest.param("scope.telemetry.sdk.language = 'go'", 0), + ], +) +def test_traces_list_with_scope_filter( + signoz: types.SigNoz, + create_user_admin: None, # pylint: disable=unused-argument + get_token: Callable[[str, str], str], + insert_traces: Callable[[list[Traces]], None], + filter_expression: str, + expected_index: int, +) -> None: + """ + Setup: + Insert 2 server spans whose scopes differ in name, version and telemetry.sdk.language. + Tests: + A scope.telemetry.sdk.language filter returns exactly one row: the span at expected_index. 
+ """ + checkout_trace_id = TraceIdGenerator.trace_id() + checkout_span_id = TraceIdGenerator.span_id() + payment_trace_id = TraceIdGenerator.trace_id() + payment_span_id = TraceIdGenerator.span_id() + + now = datetime.now(tz=UTC).replace(second=0, microsecond=0) + + traces = [ + Traces( + timestamp=now - timedelta(seconds=4), + duration=timedelta(seconds=2), + trace_id=checkout_trace_id, + span_id=checkout_span_id, + parent_span_id="", + name="GET /checkout", + kind=TracesKind.SPAN_KIND_SERVER, + status_code=TracesStatusCode.STATUS_CODE_OK, + resources={"service.name": "checkout"}, + attributes={"http.request.method": "GET"}, + scope_name="io.signoz.checkout", + scope_version="2.3.1", + scope_attributes={"telemetry.sdk.language": "go"}, + ), + Traces( + timestamp=now - timedelta(seconds=2), + duration=timedelta(seconds=1), + trace_id=payment_trace_id, + span_id=payment_span_id, + parent_span_id="", + name="POST /pay", + kind=TracesKind.SPAN_KIND_SERVER, + status_code=TracesStatusCode.STATUS_CODE_OK, + resources={"service.name": "payment"}, + attributes={"http.request.method": "POST"}, + scope_name="io.signoz.payment", + scope_version="4.5.6", + scope_attributes={"telemetry.sdk.language": "python"}, + ), + ] + insert_traces(traces) + + token = get_token(USER_ADMIN_EMAIL, USER_ADMIN_PASSWORD) + + response = make_query_request( + signoz, + token, + start_ms=int((datetime.now(tz=UTC) - timedelta(minutes=5)).timestamp() * 1000), + end_ms=int(datetime.now(tz=UTC).timestamp() * 1000), + request_type="raw", + queries=[ + { + "type": "builder_query", + "spec": { + "name": "A", + "signal": "traces", + "disabled": False, + "selectFields": [{"name": "timestamp"}], + "filter": {"expression": filter_expression}, + "limit": 10, + }, + } + ], + ) + + assert response.status_code == HTTPStatus.OK + + rows = response.json()["data"]["data"]["results"][0]["rows"] + # Exactly one of the two inserted spans matches the scope filter. 
+ assert rows is not None + assert len(rows) == 1 + expected = traces[expected_index] + assert rows[0]["data"]["span_id"] == expected.span_id + assert rows[0]["data"]["trace_id"] == expected.trace_id + + def _verify_events_links_full(rows: list[dict], traces: list[Traces]) -> None: """Empty-selectFields case: events/links arrive parsed into structured objects. Every row's events/links should match the fixture's stored parsed shape