test: add integration test for scope fields

This commit is contained in:
Nikhil Soni
2026-06-24 15:25:17 +05:30
parent a73ae62cd1
commit a023c8ed4a
3 changed files with 180 additions and 0 deletions

View File

@@ -862,6 +862,9 @@ def generate_traces_with_corrupt_metadata() -> list[Traces]:
"cloud.provider": "integration",
"cloud.account.id": "000",
"trace_id": "corrupt_data",
# resource keys that look like scope fields
"scope.name": "corrupt_data",
"scope.scope.name": "corrupt_data",
},
attributes={
"net.transport": "IP.TCP",
@@ -870,6 +873,9 @@ def generate_traces_with_corrupt_metadata() -> list[Traces]:
"http.request.method": "POST",
"http.response.status_code": "200",
"timestamp": "corrupt_data",
# attribute keys colliding with the scope context prefix
"scope.version": "corrupt_data",
"scope.scope.version": "corrupt_data",
},
),
Traces(
@@ -890,12 +896,23 @@ def generate_traces_with_corrupt_metadata() -> list[Traces]:
"cloud.provider": "integration",
"cloud.account.id": "000",
"timestamp": "corrupt_data",
"scope.attributes.name": "corrupt_data",
},
attributes={
"db.name": "integration",
"db.operation": "SELECT",
"db.statement": "SELECT * FROM integration",
"trace_d": "corrupt_data",
"scope.attributes.version": "corrupt_data",
},
# the only span carrying real scope + the unique scope attribute
scope_name="io.opentelemetry.contrib.http",
scope_version="1.0.0",
scope_attributes={
"telemetry.sdk.language": "cpp",
"name": "not-the-real-name",
"version": "not-the-real-version",
"attributes": "literally-a-key-named-attributes",
},
),
Traces(
@@ -916,11 +933,13 @@ def generate_traces_with_corrupt_metadata() -> list[Traces]:
"cloud.provider": "integration",
"cloud.account.id": "000",
"duration_nano": "corrupt_data",
"scope.scope.attributes": "corrupt_data",
},
attributes={
"http.request.method": "PATCH",
"http.status_code": "404",
"id": "1",
"scope.scope.scope": "corrupt_data",
},
),
Traces(
@@ -940,6 +959,7 @@ def generate_traces_with_corrupt_metadata() -> list[Traces]:
"host.name": "linux-001",
"cloud.provider": "integration",
"cloud.account.id": "001",
"scope.scope": "corrupt_data",
},
attributes={
"message.type": "SENT",
@@ -947,6 +967,8 @@ def generate_traces_with_corrupt_metadata() -> list[Traces]:
"messaging.message.id": "001",
"duration_nano": "corrupt_data",
"id": 1,
"scope": "corrupt_data",
"scope.attributes": "corrupt_data",
},
),
]

View File

@@ -286,6 +286,10 @@ class Traces(ABC):
db_operation: str
has_error: bool
is_remote: str
scope_name: str
scope_version: str
scope_string: dict[str, str]
scope_json: dict[str, Any]
resource: list[TracesResource]
tag_attributes: list[TracesTagAttributes]
@@ -311,6 +315,9 @@ class Traces(ABC):
links: list[TracesLink] = [],
trace_state: str = "",
flags: np.uint32 = 0,
scope_name: str = "",
scope_version: str = "",
scope_attributes: dict[str, Any] = {},
resource_write_mode: Literal["legacy_only", "dual_write"] = "dual_write",
) -> None:
if timestamp is None:
@@ -392,6 +399,30 @@ class Traces(ABC):
# Calculate resource fingerprint
self.resource_fingerprint = LogsOrTracesFingerprint(self.resources_string).calculate()
# Process the scope fields, mirroring the InstrumentationScope on the OTLP span.
self.scope_name = scope_name
self.scope_version = scope_version
self.scope_string = {k: str(v) for k, v in scope_attributes.items()}
self.scope_json = {
"name": self.scope_name,
"version": self.scope_version,
"attributes": self.scope_string,
}
for k, v in self.scope_string.items():
self.tag_attributes.append(
TracesTagAttributes(
timestamp=timestamp,
tag_key=k,
tag_type="scope",
tag_data_type="string",
string_value=v,
number_value=None,
)
)
self.attribute_keys.append(
TracesResourceOrAttributeKeys(name=k, datatype="string", tag_type="scope")
)
# Process attributes by type and populate custom fields
self.attribute_string = {}
self.attributes_number = {}
@@ -644,6 +675,7 @@ class Traces(ABC):
self.has_error,
self.is_remote,
self.resource_json,
self.scope_json,
],
dtype=object,
)
@@ -675,6 +707,9 @@ class Traces(ABC):
attributes=data.get("attributes", {}),
trace_state=data.get("trace_state", ""),
flags=data.get("flags", 0),
scope_name=data.get("scope_name", ""),
scope_version=data.get("scope_version", ""),
scope_attributes=data.get("scope_attributes", {}),
)
@classmethod
@@ -814,6 +849,7 @@ def insert_traces_to_clickhouse(conn, traces: list[Traces]) -> None:
"has_error",
"is_remote",
"resource",
"scope",
],
data=[trace.np_arr() for trace in traces],
)

View File

@@ -709,6 +709,31 @@ def test_traces_list(
x[1].trace_id,
], # type: Callable[[List[Traces]], List[Any]]
),
# Case 9: filter on a scope attribute. Only x[1] carries the scope
# attribute telemetry.sdk.language='cpp'. The filter must resolve
# against the scope JSON column's attributes and ignore the
# attribute/resource keys that look like scope fields (e.g.
# "scope.name", "scope.scope.scope") as well as the scope attribute
# keys that collide with the JSON sub-columns (name/version/attributes).
pytest.param(
{
"type": "builder_query",
"spec": {
"name": "A",
"signal": "traces",
"disabled": False,
"selectFields": [{"name": "timestamp"}],
"filter": {"expression": "scope.telemetry.sdk.language = 'cpp'"},
"limit": 1,
},
},
HTTPStatus.OK,
lambda x: [
x[1].span_id,
format_timestamp(x[1].timestamp),
x[1].trace_id,
], # type: Callable[[List[Traces]], List[Any]]
),
],
)
def test_traces_list_with_corrupt_data(
@@ -755,6 +780,103 @@ def test_traces_list_with_corrupt_data(
assert data[key] == value
@pytest.mark.parametrize(
    "filter_expression,expected_index",
    [
        # A scope-attribute filter is expected to match exactly one of the
        # two inserted spans; the index points into the `traces` list below.
        pytest.param("scope.telemetry.sdk.language = 'python'", 1),
        pytest.param("scope.telemetry.sdk.language = 'go'", 0),
    ],
)
def test_traces_list_with_scope_filter(
    signoz: types.SigNoz,
    create_user_admin: None,  # pylint: disable=unused-argument
    get_token: Callable[[str, str], str],
    insert_traces: Callable[[list[Traces]], None],
    filter_expression: str,
    expected_index: int,
) -> None:
    """
    Setup:
    Two server spans are inserted, each with its own scope name, scope
    version and `telemetry.sdk.language` scope attribute ('go' / 'python').

    Tests:
    A raw traces query filtered on the scope attribute returns exactly one
    row, and that row is the span at `expected_index`.
    """
    # IDs are drawn in the same order as the spans are declared below.
    go_trace_id = TraceIdGenerator.trace_id()
    go_span_id = TraceIdGenerator.span_id()
    py_trace_id = TraceIdGenerator.trace_id()
    py_span_id = TraceIdGenerator.span_id()

    # Minute-aligned reference instant; both spans sit a few seconds before it.
    now = datetime.now(tz=UTC).replace(second=0, microsecond=0)

    go_span = Traces(
        timestamp=now - timedelta(seconds=4),
        duration=timedelta(seconds=2),
        trace_id=go_trace_id,
        span_id=go_span_id,
        parent_span_id="",
        name="GET /checkout",
        kind=TracesKind.SPAN_KIND_SERVER,
        status_code=TracesStatusCode.STATUS_CODE_OK,
        resources={"service.name": "checkout"},
        attributes={"http.request.method": "GET"},
        scope_name="io.signoz.checkout",
        scope_version="2.3.1",
        scope_attributes={"telemetry.sdk.language": "go"},
    )
    python_span = Traces(
        timestamp=now - timedelta(seconds=2),
        duration=timedelta(seconds=1),
        trace_id=py_trace_id,
        span_id=py_span_id,
        parent_span_id="",
        name="POST /pay",
        kind=TracesKind.SPAN_KIND_SERVER,
        status_code=TracesStatusCode.STATUS_CODE_OK,
        resources={"service.name": "payment"},
        attributes={"http.request.method": "POST"},
        scope_name="io.signoz.payment",
        scope_version="4.5.6",
        scope_attributes={"telemetry.sdk.language": "python"},
    )
    traces = [go_span, python_span]
    insert_traces(traces)

    token = get_token(USER_ADMIN_EMAIL, USER_ADMIN_PASSWORD)

    builder_query = {
        "type": "builder_query",
        "spec": {
            "name": "A",
            "signal": "traces",
            "disabled": False,
            "selectFields": [{"name": "timestamp"}],
            "filter": {"expression": filter_expression},
            "limit": 10,
        },
    }

    # Query window: the last five minutes up to the current wall-clock time.
    window_start = datetime.now(tz=UTC) - timedelta(minutes=5)
    window_end = datetime.now(tz=UTC)
    response = make_query_request(
        signoz,
        token,
        start_ms=int(window_start.timestamp() * 1000),
        end_ms=int(window_end.timestamp() * 1000),
        request_type="raw",
        queries=[builder_query],
    )
    assert response.status_code == HTTPStatus.OK

    rows = response.json()["data"]["data"]["results"][0]["rows"]

    # The scope filter must select one span and one span only.
    assert rows is not None
    assert len(rows) == 1

    matched = traces[expected_index]
    assert rows[0]["data"]["span_id"] == matched.span_id
    assert rows[0]["data"]["trace_id"] == matched.trace_id
def _verify_events_links_full(rows: list[dict], traces: list[Traces]) -> None:
"""Empty-selectFields case: events/links arrive parsed into structured objects.
Every row's events/links should match the fixture's stored parsed shape