Files
signoz/pkg/sqlschema/normalizer.go
2026-03-17 15:46:04 +05:30

163 lines
3.1 KiB
Go

package sqlschema
import (
"fmt"
"hash/fnv"
"strings"
)
type whereNormalizer struct {
input string
}
func (n *whereNormalizer) hash() string {
hasher := fnv.New32a()
_, _ = hasher.Write([]byte(n.normalize()))
return fmt.Sprintf("%08x", hasher.Sum32())
}
func (n *whereNormalizer) normalize() string {
where := strings.TrimSpace(n.input)
where = n.stripOuterParentheses(where)
var output strings.Builder
output.Grow(len(where))
for i := 0; i < len(where); i++ {
switch where[i] {
case ' ', '\t', '\n', '\r':
if output.Len() > 0 {
last := output.String()[output.Len()-1]
if last != ' ' {
output.WriteByte(' ')
}
}
case '\'':
end := n.consumeSingleQuotedLiteral(where, i, &output)
i = end
case '"':
token, end := n.consumeDoubleQuotedToken(where, i)
output.WriteString(token)
i = end
default:
output.WriteByte(where[i])
}
}
return strings.TrimSpace(output.String())
}
func (n *whereNormalizer) stripOuterParentheses(s string) string {
for {
s = strings.TrimSpace(s)
if len(s) < 2 || s[0] != '(' || s[len(s)-1] != ')' || !n.hasWrappingParentheses(s) {
return s
}
s = s[1 : len(s)-1]
}
}
func (n *whereNormalizer) hasWrappingParentheses(s string) bool {
depth := 0
inSingleQuotedLiteral := false
inDoubleQuotedToken := false
for i := 0; i < len(s); i++ {
switch s[i] {
case '\'':
if inDoubleQuotedToken {
continue
}
if inSingleQuotedLiteral && i+1 < len(s) && s[i+1] == '\'' {
i++
continue
}
inSingleQuotedLiteral = !inSingleQuotedLiteral
case '"':
if inSingleQuotedLiteral {
continue
}
if inDoubleQuotedToken && i+1 < len(s) && s[i+1] == '"' {
i++
continue
}
inDoubleQuotedToken = !inDoubleQuotedToken
case '(':
if inSingleQuotedLiteral || inDoubleQuotedToken {
continue
}
depth++
case ')':
if inSingleQuotedLiteral || inDoubleQuotedToken {
continue
}
depth--
if depth == 0 && i != len(s)-1 {
return false
}
}
}
return depth == 0
}
func (n *whereNormalizer) consumeSingleQuotedLiteral(s string, start int, output *strings.Builder) int {
output.WriteByte(s[start])
for i := start + 1; i < len(s); i++ {
output.WriteByte(s[i])
if s[i] == '\'' {
if i+1 < len(s) && s[i+1] == '\'' {
i++
output.WriteByte(s[i])
continue
}
return i
}
}
return len(s) - 1
}
func (n *whereNormalizer) consumeDoubleQuotedToken(s string, start int) (string, int) {
var ident strings.Builder
for i := start + 1; i < len(s); i++ {
if s[i] == '"' {
if i+1 < len(s) && s[i+1] == '"' {
ident.WriteByte('"')
i++
continue
}
if n.isSimpleUnquotedIdentifier(ident.String()) {
return ident.String(), i
}
return s[start : i+1], i
}
ident.WriteByte(s[i])
}
return s[start:], len(s) - 1
}
func (n *whereNormalizer) isSimpleUnquotedIdentifier(s string) bool {
if s == "" || strings.ToLower(s) != s {
return false
}
for i := 0; i < len(s); i++ {
ch := s[i]
if (ch >= 'a' && ch <= 'z') || ch == '_' {
continue
}
if i > 0 && ch >= '0' && ch <= '9' {
continue
}
return false
}
return true
}