Преглед изворни кода

refactor(admin): 重构 WafLogDataCleanService 以提高可维护性和扩展性

- 引入 FieldPathConfig 和 apiFieldMappings 以集中配置字段路径
- 重构 ParseWafLogExtraData 方法,使用配置驱动解析逻辑
- 优化数据结构,简化 CleanedExtraData
- 移除未使用的代码和冗余逻辑
- 提高代码可读性和可维护性
fusu пре 3 дана
родитељ
комит
23f026e9c1
1 измењених фајлова са 156 додато и 315 уклоњено
  1. 156 315
      internal/service/admin/waflogdataclean.go

+ 156 - 315
internal/service/admin/waflogdataclean.go

@@ -6,379 +6,220 @@ import (
 	"github.com/go-nunu/nunu-layout-advanced/internal/service"
 	"github.com/tidwall/gjson"
 	"go.uber.org/zap"
+	"strconv"
 	"strings"
 )
 
+// --- 1. 集中化字段路径配置 ---
+
+// FieldPathConfig describes how to extract one field: the candidate JSON paths plus an optional type hint.
+type FieldPathConfig struct {
+	// Paths is a priority-ordered list; the parser tries each path in order and uses the first one that exists.
+	Paths []string
+	// FieldType hints at the field's expected shape; apiFieldMappings uses "array_int", "array_object" and "array_string". NOTE(review): extractField switches on the field name and does not appear to read this value — confirm before relying on it.
+	FieldType string
+}
+
+// apiFieldMappings is the central configuration that drives the whole parsing logic.
+// Key: a LOWERCASE keyword matched as a substring of the lowercased API name (e.g. "web", "tcp", "allowanddeny"); a key containing uppercase letters can never match.
+// Value: for that API type, the fields to extract and their lookup paths. The lookup ranges over this map, whose iteration order is unspecified, so keep keywords mutually exclusive.
+var apiFieldMappings = map[string]map[string]FieldPathConfig{
+	"web": {
+		"Comment":     {Paths: []string{"comment", "data.comment", "desc"}},
+		"Port":        {Paths: []string{"port", "data.port"}},
+		"Domain":      {Paths: []string{"domain", "data.domain", "host"}},
+		"IsHttps":     {Paths: []string{"isHttps", "data.isHttps"}},
+		"RuleID":      {Paths: []string{"ruleId", "data.ruleId", "ids", "data.ids"}, FieldType: "array_int"},
+		"BackendList": {Paths: []string{"backendList", "data.backendList", "backends"}, FieldType: "array_object"},
+	},
+	"tcp": {
+		"Comment":         {Paths: []string{"comment", "data.comment", "desc"}},
+		"Port":            {Paths: []string{"port", "data.port"}},
+		"RuleID":          {Paths: []string{"ruleId", "data.ruleId", "ids", "data.ids"}, FieldType: "array_int"},
+		"AddrBackendList": {Paths: []string{"addrBackendList", "data.addrBackendList"}, FieldType: "array_string"},
+	},
+	"udp": { // UDP entries mirror the TCP ones
+		"Comment":         {Paths: []string{"comment", "data.comment", "desc"}},
+		"Port":            {Paths: []string{"port", "data.port"}},
+		"RuleID":          {Paths: []string{"ruleId", "data.ruleId", "ids", "data.ids"}, FieldType: "array_int"},
+		"AddrBackendList": {Paths: []string{"addrBackendList", "data.addrBackendList"}, FieldType: "array_string"},
+	},
+	"globallimit": {
+		"Comment": {Paths: []string{"comment", "data.comment", "desc"}},
+		"RuleID":  {Paths: []string{"ruleId", "data.ruleId", "ids", "data.ids"}, FieldType: "array_int"},
+	},
+	"allowanddeny": {
+		"AllowAndDenyIps": {Paths: []string{"ip", "ips"}, FieldType: "array_string"},
+		"RuleID":          {Paths: []string{"ruleId", "data.ruleId", "ids", "data.ids"}, FieldType: "array_int"},
+	},
+	"cciplist": {
+		"AllowAndDenyIps": {Paths: []string{"ip", "ips", "newIp"}}, // ccIpList tries "ip", then "ips", then "newIp"
+		"RuleID":          {Paths: []string{"ruleId", "data.ruleId", "ids", "data.ids"}, FieldType: "array_int"},
+	},
+	// Logs for the "分配网关组" (assign gateway group) API usually carry no user-level business data, so no mapping is defined for it.
+}
+
+// --- 2. Unified data structure produced by cleaning ---
+
+type CleanedExtraData struct { // flat result of ParseWafLogExtraData; a field whose paths are all absent keeps its zero value
+	Port            string   // string form of the first existing "Port" path
+	Domain          string   // string form of the first existing "Domain" path
+	Comment         string   // string form of the first existing "Comment" path
+	IsHttps         int      // integer value of the first existing "IsHttps" path
+	RuleID          []int64  // every element when the source is an array, otherwise the single value
+	AddrBackendList []string // backend addresses; entries built from "BackendList" objects are prefixed with "http://" or "https://"
+	CustomHost      []string // one "customHost" value per "BackendList" object (may be empty strings)
+	AllowAndDenyIps string   // a single IP string, or array elements joined with ", "
+}
+
+// --- 3. 服务实现 ---
+
 type WafLogDataCleanService interface {
 	ParseWafLogExtraData(extraDataBytes json.RawMessage, apiName string) CleanedExtraData
 	FormatBackendList(backendList interface{}) string
 }
+
 func NewWafLogDataCleanService(
-    service *service.Service,
+	service *service.Service,
 ) WafLogDataCleanService {
 	return &wafLogDataCleanService{
-		Service:        service,
-
+		Service: service,
 	}
 }
 
 type wafLogDataCleanService struct {
 	*service.Service
-
-}
-
-// BackendInfo 后端服务器信息
-type BackendInfo struct {
-	Addr       string `json:"addr,omitempty"`       // 后端地址
-	CustomHost string `json:"customHost,omitempty"` // 自定义Host头
-	IsHttps    int    `json:"isHttps,omitempty"`    // 是否为HTTPS
 }
 
-// CleanedExtraData 使用动态结构存储解析后的数据
-type CleanedExtraData struct {
-	// 核心字段 - 新的数组结构
-	BackendList     []BackendInfo `json:"backendList,omitempty"`     // 完整的后端信息数组
-	
-	// 向后兼容字段
-	AddrBackendList []string `json:"addrBackendList,omitempty"` // 只包含地址的数组
-	CustomHostList  []string `json:"customHostList,omitempty"`  // 只包含customHost的数组
-	CustomHost      []string `json:"customHost,omitempty"`      // customHost数组,与CustomHostList相同
-	
-	// 基础字段
-	Port            string   `json:"port,omitempty"`
-	Domain          string   `json:"domain,omitempty"`
-	Comment         string   `json:"comment,omitempty"`
-	
-	// 扩展字段
-	UID             int64    `json:"uid,omitempty"`
-	HostID          int64    `json:"hostId,omitempty"`
-	Proxy           bool     `json:"proxy,omitempty"`
-	IsHttps         int      `json:"isHttps,omitempty"`
-	RuleID          []int64    `json:"ruleId,omitempty"`
-
-	// 其他字段
-	AllowAndDenyIps string   `json:"allowAndDenyIps,omitempty"`
-	
-	// 动态字段存储,用于存储任意其他字段
-	DynamicFields map[string]interface{} `json:"dynamicFields,omitempty"`
-	
-	// 原始数据备份,用于调试和回溯
-	RawData map[string]interface{} `json:"rawData,omitempty"`
-}
-
-
-
-
-// parseWafLogExtraData 使用gjson解析动态JSON结构,简洁高效
+// ParseWafLogExtraData 使用配置驱动的 gjson 解析,兼具灵活性和可维护性
 func (s *wafLogDataCleanService) ParseWafLogExtraData(extraDataBytes json.RawMessage, apiName string) CleanedExtraData {
-	var result CleanedExtraData
-	result.DynamicFields = make(map[string]interface{})
-	
-	if len(extraDataBytes) == 0 {
-		return result
+	var cleaned CleanedExtraData
+	if len(extraDataBytes) == 0 || !gjson.Valid(string(extraDataBytes)) {
+		if len(extraDataBytes) > 0 {
+			s.Logger.Warn("ExtraData 不是有效的JSON", zap.String("raw_data", string(extraDataBytes)))
+		}
+		return cleaned
 	}
-
 	jsonStr := string(extraDataBytes)
-	if !gjson.Valid(jsonStr) {
-		s.Logger.Warn("ExtraData 不是有效的JSON", zap.String("raw_data", jsonStr))
-		return result
-	}
-
-	// 解析并保存原始数据
-	var rawData map[string]interface{}
-	json.Unmarshal(extraDataBytes, &rawData)
-	result.RawData = rawData
-
-	// 使用gjson进行智能字段提取
-	s.extractWithGjson(jsonStr, apiName, &result)
-
-	return result
-}
-
-// extractWithGjson 使用gjson进行智能字段提取
-func (s *wafLogDataCleanService) extractWithGjson(jsonStr, apiName string, result *CleanedExtraData) {
-	// 提取顶层字段
-	if uid := gjson.Get(jsonStr, "uid"); uid.Exists() {
-		result.UID = uid.Int()
-	}
-	if hostId := gjson.Get(jsonStr, "hostId"); hostId.Exists() {
-		result.HostID = hostId.Int()
-	}
-	
-	// 定义常见字段路径的优先级列表
-	fieldPaths := map[string][]string{
-		"comment": {"comment", "data.comment", "desc", "description", "remark", "note"},
-		"port":    {"port", "data.port", "config.port", "server.port"},
-		"domain":  {"domain", "data.domain", "host", "data.host", "hostname"},
-		"proxy":   {"proxy", "data.proxy"},
-		"isHttps": {"isHttps", "data.isHttps"},
-		"ids":     {"ids", "data.ids", "ruleIds", "data.ruleIds", "ruleId", "data.ruleId"},
-		"ip":      {"ip","newIp","ips"},
-	}
-	
-	// 提取基础字段
-	for fieldName, paths := range fieldPaths {
-		value := s.getFirstValidPath(jsonStr, paths)
-		if value != "" {
-			switch fieldName {
-			case "comment":
-				result.Comment = value
-			case "port":
-				result.Port = value
-			case "domain":
-				result.Domain = value
-			case "proxy":
-				result.Proxy = gjson.Get(jsonStr, s.getFirstValidPathName(jsonStr, paths)).Bool()
-			case "isHttps":
-				result.IsHttps = int(gjson.Get(jsonStr, s.getFirstValidPathName(jsonStr, paths)).Int())
-			case "ids":
-				result.RuleID = s.extractRuleIDs(jsonStr, paths)
-			case "ip":
-				result.AllowAndDenyIps = value
-
-			}
 
+	// 根据 apiName 找到对应的字段映射配置
+	var fieldConfig map[string]FieldPathConfig
+	for keyword, config := range apiFieldMappings {
+		if strings.Contains(strings.ToLower(apiName), keyword) {
+			fieldConfig = config
+			break
 		}
 	}
-	
-	// 智能提取 backendList
-	s.extractBackendListWithGjson(jsonStr, apiName, result)
-	
-	// 提取所有其他动态字段
-	s.extractDynamicFields(jsonStr, result)
-}
 
-// getFirstValidPath 从多个路径中获取第一个有效值
-func (s *wafLogDataCleanService) getFirstValidPath(jsonStr string, paths []string) string {
-	for _, path := range paths {
-		if value := gjson.Get(jsonStr, path); value.Exists() && value.String() != "" {
-			return value.String()
-		}
+	// 如果没有找到配置,直接返回空结构
+	if fieldConfig == nil {
+		return cleaned
 	}
-	return ""
-}
 
-// getFirstValidPathName 获取第一个有效路径的名称
-func (s *wafLogDataCleanService) getFirstValidPathName(jsonStr string, paths []string) string {
-	for _, path := range paths {
-		if gjson.Get(jsonStr, path).Exists() {
-			return path
-		}
+	// 通用、循环地提取字段
+	for fieldName, config := range fieldConfig {
+		s.extractField(jsonStr, fieldName, config, &cleaned)
 	}
-	return ""
-}
 
-// extractRuleIDs 提取规则ID数组
-func (s *wafLogDataCleanService) extractRuleIDs(jsonStr string, paths []string) []int64 {
-	var ruleIDs []int64
-	
-	for _, path := range paths {
-		ruleResult := gjson.Get(jsonStr, path)
-		if !ruleResult.Exists() {
-			continue
-		}
-		
-		switch {
-		case ruleResult.IsArray():
-			// 如果是数组,遍历提取每个ID
-			ruleResult.ForEach(func(key, value gjson.Result) bool {
-				if id := value.Int(); id > 0 {
-					ruleIDs = append(ruleIDs, id)
-				}
-				return true
-			})
-		default:
-			// 如果是单个值,添加到数组中
-			if id := ruleResult.Int(); id > 0 {
-				ruleIDs = append(ruleIDs, id)
-			}
-		}
-		
-		// 找到有效数据就退出
-		if len(ruleIDs) > 0 {
-			break
-		}
-	}
-	
-	return ruleIDs
+	return cleaned
 }
 
-// extractBackendListWithGjson 使用gjson智能提取后端列表
-func (s *wafLogDataCleanService) extractBackendListWithGjson(jsonStr, apiName string, result *CleanedExtraData) {
-	// 定义可能的后端列表字段路径
-	backendPaths := []string{
-		"data.backendList", "backendList", "backends", "data.backends",
-		"backend_list", "data.backend_list", "servers", "data.servers",
-		"upstreams", "data.upstreams", "targets", "data.targets",
-	}
-	
-	for _, path := range backendPaths {
-		backendResult := gjson.Get(jsonStr, path)
-		if !backendResult.Exists() {
-			continue
-		}
-		
-		// 根据数据类型进行处理
-		switch {
-		case backendResult.IsArray():
-			s.processArrayBackends(backendResult, result)
-
-		}
-		
-		// 找到有效数据就退出
-		if len(result.AddrBackendList) > 0 || len(result.CustomHostList) > 0 {
+// extractField 是一个通用的字段提取辅助函数
+func (s *wafLogDataCleanService) extractField(jsonStr, fieldName string, config FieldPathConfig, cleaned *CleanedExtraData) {
+	// 找到第一个有效的路径和其结果
+	var validPathResult gjson.Result
+	for _, path := range config.Paths {
+		result := gjson.Get(jsonStr, path)
+		if result.Exists() {
+			validPathResult = result
 			break
 		}
 	}
-}
-
-// processArrayBackends 处理数组格式的后端列表
-func (s *wafLogDataCleanService) processArrayBackends(backendResult gjson.Result, result *CleanedExtraData) {
-	backendResult.ForEach(func(key, value gjson.Result) bool {
-		if value.IsObject() {
-			// 创建BackendInfo结构
-			backend := BackendInfo{}
-
-			isHttps := gjson.Get(value.Raw, "isHttps").Int()
-
-			// 尝试提取地址字段
-			addr := s.getFirstValidPath(value.Raw, []string{"addr", "address", "host", "server", "endpoint", "url"})
-
-			if isHttps == 1 {
-				addr = "https://" + addr
-			}else {
-				addr = "http://" + addr
-			}
-
-			if addr != "" {
-				backend.Addr = addr
-				result.AddrBackendList = append(result.AddrBackendList, addr)
-			}
 
-			// 提取customHost
-			if customHost := gjson.Get(value.Raw, "customHost").String(); customHost != "" {
-				backend.CustomHost = customHost
-				result.CustomHostList = append(result.CustomHostList, customHost)
-				result.CustomHost = append(result.CustomHost, customHost)
-			}else {
-				result.CustomHost = append(result.CustomHost, "     ")
-			}
+	if !validPathResult.Exists() {
+		return // 如果所有路径都找不到,直接返回
+	}
 
-			// 只有当有有效数据时才添加到BackendList
-			if backend.Addr != "" || backend.CustomHost != "" {
-				result.BackendList = append(result.BackendList, backend)
-			}
+	// 根据字段名称和类型将结果赋值给 CleanedExtraData
+	switch fieldName {
+	case "Comment":
+		cleaned.Comment = validPathResult.String()
+	case "Port":
+		cleaned.Port = validPathResult.String()
+	case "Domain":
+		cleaned.Domain = validPathResult.String()
+	case "IsHttps":
+		cleaned.IsHttps = int(validPathResult.Int())
+	case "RuleID":
+		if validPathResult.IsArray() {
+			validPathResult.ForEach(func(_, value gjson.Result) bool {
+				cleaned.RuleID = append(cleaned.RuleID, value.Int())
+				return true
+			})
 		} else {
-			// 直接作为地址处理
-			if addr := value.String(); addr != "" {
-				result.AddrBackendList = append(result.AddrBackendList, addr)
-				result.BackendList = append(result.BackendList, BackendInfo{Addr: addr})
-			}
+			cleaned.RuleID = append(cleaned.RuleID, validPathResult.Int())
+		}
+	case "BackendList": // 特殊处理对象数组
+		if validPathResult.IsArray() {
+			validPathResult.ForEach(func(_, value gjson.Result) bool {
+				if value.IsObject() {
+					addr := gjson.Get(value.Raw, "addr").String()
+					customHost := gjson.Get(value.Raw, "customHost").String()
+					isHttps := gjson.Get(value.Raw, "isHttps").Int()
+					if isHttps == 1 {
+						addr = "https://" + addr
+					} else {
+						addr = "http://" + addr
+					}
+					cleaned.AddrBackendList = append(cleaned.AddrBackendList, addr)
+					cleaned.CustomHost = append(cleaned.CustomHost, customHost)
+				}
+				return true
+			})
 		}
-		return true
-	})
-}
-
-
-// extractDynamicFields 提取所有动态字段到DynamicFields中
-func (s *wafLogDataCleanService) extractDynamicFields(jsonStr string, result *CleanedExtraData) {
-	// 已知的核心字段,不放入动态字段中
-	knownFields := map[string]bool{
-		"comment": true, "port": true, "domain": true, 
-		"backendList": true, "backends": true, "backend_list": true,
-		"data": true, // data字段的内容会被单独处理
-	}
-	
-	// 遍历顶层字段
-	gjson.Parse(jsonStr).ForEach(func(key, value gjson.Result) bool {
-		fieldName := key.String()
-		if !knownFields[fieldName] {
-			// 将未知字段存储到动态字段中
-			result.DynamicFields[fieldName] = value.Value()
+	case "AddrBackendList": // 处理字符串数组
+		if validPathResult.IsArray() {
+			validPathResult.ForEach(func(_, value gjson.Result) bool {
+				cleaned.AddrBackendList = append(cleaned.AddrBackendList, value.String())
+				return true
+			})
 		}
-		return true
-	})
-	
-	// 特殊处理data字段中的未知字段
-	dataResult := gjson.Get(jsonStr, "data")
-	if dataResult.Exists() && dataResult.IsObject() {
-		dataKnownFields := map[string]bool{
-			"port": true, "domain": true, "backendList": true, 
-			"backends": true, "backend_list": true,
+	case "AllowAndDenyIps":
+		if validPathResult.IsArray() {
+			var ips []string
+			validPathResult.ForEach(func(_, value gjson.Result) bool {
+				ips = append(ips, value.String())
+				return true
+			})
+			cleaned.AllowAndDenyIps = strings.Join(ips, ", ")
+		} else {
+			cleaned.AllowAndDenyIps = validPathResult.String()
 		}
-		
-		dataResult.ForEach(func(key, value gjson.Result) bool {
-			fieldName := key.String()
-			if !dataKnownFields[fieldName] {
-				// 使用data.前缀避免冲突
-				result.DynamicFields["data."+fieldName] = value.Value()
-			}
-			return true
-		})
 	}
 }
 
-
-
-// formatBackendList 格式化后端地址列表
+// FormatBackendList 格式化后端地址列表(已简化)
 func (s *wafLogDataCleanService) FormatBackendList(backendList interface{}) string {
 	if backendList == nil {
 		return ""
 	}
 
 	switch v := backendList.(type) {
-	case string:
-		return v
 	case []string:
-		if len(v) == 0 {
-			return ""
-		}
 		return strings.Join(v, ", ")
 	case []int64:
-		// 处理 []int64 类型的数组(如 RuleId)
 		if len(v) == 0 {
 			return ""
 		}
 		var strList []string
 		for _, id := range v {
-			strList = append(strList, fmt.Sprintf("%d", id))
-		}
-		return strings.Join(strList, ", ")
-	case []interface{}:
-		// 处理 []interface{} 类型的数组
-		if len(v) == 0 {
-			return ""
-		}
-		var strList []string
-		for _, item := range v {
-			if str := fmt.Sprintf("%v", item); str != "" && str != "<nil>" {
-				strList = append(strList, str)
-			}
+			strList = append(strList, strconv.FormatInt(id, 10))
 		}
 		return strings.Join(strList, ", ")
+	case string:
+		return v
 	default:
-		// 对于其他类型,先转换为字符串再处理
-		str := fmt.Sprintf("%v", v)
-		// 处理 Go 数组格式 [item1 item2] -> item1, item2
-		if strings.HasPrefix(str, "[") && strings.HasSuffix(str, "]") {
-			// 移除方括号
-			content := strings.Trim(str, "[]")
-			if content != "" {
-				// 按空格分割并用逗号连接
-				parts := strings.Fields(content)
-				if len(parts) > 1 {
-					return strings.Join(parts, ", ")
-				}
-				return content
-			}
-		}
-		// 处理其他包含空格的字符串
-		if strings.Contains(str, " ") && !strings.Contains(str, "\n") {
-			parts := strings.Fields(str)
-			if len(parts) > 1 {
-				return strings.Join(parts, ", ")
-			}
-		}
-		return str
+		// 其他类型直接转为字符串,作为最后的兼容手段
+		return fmt.Sprintf("%v", v)
 	}
 }