waflogdataclean.go 7.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225
  1. package admin
  2. import (
  3. "encoding/json"
  4. "fmt"
  5. "github.com/go-nunu/nunu-layout-advanced/internal/service"
  6. "github.com/tidwall/gjson"
  7. "go.uber.org/zap"
  8. "strconv"
  9. "strings"
  10. )
// --- 1. Centralized field path configuration ---

// FieldPathConfig holds everything needed to extract one specific field.
type FieldPathConfig struct {
	// Paths is a priority list; the parser tries these paths from first to last.
	Paths []string
	// FieldType indicates the expected type of the field, intended for special
	// handling. The values used by apiFieldMappings are 'array_int',
	// 'array_object' and 'array_string'; it is left empty otherwise.
	// NOTE(review): extractField in this file dispatches on the field name and
	// never reads FieldType — confirm whether anything else consumes it.
	FieldType string
}
// apiFieldMappings is the core configuration that drives the whole parsing logic.
// Key: a keyword of the API name (e.g., "web", "tcp", "allowAndDeny"); it is
// matched as a substring of the API name by ParseWafLogExtraData.
// Value: a map from the field to extract to the lookup paths for that field.
var apiFieldMappings = map[string]map[string]FieldPathConfig{
	"web": {
		"Comment": {Paths: []string{"comment", "data.comment", "desc"}},
		"Port": {Paths: []string{"port", "data.port"}},
		"Domain": {Paths: []string{"domain", "data.domain", "host"}},
		"IsHttps": {Paths: []string{"isHttps", "data.isHttps"}},
		"RuleID": {Paths: []string{"ruleId", "data.ruleId", "ids", "data.ids"}, FieldType: "array_int"},
		"BackendList": {Paths: []string{"backendList", "data.backendList", "backends"}, FieldType: "array_object"},
	},
	"tcp": {
		"Comment": {Paths: []string{"comment", "data.comment", "desc"}},
		"Port": {Paths: []string{"port", "data.port"}},
		"RuleID": {Paths: []string{"ruleId", "data.ruleId", "ids", "data.ids"}, FieldType: "array_int"},
		"AddrBackendList": {Paths: []string{"addrBackendList", "data.addrBackendList"}, FieldType: "array_string"},
	},
	"udp": { // UDP uses the same field layout as TCP
		"Comment": {Paths: []string{"comment", "data.comment", "desc"}},
		"Port": {Paths: []string{"port", "data.port"}},
		"RuleID": {Paths: []string{"ruleId", "data.ruleId", "ids", "data.ids"}, FieldType: "array_int"},
		"AddrBackendList": {Paths: []string{"addrBackendList", "data.addrBackendList"}, FieldType: "array_string"},
	},
	"globalLimit": {
		"Comment": {Paths: []string{"comment", "data.comment", "desc"}},
		"RuleID": {Paths: []string{"ruleId", "data.ruleId", "ids", "data.ids"}, FieldType: "array_int"},
	},
	"allowAndDeny": {
		"AllowAndDenyIps": {Paths: []string{"ip", "ips"}, FieldType: "array_string"},
		"RuleID": {Paths: []string{"ruleId", "data.ruleId", "ids", "data.ids"}, FieldType: "array_int"},
	},
	"ccIpList": {
		"AllowAndDenyIps": {Paths: []string{"ip", "ips","newIp"}}, // ccIpList tries "ip", then "ips", then "newIp"; no FieldType is set
		"RuleID": {Paths: []string{"ruleId", "data.ruleId", "ids", "data.ids"}, FieldType: "array_int"},
	},
	// Logs for "分配网关组" (assign gateway group) usually carry no user-level
	// business data, so no mapping is defined for them here.
}
  57. // --- 2. 清洗后的统一数据结构 ---
  58. type CleanedExtraData struct {
  59. Port string
  60. Domain string
  61. Comment string
  62. IsHttps int
  63. RuleID []int64
  64. AddrBackendList []string
  65. CustomHost []string
  66. AllowAndDenyIps string
  67. }
// --- 3. Service implementation ---

// WafLogDataCleanService exposes the extra-data parsing and formatting
// operations implemented by wafLogDataCleanService.
type WafLogDataCleanService interface {
	// ParseWafLogExtraData extracts the fields configured for apiName from the
	// raw JSON in extraDataBytes and returns them as a CleanedExtraData.
	ParseWafLogExtraData(extraDataBytes json.RawMessage, apiName string) CleanedExtraData
	// FormatBackendList renders a backend list value as a single string.
	FormatBackendList(backendList interface{}) string
}
  73. func NewWafLogDataCleanService(
  74. service *service.Service,
  75. ) WafLogDataCleanService {
  76. return &wafLogDataCleanService{
  77. Service: service,
  78. }
  79. }
// wafLogDataCleanService is the WafLogDataCleanService implementation returned
// by NewWafLogDataCleanService.
type wafLogDataCleanService struct {
	// Embedded shared service; its Logger is used to report invalid JSON.
	*service.Service
}
  83. // ParseWafLogExtraData 使用配置驱动的 gjson 解析,兼具灵活性和可维护性
  84. func (s *wafLogDataCleanService) ParseWafLogExtraData(extraDataBytes json.RawMessage, apiName string) CleanedExtraData {
  85. var cleaned CleanedExtraData
  86. if len(extraDataBytes) == 0 || !gjson.Valid(string(extraDataBytes)) {
  87. if len(extraDataBytes) > 0 {
  88. s.Logger.Warn("ExtraData 不是有效的JSON", zap.String("raw_data", string(extraDataBytes)))
  89. }
  90. return cleaned
  91. }
  92. jsonStr := string(extraDataBytes)
  93. // 根据 apiName 找到对应的字段映射配置
  94. var fieldConfig map[string]FieldPathConfig
  95. for keyword, config := range apiFieldMappings {
  96. if strings.Contains(strings.ToLower(apiName), keyword) {
  97. fieldConfig = config
  98. break
  99. }
  100. }
  101. // 如果没有找到配置,直接返回空结构
  102. if fieldConfig == nil {
  103. return cleaned
  104. }
  105. // 通用、循环地提取字段
  106. for fieldName, config := range fieldConfig {
  107. s.extractField(jsonStr, fieldName, config, &cleaned)
  108. }
  109. return cleaned
  110. }
  111. // extractField 是一个通用的字段提取辅助函数
  112. func (s *wafLogDataCleanService) extractField(jsonStr, fieldName string, config FieldPathConfig, cleaned *CleanedExtraData) {
  113. // 找到第一个有效的路径和其结果
  114. var validPathResult gjson.Result
  115. for _, path := range config.Paths {
  116. result := gjson.Get(jsonStr, path)
  117. if result.Exists() {
  118. validPathResult = result
  119. break
  120. }
  121. }
  122. if !validPathResult.Exists() {
  123. return // 如果所有路径都找不到,直接返回
  124. }
  125. // 根据字段名称和类型将结果赋值给 CleanedExtraData
  126. switch fieldName {
  127. case "Comment":
  128. cleaned.Comment = validPathResult.String()
  129. case "Port":
  130. cleaned.Port = validPathResult.String()
  131. case "Domain":
  132. cleaned.Domain = validPathResult.String()
  133. case "IsHttps":
  134. cleaned.IsHttps = int(validPathResult.Int())
  135. case "RuleID":
  136. if validPathResult.IsArray() {
  137. validPathResult.ForEach(func(_, value gjson.Result) bool {
  138. cleaned.RuleID = append(cleaned.RuleID, value.Int())
  139. return true
  140. })
  141. } else {
  142. cleaned.RuleID = append(cleaned.RuleID, validPathResult.Int())
  143. }
  144. case "BackendList": // 特殊处理对象数组
  145. if validPathResult.IsArray() {
  146. validPathResult.ForEach(func(_, value gjson.Result) bool {
  147. if value.IsObject() {
  148. addr := gjson.Get(value.Raw, "addr").String()
  149. customHost := gjson.Get(value.Raw, "customHost").String()
  150. isHttps := gjson.Get(value.Raw, "isHttps").Int()
  151. if isHttps == 1 {
  152. addr = "https://" + addr
  153. } else {
  154. addr = "http://" + addr
  155. }
  156. cleaned.AddrBackendList = append(cleaned.AddrBackendList, addr)
  157. cleaned.CustomHost = append(cleaned.CustomHost, customHost)
  158. }
  159. return true
  160. })
  161. }
  162. case "AddrBackendList": // 处理字符串数组
  163. if validPathResult.IsArray() {
  164. validPathResult.ForEach(func(_, value gjson.Result) bool {
  165. cleaned.AddrBackendList = append(cleaned.AddrBackendList, value.String())
  166. return true
  167. })
  168. }
  169. case "AllowAndDenyIps":
  170. if validPathResult.IsArray() {
  171. var ips []string
  172. validPathResult.ForEach(func(_, value gjson.Result) bool {
  173. ips = append(ips, value.String())
  174. return true
  175. })
  176. cleaned.AllowAndDenyIps = strings.Join(ips, ", ")
  177. } else {
  178. cleaned.AllowAndDenyIps = validPathResult.String()
  179. }
  180. }
  181. }
  182. // FormatBackendList 格式化后端地址列表(已简化)
  183. func (s *wafLogDataCleanService) FormatBackendList(backendList interface{}) string {
  184. if backendList == nil {
  185. return ""
  186. }
  187. switch v := backendList.(type) {
  188. case []string:
  189. return strings.Join(v, ", ")
  190. case []int64:
  191. if len(v) == 0 {
  192. return ""
  193. }
  194. var strList []string
  195. for _, id := range v {
  196. strList = append(strList, strconv.FormatInt(id, 10))
  197. }
  198. return strings.Join(strList, ", ")
  199. case string:
  200. return v
  201. default:
  202. // 其他类型直接转为字符串,作为最后的兼容手段
  203. return fmt.Sprintf("%v", v)
  204. }
  205. }