waflogdataclean.go 7.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227
  1. package admin
  2. import (
  3. "encoding/json"
  4. "fmt"
  5. "github.com/go-nunu/nunu-layout-advanced/internal/service"
  6. "github.com/tidwall/gjson"
  7. "go.uber.org/zap"
  8. "strconv"
  9. "strings"
  10. )
  // --- 1. Centralized field-path configuration ---

  // FieldPathConfig holds everything needed to locate one logical field inside
  // a JSON payload.
  type FieldPathConfig struct {
  	// Paths is a priority list of JSON paths; the parser tries them from
  	// first to last.
  	Paths []string

  	// FieldType names the expected type of the field for special handling
  	// (for example "array_object", "array_string", "bool"). The extraction
  	// code in this file dispatches on the field name and does not read this
  	// value.
  	FieldType string
  }
  19. // apiFieldMappings 是驱动整个解析逻辑的核心配置
  20. // Key: API名称的关键字 (e.g., "web", "tcp", "allowAndDeny")
  21. // Value: 一个映射,定义了该API类型下需要提取的各个字段及其查找路径
  22. var apiFieldMappings = map[string]map[string]FieldPathConfig{
  23. "web": {
  24. "Comment": {Paths: []string{"comment", "data.comment", "desc"}},
  25. "Port": {Paths: []string{"port", "data.port"}},
  26. "Domain": {Paths: []string{"domain", "data.domain", "host"}},
  27. "IsHttps": {Paths: []string{"isHttps", "data.isHttps"}},
  28. "RuleID": {Paths: []string{"ruleId", "data.ruleId", "ids", "data.ids"}, FieldType: "array_int"},
  29. "BackendList": {Paths: []string{"backendList", "data.backendList", "backends"}, FieldType: "array_object"},
  30. },
  31. "tcp": {
  32. "Comment": {Paths: []string{"comment", "data.comment", "desc"}},
  33. "Port": {Paths: []string{"port", "data.port"}},
  34. "RuleID": {Paths: []string{"ruleId", "data.ruleId", "ids", "data.ids"}, FieldType: "array_int"},
  35. "AddrBackendList": {Paths: []string{"backendList", "data.backendList"}, FieldType: "array_string"},
  36. },
  37. "udp": { // UDP 和 TCP 结构类似
  38. "Comment": {Paths: []string{"comment", "data.comment", "desc"}},
  39. "Port": {Paths: []string{"port", "data.port"}},
  40. "RuleID": {Paths: []string{"ruleId", "data.ruleId", "ids", "data.ids"}, FieldType: "array_int"},
  41. "AddrBackendList": {Paths: []string{"backendList", "data.backendList"}, FieldType: "array_string"},
  42. },
  43. "实例": {
  44. "Comment": {Paths: []string{"comment", "data.comment", "desc"}},
  45. "RuleID": {Paths: []string{"ruleId", "data.ruleId", "ids", "data.ids"}, FieldType: "array_int"},
  46. },
  47. "黑白名单": {
  48. "AllowAndDenyIps": {Paths: []string{"ip", "ips", "data.ip", "data.ips"}, FieldType: "array_string"},
  49. "RuleID": {Paths: []string{"ruleId", "data.ruleId", "ids", "data.ids"}, FieldType: "array_int"},
  50. "Comment": {Paths: []string{"comment", "data.comment", "desc"}},
  51. },
  52. "CC": {
  53. "AllowAndDenyIps": {Paths: []string{"ips","newIp", "data.newIp","data.ips", "ip", "data.ip" }},
  54. "RuleID": {Paths: []string{"allowOrDeny"}, FieldType: "int"},
  55. },
  56. // "分配网关组" 的日志通常不包含用户层面的业务数据,所以这里不定义
  57. }
  // --- 2. Unified structure for the cleaned data ---

  // CleanedExtraData is the normalized view of a WAF log's extra-data payload.
  // Fields that are absent from the payload keep their zero value.
  type CleanedExtraData struct {
  	// Port is the string form of the payload's port value.
  	Port string
  	// Domain is the string form of the payload's domain/host value.
  	Domain string
  	// Comment is the string form of the payload's comment/description value.
  	Comment string
  	// IsHttps is the integer form of the payload's isHttps value.
  	IsHttps int
  	// RuleID collects the rule identifiers, whether the payload stores a
  	// single value or an array.
  	RuleID []int64
  	// AddrBackendList holds the backend addresses. Object-style backends are
  	// stored with an "http://" or "https://" prefix; string-style backends
  	// are stored as given.
  	AddrBackendList []string
  	// CustomHost holds the customHost of each object-style backend; it is
  	// appended in step with AddrBackendList for those backends.
  	CustomHost []string
  	// AllowAndDenyIps is the IP value; an array is joined with ", ".
  	AllowAndDenyIps string
  	// AllowOrDeny is not assigned by the extraction code in this file.
  	// NOTE(review): presumably meant to receive "allowOrDeny" - confirm.
  	AllowOrDeny int
  }
  70. // --- 3. 服务实现 ---
  71. type WafLogDataCleanService interface {
  72. ParseWafLogExtraData(extraDataBytes json.RawMessage, apiName string) CleanedExtraData
  73. FormatBackendList(backendList interface{}) string
  74. }
  75. func NewWafLogDataCleanService(
  76. service *service.Service,
  77. ) WafLogDataCleanService {
  78. return &wafLogDataCleanService{
  79. Service: service,
  80. }
  81. }
  82. type wafLogDataCleanService struct {
  83. *service.Service
  84. }
  85. // ParseWafLogExtraData 使用配置驱动的 gjson 解析,兼具灵活性和可维护性
  86. func (s *wafLogDataCleanService) ParseWafLogExtraData(extraDataBytes json.RawMessage, apiName string) CleanedExtraData {
  87. var cleaned CleanedExtraData
  88. if len(extraDataBytes) == 0 || !gjson.Valid(string(extraDataBytes)) {
  89. if len(extraDataBytes) > 0 {
  90. s.Logger.Warn("ExtraData 不是有效的JSON", zap.String("raw_data", string(extraDataBytes)))
  91. }
  92. return cleaned
  93. }
  94. jsonStr := string(extraDataBytes)
  95. // 根据 apiName 找到对应的字段映射配置
  96. var fieldConfig map[string]FieldPathConfig
  97. for keyword, config := range apiFieldMappings {
  98. if strings.Contains(apiName, keyword) {
  99. fieldConfig = config
  100. break
  101. }
  102. }
  103. // 如果没有找到配置,直接返回空结构
  104. if fieldConfig == nil {
  105. return cleaned
  106. }
  107. // 通用、循环地提取字段
  108. for fieldName, config := range fieldConfig {
  109. s.extractField(jsonStr, fieldName, config, &cleaned)
  110. }
  111. return cleaned
  112. }
  113. // extractField 是一个通用的字段提取辅助函数
  114. func (s *wafLogDataCleanService) extractField(jsonStr, fieldName string, config FieldPathConfig, cleaned *CleanedExtraData) {
  115. // 找到第一个有效的路径和其结果
  116. var validPathResult gjson.Result
  117. for _, path := range config.Paths {
  118. result := gjson.Get(jsonStr, path)
  119. if result.Exists() {
  120. validPathResult = result
  121. break
  122. }
  123. }
  124. if !validPathResult.Exists() {
  125. return // 如果所有路径都找不到,直接返回
  126. }
  127. // 根据字段名称和类型将结果赋值给 CleanedExtraData
  128. switch fieldName {
  129. case "Comment":
  130. cleaned.Comment = validPathResult.String()
  131. case "Port":
  132. cleaned.Port = validPathResult.String()
  133. case "Domain":
  134. cleaned.Domain = validPathResult.String()
  135. case "IsHttps":
  136. cleaned.IsHttps = int(validPathResult.Int())
  137. case "RuleID":
  138. if validPathResult.IsArray() {
  139. validPathResult.ForEach(func(_, value gjson.Result) bool {
  140. cleaned.RuleID = append(cleaned.RuleID, value.Int())
  141. return true
  142. })
  143. } else {
  144. cleaned.RuleID = append(cleaned.RuleID, validPathResult.Int())
  145. }
  146. case "BackendList": // 特殊处理对象数组
  147. if validPathResult.IsArray() {
  148. validPathResult.ForEach(func(_, value gjson.Result) bool {
  149. if value.IsObject() {
  150. addr := gjson.Get(value.Raw, "addr").String()
  151. customHost := gjson.Get(value.Raw, "customHost").String()
  152. isHttps := gjson.Get(value.Raw, "isHttps").Int()
  153. if isHttps == 1 {
  154. addr = "https://" + addr
  155. } else {
  156. addr = "http://" + addr
  157. }
  158. cleaned.AddrBackendList = append(cleaned.AddrBackendList, addr)
  159. cleaned.CustomHost = append(cleaned.CustomHost, customHost)
  160. }
  161. return true
  162. })
  163. }
  164. case "AddrBackendList": // 处理字符串数组
  165. if validPathResult.IsArray() {
  166. validPathResult.ForEach(func(_, value gjson.Result) bool {
  167. cleaned.AddrBackendList = append(cleaned.AddrBackendList, value.String())
  168. return true
  169. })
  170. }
  171. case "AllowAndDenyIps":
  172. if validPathResult.IsArray() {
  173. var ips []string
  174. validPathResult.ForEach(func(_, value gjson.Result) bool {
  175. ips = append(ips, value.String())
  176. return true
  177. })
  178. cleaned.AllowAndDenyIps = strings.Join(ips, ", ")
  179. } else {
  180. cleaned.AllowAndDenyIps = validPathResult.String()
  181. }
  182. }
  183. }
  184. // FormatBackendList 格式化后端地址列表(已简化)
  185. func (s *wafLogDataCleanService) FormatBackendList(backendList interface{}) string {
  186. if backendList == nil {
  187. return ""
  188. }
  189. switch v := backendList.(type) {
  190. case []string:
  191. return strings.Join(v, ", ")
  192. case []int64:
  193. if len(v) == 0 {
  194. return ""
  195. }
  196. var strList []string
  197. for _, id := range v {
  198. strList = append(strList, strconv.FormatInt(id, 10))
  199. }
  200. return strings.Join(strList, ", ")
  201. case string:
  202. return v
  203. default:
  204. // 其他类型直接转为字符串,作为最后的兼容手段
  205. return fmt.Sprintf("%v", v)
  206. }
  207. }