waflogdataclean.go 7.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231
  1. package admin
  2. import (
  3. "encoding/json"
  4. "fmt"
  5. "github.com/go-nunu/nunu-layout-advanced/internal/service"
  6. "github.com/tidwall/gjson"
  7. "go.uber.org/zap"
  8. "strconv"
  9. "strings"
  10. )
  11. // --- 1. 集中化字段路径配置 ---
  12. // FieldPathConfig 定义了提取一个特定字段所需的所有信息
  13. type FieldPathConfig struct {
  14. // Paths 是一个优先级列表,解析器会从前到后尝试这些路径
  15. Paths []string
  16. // FieldType 指示字段的预期类型,用于特殊处理(如'array_object', 'array_string', 'bool')
  17. FieldType string
  18. }
  19. // apiFieldMappings 是驱动整个解析逻辑的核心配置
  20. // Key: API名称的关键字 (e.g., "web", "tcp", "allowAndDeny")
  21. // Value: 一个映射,定义了该API类型下需要提取的各个字段及其查找路径
  22. var apiFieldMappings = map[string]map[string]FieldPathConfig{
  23. "web": {
  24. "Comment": {Paths: []string{"comment", "data.comment", "desc"}},
  25. "Port": {Paths: []string{"port", "data.port"}},
  26. "Domain": {Paths: []string{"domain", "data.domain", "host"}},
  27. "IsHttps": {Paths: []string{"isHttps", "data.isHttps"}},
  28. "RuleID": {Paths: []string{"ruleId", "data.ruleId", "ids", "data.ids"}, FieldType: "array_int"},
  29. "BackendList": {Paths: []string{"backendList", "data.backendList", "backends"}, FieldType: "array_object"},
  30. },
  31. "tcp": {
  32. "Comment": {Paths: []string{"comment", "data.comment", "desc"}},
  33. "Port": {Paths: []string{"port", "data.port"}},
  34. "RuleID": {Paths: []string{"ruleId", "data.ruleId", "ids", "data.ids"}, FieldType: "array_int"},
  35. "AddrBackendList": {Paths: []string{"backendList", "data.backendList"}, FieldType: "array_string"},
  36. },
  37. "udp": { // UDP 和 TCP 结构类似
  38. "Comment": {Paths: []string{"comment", "data.comment", "desc"}},
  39. "Port": {Paths: []string{"port", "data.port"}},
  40. "RuleID": {Paths: []string{"ruleId", "data.ruleId", "ids", "data.ids"}, FieldType: "array_int"},
  41. "AddrBackendList": {Paths: []string{"backendList", "data.backendList"}, FieldType: "array_string"},
  42. },
  43. "实例": {
  44. "Comment": {Paths: []string{"comment", "data.comment", "desc"}},
  45. "RuleID": {Paths: []string{"ruleId", "data.ruleId", "ids", "data.ids"}, FieldType: "array_int"},
  46. },
  47. "黑白名单": {
  48. "AllowAndDenyIps": {Paths: []string{"ip", "ips", "data.ip", "data.ips"}, FieldType: "array_string"},
  49. "RuleID": {Paths: []string{"ruleId", "data.ruleId", "ids", "data.ids"}, FieldType: "array_int"},
  50. "Comment": {Paths: []string{"comment", "data.comment", "desc"}},
  51. "AllowOrDeny" : {Paths: []string{"allowOrDeny"}, FieldType: "int"},
  52. },
  53. "CC": {
  54. "AllowAndDenyIps": {Paths: []string{"ips","newIp", "data.newIp","data.ips", "ip", "data.ip" }},
  55. "RuleID": {Paths: []string{"allowOrDeny"}, FieldType: "int"},
  56. },
  57. // "分配网关组" 的日志通常不包含用户层面的业务数据,所以这里不定义
  58. }
  59. // --- 2. 清洗后的统一数据结构 ---
  60. type CleanedExtraData struct {
  61. Port string
  62. Domain string
  63. Comment string
  64. IsHttps int
  65. RuleID []int64
  66. AddrBackendList []string
  67. CustomHost []string
  68. AllowAndDenyIps string
  69. AllowOrDeny int
  70. }
  71. // --- 3. 服务实现 ---
  72. type WafLogDataCleanService interface {
  73. ParseWafLogExtraData(extraDataBytes json.RawMessage, apiName string) CleanedExtraData
  74. FormatBackendList(backendList interface{}) string
  75. }
  76. func NewWafLogDataCleanService(
  77. service *service.Service,
  78. ) WafLogDataCleanService {
  79. return &wafLogDataCleanService{
  80. Service: service,
  81. }
  82. }
  83. type wafLogDataCleanService struct {
  84. *service.Service
  85. }
  86. // ParseWafLogExtraData 使用配置驱动的 gjson 解析,兼具灵活性和可维护性
  87. func (s *wafLogDataCleanService) ParseWafLogExtraData(extraDataBytes json.RawMessage, apiName string) CleanedExtraData {
  88. var cleaned CleanedExtraData
  89. if len(extraDataBytes) == 0 || !gjson.Valid(string(extraDataBytes)) {
  90. if len(extraDataBytes) > 0 {
  91. s.Logger.Warn("ExtraData 不是有效的JSON", zap.String("raw_data", string(extraDataBytes)))
  92. }
  93. return cleaned
  94. }
  95. jsonStr := string(extraDataBytes)
  96. // 根据 apiName 找到对应的字段映射配置
  97. var fieldConfig map[string]FieldPathConfig
  98. for keyword, config := range apiFieldMappings {
  99. if strings.Contains(apiName, keyword) {
  100. fieldConfig = config
  101. break
  102. }
  103. }
  104. // 如果没有找到配置,直接返回空结构
  105. if fieldConfig == nil {
  106. return cleaned
  107. }
  108. // 通用、循环地提取字段
  109. for fieldName, config := range fieldConfig {
  110. s.extractField(jsonStr, fieldName, config, &cleaned)
  111. }
  112. return cleaned
  113. }
  114. // extractField 是一个通用的字段提取辅助函数
  115. func (s *wafLogDataCleanService) extractField(jsonStr, fieldName string, config FieldPathConfig, cleaned *CleanedExtraData) {
  116. // 找到第一个有效的路径和其结果
  117. var validPathResult gjson.Result
  118. for _, path := range config.Paths {
  119. result := gjson.Get(jsonStr, path)
  120. if result.Exists() {
  121. validPathResult = result
  122. break
  123. }
  124. }
  125. if !validPathResult.Exists() {
  126. return // 如果所有路径都找不到,直接返回
  127. }
  128. // 根据字段名称和类型将结果赋值给 CleanedExtraData
  129. switch fieldName {
  130. case "Comment":
  131. cleaned.Comment = validPathResult.String()
  132. case "Port":
  133. cleaned.Port = validPathResult.String()
  134. case "Domain":
  135. cleaned.Domain = validPathResult.String()
  136. case "IsHttps":
  137. cleaned.IsHttps = int(validPathResult.Int())
  138. case "RuleID":
  139. if validPathResult.IsArray() {
  140. validPathResult.ForEach(func(_, value gjson.Result) bool {
  141. cleaned.RuleID = append(cleaned.RuleID, value.Int())
  142. return true
  143. })
  144. } else {
  145. cleaned.RuleID = append(cleaned.RuleID, validPathResult.Int())
  146. }
  147. case "BackendList": // 特殊处理对象数组
  148. if validPathResult.IsArray() {
  149. validPathResult.ForEach(func(_, value gjson.Result) bool {
  150. if value.IsObject() {
  151. addr := gjson.Get(value.Raw, "addr").String()
  152. customHost := gjson.Get(value.Raw, "customHost").String()
  153. isHttps := gjson.Get(value.Raw, "isHttps").Int()
  154. if isHttps == 1 {
  155. addr = "https://" + addr
  156. } else {
  157. addr = "http://" + addr
  158. }
  159. cleaned.AddrBackendList = append(cleaned.AddrBackendList, addr)
  160. cleaned.CustomHost = append(cleaned.CustomHost, customHost)
  161. }
  162. return true
  163. })
  164. }
  165. case "AddrBackendList": // 处理字符串数组
  166. if validPathResult.IsArray() {
  167. validPathResult.ForEach(func(_, value gjson.Result) bool {
  168. cleaned.AddrBackendList = append(cleaned.AddrBackendList, value.String())
  169. return true
  170. })
  171. }
  172. case "AllowAndDenyIps":
  173. if validPathResult.IsArray() {
  174. var ips []string
  175. validPathResult.ForEach(func(_, value gjson.Result) bool {
  176. ips = append(ips, value.String())
  177. return true
  178. })
  179. cleaned.AllowAndDenyIps = strings.Join(ips, ", ")
  180. } else {
  181. cleaned.AllowAndDenyIps = validPathResult.String()
  182. }
  183. case "AllowOrDeny":
  184. cleaned.AllowOrDeny = int(validPathResult.Int())
  185. }
  186. }
  187. // FormatBackendList 格式化后端地址列表(已简化)
  188. func (s *wafLogDataCleanService) FormatBackendList(backendList interface{}) string {
  189. if backendList == nil {
  190. return ""
  191. }
  192. switch v := backendList.(type) {
  193. case []string:
  194. return strings.Join(v, ", ")
  195. case []int64:
  196. if len(v) == 0 {
  197. return ""
  198. }
  199. var strList []string
  200. for _, id := range v {
  201. strList = append(strList, strconv.FormatInt(id, 10))
  202. }
  203. return strings.Join(strList, ", ")
  204. case string:
  205. return v
  206. default:
  207. // 其他类型直接转为字符串,作为最后的兼容手段
  208. return fmt.Sprintf("%v", v)
  209. }
  210. }