waflogdataclean.go 8.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269
  1. package admin
import (
	"encoding/json"
	"strings"

	"github.com/go-nunu/nunu-layout-advanced/internal/service"
	"github.com/tidwall/gjson"
	"go.uber.org/zap"
)
  8. type WafLogDataCleanService interface {
  9. ParseWafLogExtraData(extraDataBytes json.RawMessage, apiName string) CleanedExtraData
  10. }
  11. func NewWafLogDataCleanService(
  12. service *service.Service,
  13. ) WafLogDataCleanService {
  14. return &wafLogDataCleanService{
  15. Service: service,
  16. }
  17. }
  18. type wafLogDataCleanService struct {
  19. *service.Service
  20. }
  21. // BackendInfo 后端服务器信息
  22. type BackendInfo struct {
  23. Addr string `json:"addr,omitempty"` // 后端地址
  24. CustomHost string `json:"customHost,omitempty"` // 自定义Host头
  25. IsHttps int `json:"isHttps,omitempty"` // 是否为HTTPS
  26. }
  27. // CleanedExtraData 使用动态结构存储解析后的数据
  28. type CleanedExtraData struct {
  29. // 核心字段 - 新的数组结构
  30. BackendList []BackendInfo `json:"backendList,omitempty"` // 完整的后端信息数组
  31. // 向后兼容字段
  32. AddrBackendList []string `json:"addrBackendList,omitempty"` // 只包含地址的数组
  33. CustomHostList []string `json:"customHostList,omitempty"` // 只包含customHost的数组
  34. CustomHost []string `json:"customHost,omitempty"` // customHost数组,与CustomHostList相同
  35. // 基础字段
  36. Port string `json:"port,omitempty"`
  37. Domain string `json:"domain,omitempty"`
  38. Comment string `json:"comment,omitempty"`
  39. // 扩展字段
  40. UID int64 `json:"uid,omitempty"`
  41. HostID int64 `json:"hostId,omitempty"`
  42. Proxy bool `json:"proxy,omitempty"`
  43. IsHttps int `json:"isHttps,omitempty"`
  44. // 动态字段存储,用于存储任意其他字段
  45. DynamicFields map[string]interface{} `json:"dynamicFields,omitempty"`
  46. // 原始数据备份,用于调试和回溯
  47. RawData map[string]interface{} `json:"rawData,omitempty"`
  48. }
  49. // parseWafLogExtraData 使用gjson解析动态JSON结构,简洁高效
  50. func (s *wafLogDataCleanService) ParseWafLogExtraData(extraDataBytes json.RawMessage, apiName string) CleanedExtraData {
  51. var result CleanedExtraData
  52. result.DynamicFields = make(map[string]interface{})
  53. if len(extraDataBytes) == 0 {
  54. return result
  55. }
  56. jsonStr := string(extraDataBytes)
  57. if !gjson.Valid(jsonStr) {
  58. s.Logger.Warn("ExtraData 不是有效的JSON", zap.String("raw_data", jsonStr))
  59. return result
  60. }
  61. // 解析并保存原始数据
  62. var rawData map[string]interface{}
  63. json.Unmarshal(extraDataBytes, &rawData)
  64. result.RawData = rawData
  65. // 使用gjson进行智能字段提取
  66. s.extractWithGjson(jsonStr, apiName, &result)
  67. return result
  68. }
  69. // extractWithGjson 使用gjson进行智能字段提取
  70. func (s *wafLogDataCleanService) extractWithGjson(jsonStr, apiName string, result *CleanedExtraData) {
  71. // 提取顶层字段
  72. if uid := gjson.Get(jsonStr, "uid"); uid.Exists() {
  73. result.UID = uid.Int()
  74. }
  75. if hostId := gjson.Get(jsonStr, "hostId"); hostId.Exists() {
  76. result.HostID = hostId.Int()
  77. }
  78. // 定义常见字段路径的优先级列表
  79. fieldPaths := map[string][]string{
  80. "comment": {"comment", "data.comment", "desc", "description", "remark", "note"},
  81. "port": {"port", "data.port", "config.port", "server.port"},
  82. "domain": {"domain", "data.domain", "host", "data.host", "hostname"},
  83. "proxy": {"proxy", "data.proxy"},
  84. "isHttps": {"isHttps", "data.isHttps"},
  85. }
  86. // 提取基础字段
  87. for fieldName, paths := range fieldPaths {
  88. value := s.getFirstValidPath(jsonStr, paths)
  89. if value != "" {
  90. switch fieldName {
  91. case "comment":
  92. result.Comment = value
  93. case "port":
  94. result.Port = value
  95. case "domain":
  96. result.Domain = value
  97. case "proxy":
  98. result.Proxy = gjson.Get(jsonStr, s.getFirstValidPathName(jsonStr, paths)).Bool()
  99. case "isHttps":
  100. result.IsHttps = int(gjson.Get(jsonStr, s.getFirstValidPathName(jsonStr, paths)).Int())
  101. }
  102. }
  103. }
  104. // 智能提取 backendList
  105. s.extractBackendListWithGjson(jsonStr, apiName, result)
  106. // 提取所有其他动态字段
  107. s.extractDynamicFields(jsonStr, result)
  108. }
  109. // getFirstValidPath 从多个路径中获取第一个有效值
  110. func (s *wafLogDataCleanService) getFirstValidPath(jsonStr string, paths []string) string {
  111. for _, path := range paths {
  112. if value := gjson.Get(jsonStr, path); value.Exists() && value.String() != "" {
  113. return value.String()
  114. }
  115. }
  116. return ""
  117. }
  118. // getFirstValidPathName 获取第一个有效路径的名称
  119. func (s *wafLogDataCleanService) getFirstValidPathName(jsonStr string, paths []string) string {
  120. for _, path := range paths {
  121. if gjson.Get(jsonStr, path).Exists() {
  122. return path
  123. }
  124. }
  125. return ""
  126. }
  127. // extractBackendListWithGjson 使用gjson智能提取后端列表
  128. func (s *wafLogDataCleanService) extractBackendListWithGjson(jsonStr, apiName string, result *CleanedExtraData) {
  129. // 定义可能的后端列表字段路径
  130. backendPaths := []string{
  131. "backendList", "data.backendList", "backends", "data.backends",
  132. "backend_list", "data.backend_list", "servers", "data.servers",
  133. "upstreams", "data.upstreams", "targets", "data.targets",
  134. }
  135. for _, path := range backendPaths {
  136. backendResult := gjson.Get(jsonStr, path)
  137. if !backendResult.Exists() {
  138. continue
  139. }
  140. // 根据数据类型进行处理
  141. switch {
  142. case backendResult.IsArray():
  143. s.processArrayBackends(backendResult, result)
  144. }
  145. // 找到有效数据就退出
  146. if len(result.AddrBackendList) > 0 || len(result.CustomHostList) > 0 {
  147. break
  148. }
  149. }
  150. }
  151. // processArrayBackends 处理数组格式的后端列表
  152. func (s *wafLogDataCleanService) processArrayBackends(backendResult gjson.Result, result *CleanedExtraData) {
  153. backendResult.ForEach(func(key, value gjson.Result) bool {
  154. if value.IsObject() {
  155. // 创建BackendInfo结构
  156. backend := BackendInfo{}
  157. isHttps := gjson.Get(value.Raw, "isHttps").Int()
  158. // 尝试提取地址字段
  159. addr := s.getFirstValidPath(value.Raw, []string{"addr", "address", "host", "server", "endpoint", "url"})
  160. if isHttps == 1 {
  161. addr = "https://" + addr
  162. }else {
  163. addr = "http://" + addr
  164. }
  165. if addr != "" {
  166. backend.Addr = addr
  167. result.AddrBackendList = append(result.AddrBackendList, addr)
  168. }
  169. // 提取customHost
  170. if customHost := gjson.Get(value.Raw, "customHost").String(); customHost != "" {
  171. backend.CustomHost = customHost
  172. result.CustomHostList = append(result.CustomHostList, customHost)
  173. result.CustomHost = append(result.CustomHost, customHost)
  174. }
  175. // 只有当有有效数据时才添加到BackendList
  176. if backend.Addr != "" || backend.CustomHost != "" {
  177. result.BackendList = append(result.BackendList, backend)
  178. }
  179. } else {
  180. // 直接作为地址处理
  181. if addr := value.String(); addr != "" {
  182. result.AddrBackendList = append(result.AddrBackendList, addr)
  183. result.BackendList = append(result.BackendList, BackendInfo{Addr: addr})
  184. }
  185. }
  186. return true
  187. })
  188. }
  189. // extractDynamicFields 提取所有动态字段到DynamicFields中
  190. func (s *wafLogDataCleanService) extractDynamicFields(jsonStr string, result *CleanedExtraData) {
  191. // 已知的核心字段,不放入动态字段中
  192. knownFields := map[string]bool{
  193. "comment": true, "port": true, "domain": true,
  194. "backendList": true, "backends": true, "backend_list": true,
  195. "data": true, // data字段的内容会被单独处理
  196. }
  197. // 遍历顶层字段
  198. gjson.Parse(jsonStr).ForEach(func(key, value gjson.Result) bool {
  199. fieldName := key.String()
  200. if !knownFields[fieldName] {
  201. // 将未知字段存储到动态字段中
  202. result.DynamicFields[fieldName] = value.Value()
  203. }
  204. return true
  205. })
  206. // 特殊处理data字段中的未知字段
  207. dataResult := gjson.Get(jsonStr, "data")
  208. if dataResult.Exists() && dataResult.IsObject() {
  209. dataKnownFields := map[string]bool{
  210. "port": true, "domain": true, "backendList": true,
  211. "backends": true, "backend_list": true,
  212. }
  213. dataResult.ForEach(func(key, value gjson.Result) bool {
  214. fieldName := key.String()
  215. if !dataKnownFields[fieldName] {
  216. // 使用data.前缀避免冲突
  217. result.DynamicFields["data."+fieldName] = value.Value()
  218. }
  219. return true
  220. })
  221. }
  222. }