waflogdataclean.go 11 KB


  1. package admin
  2. import (
  3. "encoding/json"
  4. "fmt"
  5. "github.com/go-nunu/nunu-layout-advanced/internal/service"
  6. "github.com/tidwall/gjson"
  7. "go.uber.org/zap"
  8. "strings"
  9. )
  10. type WafLogDataCleanService interface {
  11. ParseWafLogExtraData(extraDataBytes json.RawMessage, apiName string) CleanedExtraData
  12. FormatBackendList(backendList interface{}) string
  13. }
  14. func NewWafLogDataCleanService(
  15. service *service.Service,
  16. ) WafLogDataCleanService {
  17. return &wafLogDataCleanService{
  18. Service: service,
  19. }
  20. }
  21. type wafLogDataCleanService struct {
  22. *service.Service
  23. }
  // BackendInfo describes a single backend server entry.
  type BackendInfo struct {
  	// Addr is the backend address.
  	Addr string `json:"addr,omitempty"`
  	// CustomHost is the custom Host header for this backend.
  	CustomHost string `json:"customHost,omitempty"`
  	// IsHttps flags whether the backend is reached over HTTPS.
  	IsHttps int `json:"isHttps,omitempty"`
  }
  30. // CleanedExtraData 使用动态结构存储解析后的数据
  31. type CleanedExtraData struct {
  32. // 核心字段 - 新的数组结构
  33. BackendList []BackendInfo `json:"backendList,omitempty"` // 完整的后端信息数组
  34. // 向后兼容字段
  35. AddrBackendList []string `json:"addrBackendList,omitempty"` // 只包含地址的数组
  36. CustomHostList []string `json:"customHostList,omitempty"` // 只包含customHost的数组
  37. CustomHost []string `json:"customHost,omitempty"` // customHost数组,与CustomHostList相同
  38. // 基础字段
  39. Port string `json:"port,omitempty"`
  40. Domain string `json:"domain,omitempty"`
  41. Comment string `json:"comment,omitempty"`
  42. // 扩展字段
  43. UID int64 `json:"uid,omitempty"`
  44. HostID int64 `json:"hostId,omitempty"`
  45. Proxy bool `json:"proxy,omitempty"`
  46. IsHttps int `json:"isHttps,omitempty"`
  47. RuleID []int64 `json:"ruleId,omitempty"`
  48. // 其他字段
  49. AllowAndDenyIps string `json:"allowAndDenyIps,omitempty"`
  50. // 动态字段存储,用于存储任意其他字段
  51. DynamicFields map[string]interface{} `json:"dynamicFields,omitempty"`
  52. // 原始数据备份,用于调试和回溯
  53. RawData map[string]interface{} `json:"rawData,omitempty"`
  54. }
  55. // parseWafLogExtraData 使用gjson解析动态JSON结构,简洁高效
  56. func (s *wafLogDataCleanService) ParseWafLogExtraData(extraDataBytes json.RawMessage, apiName string) CleanedExtraData {
  57. var result CleanedExtraData
  58. result.DynamicFields = make(map[string]interface{})
  59. if len(extraDataBytes) == 0 {
  60. return result
  61. }
  62. jsonStr := string(extraDataBytes)
  63. if !gjson.Valid(jsonStr) {
  64. s.Logger.Warn("ExtraData 不是有效的JSON", zap.String("raw_data", jsonStr))
  65. return result
  66. }
  67. // 解析并保存原始数据
  68. var rawData map[string]interface{}
  69. json.Unmarshal(extraDataBytes, &rawData)
  70. result.RawData = rawData
  71. // 使用gjson进行智能字段提取
  72. s.extractWithGjson(jsonStr, apiName, &result)
  73. return result
  74. }
  75. // extractWithGjson 使用gjson进行智能字段提取
  76. func (s *wafLogDataCleanService) extractWithGjson(jsonStr, apiName string, result *CleanedExtraData) {
  77. // 提取顶层字段
  78. if uid := gjson.Get(jsonStr, "uid"); uid.Exists() {
  79. result.UID = uid.Int()
  80. }
  81. if hostId := gjson.Get(jsonStr, "hostId"); hostId.Exists() {
  82. result.HostID = hostId.Int()
  83. }
  84. // 定义常见字段路径的优先级列表
  85. fieldPaths := map[string][]string{
  86. "comment": {"comment", "data.comment", "desc", "description", "remark", "note"},
  87. "port": {"port", "data.port", "config.port", "server.port"},
  88. "domain": {"domain", "data.domain", "host", "data.host", "hostname"},
  89. "proxy": {"proxy", "data.proxy"},
  90. "isHttps": {"isHttps", "data.isHttps"},
  91. "ids": {"ids", "data.ids", "ruleIds", "data.ruleIds", "ruleId", "data.ruleId"},
  92. "ip": {"ip","newIp","ips"},
  93. }
  94. // 提取基础字段
  95. for fieldName, paths := range fieldPaths {
  96. value := s.getFirstValidPath(jsonStr, paths)
  97. if value != "" {
  98. switch fieldName {
  99. case "comment":
  100. result.Comment = value
  101. case "port":
  102. result.Port = value
  103. case "domain":
  104. result.Domain = value
  105. case "proxy":
  106. result.Proxy = gjson.Get(jsonStr, s.getFirstValidPathName(jsonStr, paths)).Bool()
  107. case "isHttps":
  108. result.IsHttps = int(gjson.Get(jsonStr, s.getFirstValidPathName(jsonStr, paths)).Int())
  109. case "ids":
  110. result.RuleID = s.extractRuleIDs(jsonStr, paths)
  111. case "ip":
  112. result.AllowAndDenyIps = value
  113. }
  114. }
  115. }
  116. // 智能提取 backendList
  117. s.extractBackendListWithGjson(jsonStr, apiName, result)
  118. // 提取所有其他动态字段
  119. s.extractDynamicFields(jsonStr, result)
  120. }
  121. // getFirstValidPath 从多个路径中获取第一个有效值
  122. func (s *wafLogDataCleanService) getFirstValidPath(jsonStr string, paths []string) string {
  123. for _, path := range paths {
  124. if value := gjson.Get(jsonStr, path); value.Exists() && value.String() != "" {
  125. return value.String()
  126. }
  127. }
  128. return ""
  129. }
  130. // getFirstValidPathName 获取第一个有效路径的名称
  131. func (s *wafLogDataCleanService) getFirstValidPathName(jsonStr string, paths []string) string {
  132. for _, path := range paths {
  133. if gjson.Get(jsonStr, path).Exists() {
  134. return path
  135. }
  136. }
  137. return ""
  138. }
  139. // extractRuleIDs 提取规则ID数组
  140. func (s *wafLogDataCleanService) extractRuleIDs(jsonStr string, paths []string) []int64 {
  141. var ruleIDs []int64
  142. for _, path := range paths {
  143. ruleResult := gjson.Get(jsonStr, path)
  144. if !ruleResult.Exists() {
  145. continue
  146. }
  147. switch {
  148. case ruleResult.IsArray():
  149. // 如果是数组,遍历提取每个ID
  150. ruleResult.ForEach(func(key, value gjson.Result) bool {
  151. if id := value.Int(); id > 0 {
  152. ruleIDs = append(ruleIDs, id)
  153. }
  154. return true
  155. })
  156. default:
  157. // 如果是单个值,添加到数组中
  158. if id := ruleResult.Int(); id > 0 {
  159. ruleIDs = append(ruleIDs, id)
  160. }
  161. }
  162. // 找到有效数据就退出
  163. if len(ruleIDs) > 0 {
  164. break
  165. }
  166. }
  167. return ruleIDs
  168. }
  169. // extractBackendListWithGjson 使用gjson智能提取后端列表
  170. func (s *wafLogDataCleanService) extractBackendListWithGjson(jsonStr, apiName string, result *CleanedExtraData) {
  171. // 定义可能的后端列表字段路径
  172. backendPaths := []string{
  173. "data.backendList", "backendList", "backends", "data.backends",
  174. "backend_list", "data.backend_list", "servers", "data.servers",
  175. "upstreams", "data.upstreams", "targets", "data.targets",
  176. }
  177. for _, path := range backendPaths {
  178. backendResult := gjson.Get(jsonStr, path)
  179. if !backendResult.Exists() {
  180. continue
  181. }
  182. // 根据数据类型进行处理
  183. switch {
  184. case backendResult.IsArray():
  185. s.processArrayBackends(backendResult, result)
  186. }
  187. // 找到有效数据就退出
  188. if len(result.AddrBackendList) > 0 || len(result.CustomHostList) > 0 {
  189. break
  190. }
  191. }
  192. }
  193. // processArrayBackends 处理数组格式的后端列表
  194. func (s *wafLogDataCleanService) processArrayBackends(backendResult gjson.Result, result *CleanedExtraData) {
  195. backendResult.ForEach(func(key, value gjson.Result) bool {
  196. if value.IsObject() {
  197. // 创建BackendInfo结构
  198. backend := BackendInfo{}
  199. isHttps := gjson.Get(value.Raw, "isHttps").Int()
  200. // 尝试提取地址字段
  201. addr := s.getFirstValidPath(value.Raw, []string{"addr", "address", "host", "server", "endpoint", "url"})
  202. if isHttps == 1 {
  203. addr = "https://" + addr
  204. }else {
  205. addr = "http://" + addr
  206. }
  207. if addr != "" {
  208. backend.Addr = addr
  209. result.AddrBackendList = append(result.AddrBackendList, addr)
  210. }
  211. // 提取customHost
  212. if customHost := gjson.Get(value.Raw, "customHost").String(); customHost != "" {
  213. backend.CustomHost = customHost
  214. result.CustomHostList = append(result.CustomHostList, customHost)
  215. result.CustomHost = append(result.CustomHost, customHost)
  216. }
  217. // 只有当有有效数据时才添加到BackendList
  218. if backend.Addr != "" || backend.CustomHost != "" {
  219. result.BackendList = append(result.BackendList, backend)
  220. }
  221. } else {
  222. // 直接作为地址处理
  223. if addr := value.String(); addr != "" {
  224. result.AddrBackendList = append(result.AddrBackendList, addr)
  225. result.BackendList = append(result.BackendList, BackendInfo{Addr: addr})
  226. }
  227. }
  228. return true
  229. })
  230. }
  231. // extractDynamicFields 提取所有动态字段到DynamicFields中
  232. func (s *wafLogDataCleanService) extractDynamicFields(jsonStr string, result *CleanedExtraData) {
  233. // 已知的核心字段,不放入动态字段中
  234. knownFields := map[string]bool{
  235. "comment": true, "port": true, "domain": true,
  236. "backendList": true, "backends": true, "backend_list": true,
  237. "data": true, // data字段的内容会被单独处理
  238. }
  239. // 遍历顶层字段
  240. gjson.Parse(jsonStr).ForEach(func(key, value gjson.Result) bool {
  241. fieldName := key.String()
  242. if !knownFields[fieldName] {
  243. // 将未知字段存储到动态字段中
  244. result.DynamicFields[fieldName] = value.Value()
  245. }
  246. return true
  247. })
  248. // 特殊处理data字段中的未知字段
  249. dataResult := gjson.Get(jsonStr, "data")
  250. if dataResult.Exists() && dataResult.IsObject() {
  251. dataKnownFields := map[string]bool{
  252. "port": true, "domain": true, "backendList": true,
  253. "backends": true, "backend_list": true,
  254. }
  255. dataResult.ForEach(func(key, value gjson.Result) bool {
  256. fieldName := key.String()
  257. if !dataKnownFields[fieldName] {
  258. // 使用data.前缀避免冲突
  259. result.DynamicFields["data."+fieldName] = value.Value()
  260. }
  261. return true
  262. })
  263. }
  264. }
  265. // formatBackendList 格式化后端地址列表
  266. func (s *wafLogDataCleanService) FormatBackendList(backendList interface{}) string {
  267. if backendList == nil {
  268. return ""
  269. }
  270. switch v := backendList.(type) {
  271. case string:
  272. return v
  273. case []string:
  274. if len(v) == 0 {
  275. return ""
  276. }
  277. return strings.Join(v, ", ")
  278. case []int64:
  279. // 处理 []int64 类型的数组(如 RuleId)
  280. if len(v) == 0 {
  281. return ""
  282. }
  283. var strList []string
  284. for _, id := range v {
  285. strList = append(strList, fmt.Sprintf("%d", id))
  286. }
  287. return strings.Join(strList, ", ")
  288. case []interface{}:
  289. // 处理 []interface{} 类型的数组
  290. if len(v) == 0 {
  291. return ""
  292. }
  293. var strList []string
  294. for _, item := range v {
  295. if str := fmt.Sprintf("%v", item); str != "" && str != "<nil>" {
  296. strList = append(strList, str)
  297. }
  298. }
  299. return strings.Join(strList, ", ")
  300. default:
  301. // 对于其他类型,先转换为字符串再处理
  302. str := fmt.Sprintf("%v", v)
  303. // 处理 Go 数组格式 [item1 item2] -> item1, item2
  304. if strings.HasPrefix(str, "[") && strings.HasSuffix(str, "]") {
  305. // 移除方括号
  306. content := strings.Trim(str, "[]")
  307. if content != "" {
  308. // 按空格分割并用逗号连接
  309. parts := strings.Fields(content)
  310. if len(parts) > 1 {
  311. return strings.Join(parts, ", ")
  312. }
  313. return content
  314. }
  315. }
  316. // 处理其他包含空格的字符串
  317. if strings.Contains(str, " ") && !strings.Contains(str, "\n") {
  318. parts := strings.Fields(str)
  319. if len(parts) > 1 {
  320. return strings.Join(parts, ", ")
  321. }
  322. }
  323. return str
  324. }
  325. }