// gameShieldCrawler.go
  1. package service
  2. import (
  3. "bytes"
  4. "compress/flate"
  5. "compress/gzip"
  6. "context"
  7. "crypto/rand"
  8. "crypto/tls"
  9. "encoding/hex"
  10. "encoding/json"
  11. "fmt"
  12. "github.com/PuerkitoBio/goquery"
  13. "github.com/spf13/viper"
  14. "github.com/tidwall/gjson"
  15. "go.uber.org/zap"
  16. "io"
  17. "mime/multipart"
  18. "net/http"
  19. "net/url"
  20. "strings"
  21. "time"
  22. )
// CrawlerService wraps the HTTP interactions with the admin backend
// (presumably a go-admin panel, judging by the form-token names):
// login, form-token scraping, form submission and page fetching.
type CrawlerService interface {
	// GetLoginCookie signs in with the configured credentials and returns
	// the session cookies joined into a single Cookie header value.
	GetLoginCookie(ctx context.Context) (string, error)
	// GetFormTokens fetches loginUrl with the given Cookie header and
	// extracts the hidden __go_admin_previous_ / __go_admin_t_ form fields.
	GetFormTokens(ctx context.Context, loginUrl string, cookieHeader string) (map[string]string, error)
	// SendFormData posts formData to url as a multipart form, with the
	// given Cookie header, and returns the raw response body.
	SendFormData(ctx context.Context, url string, cookie string, formData map[string]interface{}) ([]byte, error)
	// GetField fetches KeyURL+appName and returns the "data.raw" JSON
	// object from the response, decoded into a map.
	GetField(ctx context.Context, appName string) (map[string]interface{}, error)
	// GetKey fetches KeyURL+appName and returns the "data.key" string
	// from the response.
	GetKey(ctx context.Context, appName string) (string, error)
	// DeleteRule logs in, posts ruleID to URL+ruleUrl and returns the
	// message parsed out of the response.
	DeleteRule(ctx context.Context, ruleID int, ruleUrl string) (string, error)
	// FetchPageContent GETs URL+url with the given Cookie header and
	// returns the (decompressed) page body.
	FetchPageContent(ctx context.Context, url string, cookie string) ([]byte, error)
}
// CrawlerConfig holds the credentials and endpoints read from the
// "crawler.*" section of the application configuration.
type CrawlerConfig struct {
	Username string // admin login name   (crawler.username)
	Password string // admin password     (crawler.password)
	URL      string // admin base URL, used as prefix for relative paths (crawler.Url)
	KeyURL   string // key-service URL, app name is appended directly    (crawler.keyUrl)
}
  38. func NewCrawlerService(
  39. service *Service,
  40. parser ParserService,
  41. conf *viper.Viper,
  42. ) CrawlerService {
  43. return &crawlerService{
  44. Service: service,
  45. parser: parser,
  46. config: &CrawlerConfig{
  47. Username: conf.GetString("crawler.username"),
  48. Password: conf.GetString("crawler.password"),
  49. URL: conf.GetString("crawler.Url"),
  50. KeyURL: conf.GetString("crawler.keyUrl"),
  51. },
  52. }
  53. }
// crawlerService is the concrete CrawlerService implementation.
type crawlerService struct {
	*Service                // embedded base service (provides logger etc.)
	parser   ParserService  // extracts messages from admin responses
	config   *CrawlerConfig // credentials and endpoint configuration
}
  59. // 生成随机字符串
  60. func randomHex(n int) string {
  61. b := make([]byte, n)
  62. _, err := rand.Read(b)
  63. if err != nil {
  64. panic(err)
  65. }
  66. return hex.EncodeToString(b)
  67. }
  68. // 获取登录cookie
  69. func (service *crawlerService) GetLoginCookie(ctx context.Context) (string, error) {
  70. data := url.Values{}
  71. data.Set("username", service.config.Username)
  72. data.Set("password", service.config.Password)
  73. loginUrl := service.config.URL + "admin/signin"
  74. req, err := http.NewRequestWithContext(ctx, "POST", loginUrl, strings.NewReader(data.Encode()))
  75. if err != nil {
  76. return "", fmt.Errorf("操作失败: %v", err)
  77. }
  78. service.logger.WithValue(ctx, zap.Time("time0========================", time.Now()))
  79. // 添加关键请求头
  80. req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
  81. req.Header.Set("Expect", "")
  82. req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36")
  83. req.Header.Set("Referer", loginUrl)
  84. req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8")
  85. client := &http.Client{
  86. CheckRedirect: func(req *http.Request, via []*http.Request) error {
  87. return http.ErrUseLastResponse // 禁止自动跳转
  88. },
  89. Transport: &http.Transport{
  90. TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
  91. },
  92. }
  93. resp, err := client.Do(req)
  94. if err != nil {
  95. return "", fmt.Errorf("%v", err)
  96. }
  97. defer resp.Body.Close()
  98. service.logger.WithValue(ctx, zap.Time("time1===================================", time.Now()))
  99. // 输出响应体,调试用
  100. _, err = io.ReadAll(resp.Body)
  101. if err != nil {
  102. return "", fmt.Errorf("读取响应失败: %v", err)
  103. }
  104. // 提取原始 Header 中的 Set-Cookie 字段
  105. rawCookies := resp.Header["Set-Cookie"]
  106. var cookieStr strings.Builder
  107. for _, cookie := range rawCookies {
  108. parts := strings.SplitN(cookie, ";", 2)
  109. if len(parts) > 0 {
  110. cookieStr.WriteString(parts[0] + "; ")
  111. }
  112. }
  113. service.logger.WithValue(ctx, zap.Time("time2=====================================", time.Now()))
  114. cookieHeader := strings.TrimRight(cookieStr.String(), "; ")
  115. if cookieHeader == "" {
  116. return "", fmt.Errorf("获取 Cookie 失败")
  117. }
  118. return cookieHeader, nil
  119. }
  120. // 获取表单令牌
  121. func (service *crawlerService) GetFormTokens(ctx context.Context, loginUrl string, cookieHeader string) (map[string]string, error) {
  122. req, err := http.NewRequestWithContext(ctx, "GET", loginUrl, nil)
  123. if err != nil {
  124. return nil, fmt.Errorf("创建请求失败: %v", err)
  125. }
  126. // 设置请求头,包括 Cookie 和 PJAX 头
  127. req.Header.Set("Cookie", cookieHeader)
  128. req.Header.Set("X-PJAX", "true")
  129. req.Header.Set("X-PJAX-Container", "#pjax-container")
  130. req.Header.Set("X-Requested-With", "XMLHttpRequest")
  131. req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36")
  132. // HTTP 客户端,跳过 SSL 验证
  133. client := &http.Client{
  134. Transport: &http.Transport{
  135. TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
  136. },
  137. }
  138. // 发送请求
  139. resp, err := client.Do(req)
  140. if err != nil {
  141. return nil, fmt.Errorf("请求失败: %v", err)
  142. }
  143. defer resp.Body.Close()
  144. // 使用 goquery 解析 HTML
  145. doc, err := goquery.NewDocumentFromReader(resp.Body)
  146. if err != nil {
  147. return nil, fmt.Errorf("解析 HTML 失败: %v", err)
  148. }
  149. // 提取隐藏字段
  150. previous := doc.Find(`input[name="__go_admin_previous_"]`).AttrOr("value", "默认值")
  151. t := doc.Find(`input[name="__go_admin_t_"]`).AttrOr("value", "默认值")
  152. return map[string]string{
  153. "previous": previous,
  154. "t": t,
  155. }, nil
  156. }
  157. // 发送 POST 请求
  158. func (service *crawlerService) SendFormData(ctx context.Context, url string, cookie string, formData map[string]interface{}) ([]byte, error) {
  159. var buf bytes.Buffer
  160. writer := multipart.NewWriter(&buf)
  161. // 遍历字段添加到 multipart 表单中
  162. for key, val := range formData {
  163. valueStr := fmt.Sprintf("%v", val) // 转为字符串
  164. if err := writer.WriteField(key, valueStr); err != nil {
  165. return nil, fmt.Errorf("写入字段失败: %v", err)
  166. }
  167. }
  168. // 关闭 writer 以完成结尾边界写入
  169. if err := writer.Close(); err != nil {
  170. return nil, fmt.Errorf("关闭 multipart writer 失败: %v", err)
  171. }
  172. // 构造请求
  173. req, err := http.NewRequestWithContext(ctx, "POST", url, &buf)
  174. if err != nil {
  175. return nil, fmt.Errorf("创建请求失败: %v", err)
  176. }
  177. // 设置请求头
  178. req.Header.Set("Content-Type", writer.FormDataContentType())
  179. req.Header.Set("X-PJAX", "true")
  180. req.Header.Set("X-PJAX-Container", "#pjax-container")
  181. req.Header.Set("X-Requested-With", "XMLHttpRequest")
  182. req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64)")
  183. req.Header.Set("Accept", "text/html, */*; q=0.01")
  184. req.Header.Set("Accept-Encoding", "gzip, deflate")
  185. req.Header.Set("Accept-Language", "zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7")
  186. req.Header.Set("Cookie", cookie)
  187. req.Header.Set("Expect", "")
  188. // 跳过 SSL 验证的 HTTP 客户端
  189. client := &http.Client{
  190. Transport: &http.Transport{
  191. TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
  192. },
  193. }
  194. // 发起请求
  195. resp, err := client.Do(req)
  196. if err != nil {
  197. return nil, fmt.Errorf("请求发送失败: %v", err)
  198. }
  199. defer resp.Body.Close()
  200. // 读取响应
  201. res, err := io.ReadAll(resp.Body)
  202. if err != nil {
  203. return nil, fmt.Errorf("读取响应失败: %v", err)
  204. }
  205. return res, nil
  206. }
  207. // 获取 rule_id
  208. func (service *crawlerService) GetField(ctx context.Context, appName string) (map[string]interface{}, error) {
  209. keyURL := service.config.KeyURL + appName
  210. resp, err := http.Get(keyURL)
  211. if err != nil {
  212. return nil, fmt.Errorf("请求失败:%w", err)
  213. }
  214. defer resp.Body.Close()
  215. body, err := io.ReadAll(resp.Body)
  216. if err != nil {
  217. return nil, fmt.Errorf("读取响应体失败:%w", err)
  218. }
  219. // 先用 gjson 拿到 data.raw 对应的原始 JSON
  220. result := gjson.GetBytes(body, "data.raw")
  221. if !result.Exists() {
  222. return nil, fmt.Errorf("响应中缺少 data.raw")
  223. }
  224. // 再把这一段反序列化到 map
  225. var rawMap map[string]interface{}
  226. if err := json.Unmarshal([]byte(result.Raw), &rawMap); err != nil {
  227. return nil, fmt.Errorf("解析 data.raw 失败:%w", err)
  228. }
  229. return rawMap, nil
  230. }
  231. func (service *crawlerService) GetKey(ctx context.Context, appName string) (string, error) {
  232. resp, err := http.Get(service.config.KeyURL + appName)
  233. if err != nil {
  234. return "", fmt.Errorf("请求失败:%w", err)
  235. }
  236. defer resp.Body.Close()
  237. body, err := io.ReadAll(resp.Body)
  238. if err != nil {
  239. return "", fmt.Errorf("读取响应体失败:%w", err)
  240. }
  241. // 2. 直接从 JSON 路径 data.key 拿字符串
  242. result := gjson.GetBytes(body, "data.key")
  243. if !result.Exists() {
  244. return "", fmt.Errorf("响应中缺少 data.key")
  245. }
  246. return result.String(), nil
  247. }
  248. func (service *crawlerService) DeleteRule(ctx context.Context, ruleID int, ruleUrl string) (string, error) {
  249. // 1. 登录,拿到 Cookie
  250. cookie, err := service.GetLoginCookie(ctx)
  251. if err != nil {
  252. return "", fmt.Errorf("login failed: %w", err)
  253. }
  254. // 2. 构造删除请求 URL 和表单
  255. deleteURL := service.config.URL + ruleUrl
  256. formData := map[string]interface{}{
  257. "id": ruleID,
  258. }
  259. // 3. 发表单(multipart 也支持 x-www-form-urlencoded,你这里用已有的 SendFormData)
  260. respBody, err := service.SendFormData(ctx, deleteURL, cookie, formData)
  261. if err != nil {
  262. return "", err
  263. }
  264. res, err := service.parser.GetMessage(ctx, respBody)
  265. if err != nil {
  266. return "", err
  267. }
  268. return res, nil
  269. }
  270. func (service *crawlerService) FetchPageContent(ctx context.Context, url string, cookie string) ([]byte, error) {
  271. fetchUrl := service.config.URL + url
  272. // 配置 HTTP 客户端
  273. client := &http.Client{
  274. Transport: &http.Transport{
  275. TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
  276. MaxIdleConns: 100,
  277. MaxIdleConnsPerHost: 100,
  278. IdleConnTimeout: 90 * time.Second,
  279. },
  280. Timeout: 30 * time.Second,
  281. }
  282. // 构造请求
  283. req, err := http.NewRequestWithContext(ctx, "GET", fetchUrl, nil)
  284. if err != nil {
  285. return nil, fmt.Errorf("创建请求失败: %v", err)
  286. }
  287. // 设置请求头
  288. req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36")
  289. req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8")
  290. req.Header.Set("Accept-Encoding", "gzip, deflate, br")
  291. req.Header.Set("Accept-Language", "zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7")
  292. req.Header.Set("Cookie", cookie)
  293. // 发起请求
  294. resp, err := client.Do(req)
  295. if err != nil {
  296. return nil, fmt.Errorf("请求发送失败: %v", err)
  297. }
  298. defer resp.Body.Close()
  299. // 检查响应状态码
  300. if resp.StatusCode != http.StatusOK {
  301. return nil, fmt.Errorf("请求失败,状态码: %d", resp.StatusCode)
  302. }
  303. // 处理压缩响应
  304. var reader io.Reader = resp.Body
  305. switch resp.Header.Get("Content-Encoding") {
  306. case "gzip":
  307. gzipReader, err := gzip.NewReader(resp.Body)
  308. if err != nil {
  309. return nil, fmt.Errorf("解压 gzip 响应失败: %v", err)
  310. }
  311. defer gzipReader.Close()
  312. reader = gzipReader
  313. case "deflate":
  314. reader = flate.NewReader(resp.Body)
  315. }
  316. // 读取响应内容
  317. content, err := io.ReadAll(reader)
  318. if err != nil {
  319. return nil, fmt.Errorf("读取响应内容失败: %v", err)
  320. }
  321. return content, nil
  322. }