gameShieldCrawler.go 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364
  1. package service
  2. import (
  3. "bytes"
  4. "compress/flate"
  5. "compress/gzip"
  6. "context"
  7. "crypto/rand"
  8. "crypto/tls"
  9. "encoding/hex"
  10. "encoding/json"
  11. "fmt"
  12. "github.com/PuerkitoBio/goquery"
  13. "github.com/spf13/viper"
  14. "github.com/tidwall/gjson"
  15. "io"
  16. "mime/multipart"
  17. "net/http"
  18. "net/url"
  19. "strings"
  20. "time"
  21. )
// CrawlerService automates the game-shield admin console over HTTP:
// logging in, scraping anti-CSRF form tokens, submitting forms, and
// reading rule/key data back out of JSON and HTML responses.
type CrawlerService interface {
	// GetLoginCookie signs in and returns the session cookies as a single
	// "name=value; name=value" Cookie header string.
	GetLoginCookie(ctx context.Context) (string, error)
	// GetFormTokens fetches loginUrl with the given cookie and extracts the
	// hidden go-admin form tokens ("previous" and "t") from the page.
	GetFormTokens(ctx context.Context, loginUrl string, cookieHeader string) (map[string]string, error)
	// SendFormData posts formData as multipart/form-data to url with the
	// given cookie and returns the raw response body.
	SendFormData(ctx context.Context, url string, cookie string, formData map[string]interface{}) ([]byte, error)
	// GetField fetches the key endpoint for appName and returns the JSON
	// object found at "data.raw" as a map.
	GetField(ctx context.Context, appName string) (map[string]interface{}, error)
	// GetKey fetches the key endpoint for appName and returns the string
	// found at "data.key".
	GetKey(ctx context.Context, appName string) (string, error)
	// DeleteRule logs in, posts ruleID to ruleUrl, and returns the result
	// message parsed from the response.
	DeleteRule(ctx context.Context, ruleID int, ruleUrl string) (string, error)
	// FetchPageContent GETs the admin page at url (relative to the base URL)
	// with the given cookie and returns the decompressed body.
	FetchPageContent(ctx context.Context, url string, cookie string) ([]byte, error)
}
// CrawlerConfig holds the credentials and endpoints read from the
// "crawler.*" section of the configuration.
type CrawlerConfig struct {
	Username string // admin console username
	Password string // admin console password
	URL      string // base URL of the admin console (paths are appended directly)
	KeyURL   string // key-lookup endpoint; the app name is appended directly
}
  37. func NewCrawlerService(
  38. service *Service,
  39. parser ParserService,
  40. conf *viper.Viper,
  41. ) CrawlerService {
  42. // 创建一个全局HTTP客户端,用于复用连接池
  43. httpClient := &http.Client{
  44. Transport: &http.Transport{
  45. TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
  46. MaxIdleConns: 500, // 最大空闲连接数
  47. MaxIdleConnsPerHost: 300, // 每个主机的最大空闲连接数
  48. IdleConnTimeout: 90 * time.Second, // 空闲连接超时时间
  49. },
  50. Timeout: 30 * time.Second, // 请求超时时间
  51. }
  52. return &crawlerService{
  53. Service: service,
  54. parser: parser,
  55. config: &CrawlerConfig{
  56. Username: conf.GetString("crawler.username"),
  57. Password: conf.GetString("crawler.password"),
  58. URL: conf.GetString("crawler.Url"),
  59. KeyURL: conf.GetString("crawler.keyUrl"),
  60. },
  61. httpClient: httpClient,
  62. }
  63. }
// crawlerService is the concrete CrawlerService implementation.
type crawlerService struct {
	*Service                     // embedded base service (shared app dependencies)
	parser     ParserService     // parses admin-console responses into messages
	config     *CrawlerConfig    // credentials and endpoint configuration
	httpClient *http.Client      // shared client; reused for connection pooling
}
  70. // 生成随机字符串
  71. func randomHex(n int) string {
  72. b := make([]byte, n)
  73. _, err := rand.Read(b)
  74. if err != nil {
  75. panic(err)
  76. }
  77. return hex.EncodeToString(b)
  78. }
  79. // 获取登录cookie
  80. func (service *crawlerService) GetLoginCookie(ctx context.Context) (string, error) {
  81. data := url.Values{}
  82. data.Set("username", service.config.Username)
  83. data.Set("password", service.config.Password)
  84. loginUrl := service.config.URL + "admin/signin"
  85. req, err := http.NewRequestWithContext(ctx, "POST", loginUrl, strings.NewReader(data.Encode()))
  86. if err != nil {
  87. return "", fmt.Errorf("操作失败: %v", err)
  88. }
  89. // 添加关键请求头
  90. req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
  91. req.Header.Set("Expect", "")
  92. req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36")
  93. req.Header.Set("Referer", loginUrl)
  94. req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8")
  95. // 使用已有的HTTP客户端,但添加禁止重定向的选项
  96. client := &http.Client{
  97. // 复制原有客户端的Transport
  98. Transport: service.httpClient.Transport,
  99. // 添加禁止自动跳转的设置
  100. CheckRedirect: func(req *http.Request, via []*http.Request) error {
  101. return http.ErrUseLastResponse // 禁止自动跳转
  102. },
  103. }
  104. resp, err := client.Do(req)
  105. if err != nil {
  106. return "", fmt.Errorf("%v", err)
  107. }
  108. defer resp.Body.Close()
  109. // 输出响应体,调试用
  110. _, err = io.ReadAll(resp.Body)
  111. if err != nil {
  112. return "", fmt.Errorf("读取响应失败: %v", err)
  113. }
  114. // 提取原始 Header 中的 Set-Cookie 字段
  115. rawCookies := resp.Header["Set-Cookie"]
  116. var cookieStr strings.Builder
  117. for _, cookie := range rawCookies {
  118. parts := strings.SplitN(cookie, ";", 2)
  119. if len(parts) > 0 {
  120. cookieStr.WriteString(parts[0] + "; ")
  121. }
  122. }
  123. cookieHeader := strings.TrimRight(cookieStr.String(), "; ")
  124. if cookieHeader == "" {
  125. return "", fmt.Errorf("获取 Cookie 失败")
  126. }
  127. return cookieHeader, nil
  128. }
  129. // 获取表单令牌
  130. func (service *crawlerService) GetFormTokens(ctx context.Context, loginUrl string, cookieHeader string) (map[string]string, error) {
  131. req, err := http.NewRequestWithContext(ctx, "GET", loginUrl, nil)
  132. if err != nil {
  133. return nil, fmt.Errorf("创建请求失败: %v", err)
  134. }
  135. // 设置请求头,包括 Cookie 和 PJAX 头
  136. req.Header.Set("Cookie", cookieHeader)
  137. req.Header.Set("X-PJAX", "true")
  138. req.Header.Set("X-PJAX-Container", "#pjax-container")
  139. req.Header.Set("X-Requested-With", "XMLHttpRequest")
  140. req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36")
  141. // 使用共享的HTTP客户端发起请求
  142. resp, err := service.httpClient.Do(req)
  143. if err != nil {
  144. return nil, fmt.Errorf("请求失败: %v", err)
  145. }
  146. defer resp.Body.Close()
  147. // 使用 goquery 解析 HTML
  148. doc, err := goquery.NewDocumentFromReader(resp.Body)
  149. if err != nil {
  150. return nil, fmt.Errorf("解析 HTML 失败: %v", err)
  151. }
  152. // 提取隐藏字段
  153. previous := doc.Find(`input[name="__go_admin_previous_"]`).AttrOr("value", "默认值")
  154. t := doc.Find(`input[name="__go_admin_t_"]`).AttrOr("value", "默认值")
  155. return map[string]string{
  156. "previous": previous,
  157. "t": t,
  158. }, nil
  159. }
  160. // 发送 POST 请求
  161. func (service *crawlerService) SendFormData(ctx context.Context, url string, cookie string, formData map[string]interface{}) ([]byte, error) {
  162. var buf bytes.Buffer
  163. writer := multipart.NewWriter(&buf)
  164. // 遍历字段添加到 multipart 表单中
  165. for key, val := range formData {
  166. valueStr := fmt.Sprintf("%v", val) // 转为字符串
  167. if err := writer.WriteField(key, valueStr); err != nil {
  168. return nil, fmt.Errorf("写入字段失败: %v", err)
  169. }
  170. }
  171. // 关闭 writer 以完成结尾边界写入
  172. if err := writer.Close(); err != nil {
  173. return nil, fmt.Errorf("关闭 multipart writer 失败: %v", err)
  174. }
  175. // 构造请求
  176. req, err := http.NewRequestWithContext(ctx, "POST", url, &buf)
  177. if err != nil {
  178. return nil, fmt.Errorf("创建请求失败: %v", err)
  179. }
  180. // 设置请求头
  181. req.Header.Set("Content-Type", writer.FormDataContentType())
  182. req.Header.Set("X-PJAX", "true")
  183. req.Header.Set("X-PJAX-Container", "#pjax-container")
  184. req.Header.Set("X-Requested-With", "XMLHttpRequest")
  185. req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64)")
  186. req.Header.Set("Accept", "text/html, */*; q=0.01")
  187. req.Header.Set("Accept-Encoding", "gzip, deflate")
  188. req.Header.Set("Accept-Language", "zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7")
  189. req.Header.Set("Cookie", cookie)
  190. req.Header.Set("Expect", "")
  191. // 使用共享的HTTP客户端发起请求
  192. resp, err := service.httpClient.Do(req)
  193. if err != nil {
  194. return nil, fmt.Errorf("请求发送失败: %v", err)
  195. }
  196. defer resp.Body.Close()
  197. // 读取响应
  198. res, err := io.ReadAll(resp.Body)
  199. if err != nil {
  200. return nil, fmt.Errorf("读取响应失败: %v", err)
  201. }
  202. return res, nil
  203. }
  204. // 获取 rule_id
  205. func (service *crawlerService) GetField(ctx context.Context, appName string) (map[string]interface{}, error) {
  206. keyURL := service.config.KeyURL + appName
  207. // 使用共享的HTTP客户端而不是http.Get
  208. req, err := http.NewRequestWithContext(ctx, "GET", keyURL, nil)
  209. if err != nil {
  210. return nil, fmt.Errorf("创建请求失败: %v", err)
  211. }
  212. resp, err := service.httpClient.Do(req)
  213. if err != nil {
  214. return nil, fmt.Errorf("请求失败:%w", err)
  215. }
  216. defer resp.Body.Close()
  217. body, err := io.ReadAll(resp.Body)
  218. if err != nil {
  219. return nil, fmt.Errorf("读取响应体失败:%w", err)
  220. }
  221. // 先用 gjson 拿到 data.raw 对应的原始 JSON
  222. result := gjson.GetBytes(body, "data.raw")
  223. if !result.Exists() {
  224. return nil, fmt.Errorf("响应中缺少 data.raw")
  225. }
  226. // 再把这一段反序列化到 map
  227. var rawMap map[string]interface{}
  228. if err := json.Unmarshal([]byte(result.Raw), &rawMap); err != nil {
  229. return nil, fmt.Errorf("解析 data.raw 失败:%w", err)
  230. }
  231. return rawMap, nil
  232. }
  233. func (service *crawlerService) GetKey(ctx context.Context, appName string) (string, error) {
  234. // 使用共享的HTTP客户端而不是http.Get
  235. req, err := http.NewRequestWithContext(ctx, "GET", service.config.KeyURL+appName, nil)
  236. if err != nil {
  237. return "", fmt.Errorf("创建请求失败: %v", err)
  238. }
  239. resp, err := service.httpClient.Do(req)
  240. if err != nil {
  241. return "", fmt.Errorf("请求失败:%w", err)
  242. }
  243. defer resp.Body.Close()
  244. body, err := io.ReadAll(resp.Body)
  245. if err != nil {
  246. return "", fmt.Errorf("读取响应体失败:%w", err)
  247. }
  248. // 2. 直接从 JSON 路径 data.key 拿字符串
  249. result := gjson.GetBytes(body, "data.key")
  250. if !result.Exists() {
  251. return "", fmt.Errorf("响应中缺少 data.key")
  252. }
  253. return result.String(), nil
  254. }
  255. func (service *crawlerService) DeleteRule(ctx context.Context, ruleID int, ruleUrl string) (string, error) {
  256. // 1. 登录,拿到 Cookie
  257. cookie, err := service.GetLoginCookie(ctx)
  258. if err != nil {
  259. return "", fmt.Errorf("login failed: %w", err)
  260. }
  261. // 2. 构造删除请求 URL 和表单
  262. deleteURL := service.config.URL + ruleUrl
  263. formData := map[string]interface{}{
  264. "id": ruleID,
  265. }
  266. // 3. 发表单(multipart 也支持 x-www-form-urlencoded,你这里用已有的 SendFormData)
  267. respBody, err := service.SendFormData(ctx, deleteURL, cookie, formData)
  268. if err != nil {
  269. return "", err
  270. }
  271. res, err := service.parser.GetMessage(ctx, respBody)
  272. if err != nil {
  273. return "", err
  274. }
  275. return res, nil
  276. }
  277. func (service *crawlerService) FetchPageContent(ctx context.Context, url string, cookie string) ([]byte, error) {
  278. fetchUrl := service.config.URL + url
  279. // 使用服务中的共享HTTP客户端
  280. client := service.httpClient
  281. // 构造请求
  282. req, err := http.NewRequestWithContext(ctx, "GET", fetchUrl, nil)
  283. if err != nil {
  284. return nil, fmt.Errorf("创建请求失败: %v", err)
  285. }
  286. // 设置请求头
  287. req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36")
  288. req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8")
  289. req.Header.Set("Accept-Encoding", "gzip, deflate, br")
  290. req.Header.Set("Accept-Language", "zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7")
  291. req.Header.Set("Cookie", cookie)
  292. // 发起请求
  293. resp, err := client.Do(req)
  294. if err != nil {
  295. return nil, fmt.Errorf("请求发送失败: %v", err)
  296. }
  297. defer resp.Body.Close()
  298. // 检查响应状态码
  299. if resp.StatusCode != http.StatusOK {
  300. return nil, fmt.Errorf("请求失败,状态码: %d", resp.StatusCode)
  301. }
  302. // 处理压缩响应
  303. var reader io.Reader = resp.Body
  304. switch resp.Header.Get("Content-Encoding") {
  305. case "gzip":
  306. gzipReader, err := gzip.NewReader(resp.Body)
  307. if err != nil {
  308. return nil, fmt.Errorf("解压 gzip 响应失败: %v", err)
  309. }
  310. defer gzipReader.Close()
  311. reader = gzipReader
  312. case "deflate":
  313. reader = flate.NewReader(resp.Body)
  314. }
  315. // 读取响应内容
  316. content, err := io.ReadAll(reader)
  317. if err != nil {
  318. return nil, fmt.Errorf("读取响应内容失败: %v", err)
  319. }
  320. return content, nil
  321. }