gameShieldCrawler.go 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364
  1. package service
  2. import (
  3. "bytes"
  4. "compress/flate"
  5. "compress/gzip"
  6. "context"
  7. "crypto/rand"
  8. "crypto/tls"
  9. "encoding/hex"
  10. "encoding/json"
  11. "fmt"
  12. "github.com/PuerkitoBio/goquery"
  13. "github.com/spf13/viper"
  14. "github.com/tidwall/gjson"
  15. "io"
  16. "mime/multipart"
  17. "net/http"
  18. "net/url"
  19. "strings"
  20. "time"
  21. )
// CrawlerService abstracts interaction with the GoAdmin-based backend:
// logging in for a session cookie, scraping form tokens, submitting forms,
// querying fields/keys, deleting rules and fetching admin pages.
type CrawlerService interface {
	// GetLoginCookie logs in with the configured credentials and returns
	// the session cookies as a single "k=v; k2=v2" header string.
	GetLoginCookie(ctx context.Context) (string, error)
	// GetFormTokens loads loginUrl with the given Cookie header and extracts
	// the hidden GoAdmin form tokens ("previous" and "t").
	GetFormTokens(ctx context.Context, loginUrl string, cookieHeader string) (map[string]string, error)
	// SendFormData POSTs formData as multipart/form-data to url using cookie
	// and returns the raw response body.
	SendFormData(ctx context.Context, url string, cookie string, formData map[string]interface{}) ([]byte, error)
	// GetField queries the key service for appName and returns the decoded
	// "data.raw" JSON object.
	GetField(ctx context.Context, appName string) (map[string]interface{}, error)
	// GetKey queries the key service for appName and returns "data.key".
	GetKey(ctx context.Context, appName string) (string, error)
	// DeleteRule logs in, POSTs the rule id to ruleUrl and returns the
	// backend's parsed result message.
	DeleteRule(ctx context.Context, ruleID int, ruleUrl string) (string, error)
	// FetchPageContent GETs an admin page (config URL + url) with cookie and
	// returns the (decompressed) page bytes.
	FetchPageContent(ctx context.Context, url string, cookie string) ([]byte, error)
}
// CrawlerConfig holds the crawler credentials and endpoints, populated from
// the "crawler.*" keys of the application's viper configuration.
type CrawlerConfig struct {
	Username string // admin login name ("crawler.username")
	Password string // admin login password ("crawler.password")
	URL      string // backend base URL, expected to end with "/" ("crawler.Url")
	KeyURL   string // key-service base URL; app name is appended ("crawler.keyUrl")
}
  37. func NewCrawlerService(
  38. service *Service,
  39. parser ParserService,
  40. conf *viper.Viper,
  41. ) CrawlerService {
  42. return &crawlerService{
  43. Service: service,
  44. parser: parser,
  45. config: &CrawlerConfig{
  46. Username: conf.GetString("crawler.username"),
  47. Password: conf.GetString("crawler.password"),
  48. URL: conf.GetString("crawler.Url"),
  49. KeyURL: conf.GetString("crawler.keyUrl"),
  50. },
  51. }
  52. }
// crawlerService is the concrete CrawlerService backed by HTTP scraping of
// the GoAdmin backend.
type crawlerService struct {
	*Service                // embedded base service (shared deps such as logging)
	parser   ParserService  // parses backend HTML/JSON responses into messages
	config   *CrawlerConfig // credentials and endpoints from configuration
}
  58. // 生成随机字符串
  59. func randomHex(n int) string {
  60. b := make([]byte, n)
  61. _, err := rand.Read(b)
  62. if err != nil {
  63. panic(err)
  64. }
  65. return hex.EncodeToString(b)
  66. }
  67. // 获取登录cookie
  68. func (service *crawlerService) GetLoginCookie(ctx context.Context) (string, error) {
  69. data := url.Values{}
  70. data.Set("username", service.config.Username)
  71. data.Set("password", service.config.Password)
  72. loginUrl := service.config.URL + "admin/signin"
  73. req, err := http.NewRequestWithContext(ctx, "POST", loginUrl, strings.NewReader(data.Encode()))
  74. if err != nil {
  75. return "", fmt.Errorf("操作失败: %v", err)
  76. }
  77. // 添加关键请求头
  78. req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
  79. req.Header.Set("Expect", "")
  80. req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36")
  81. req.Header.Set("Referer", loginUrl)
  82. req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8")
  83. client := &http.Client{
  84. CheckRedirect: func(req *http.Request, via []*http.Request) error {
  85. return http.ErrUseLastResponse // 禁止自动跳转
  86. },
  87. Transport: &http.Transport{
  88. TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
  89. },
  90. }
  91. resp, err := client.Do(req)
  92. if err != nil {
  93. return "", fmt.Errorf("%v", err)
  94. }
  95. defer resp.Body.Close()
  96. // 输出响应体,调试用
  97. _, err = io.ReadAll(resp.Body)
  98. if err != nil {
  99. return "", fmt.Errorf("读取响应失败: %v", err)
  100. }
  101. // 提取原始 Header 中的 Set-Cookie 字段
  102. rawCookies := resp.Header["Set-Cookie"]
  103. var cookieStr strings.Builder
  104. for _, cookie := range rawCookies {
  105. parts := strings.SplitN(cookie, ";", 2)
  106. if len(parts) > 0 {
  107. cookieStr.WriteString(parts[0] + "; ")
  108. }
  109. }
  110. cookieHeader := strings.TrimRight(cookieStr.String(), "; ")
  111. if cookieHeader == "" {
  112. return "", fmt.Errorf("获取 Cookie 失败")
  113. }
  114. return cookieHeader, nil
  115. }
  116. // 获取表单令牌
  117. func (service *crawlerService) GetFormTokens(ctx context.Context, loginUrl string, cookieHeader string) (map[string]string, error) {
  118. req, err := http.NewRequestWithContext(ctx, "GET", loginUrl, nil)
  119. if err != nil {
  120. return nil, fmt.Errorf("创建请求失败: %v", err)
  121. }
  122. // 设置请求头,包括 Cookie 和 PJAX 头
  123. req.Header.Set("Cookie", cookieHeader)
  124. req.Header.Set("X-PJAX", "true")
  125. req.Header.Set("X-PJAX-Container", "#pjax-container")
  126. req.Header.Set("X-Requested-With", "XMLHttpRequest")
  127. req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36")
  128. // HTTP 客户端,跳过 SSL 验证
  129. client := &http.Client{
  130. Transport: &http.Transport{
  131. TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
  132. },
  133. }
  134. // 发送请求
  135. resp, err := client.Do(req)
  136. if err != nil {
  137. return nil, fmt.Errorf("请求失败: %v", err)
  138. }
  139. defer resp.Body.Close()
  140. // 使用 goquery 解析 HTML
  141. doc, err := goquery.NewDocumentFromReader(resp.Body)
  142. if err != nil {
  143. return nil, fmt.Errorf("解析 HTML 失败: %v", err)
  144. }
  145. // 提取隐藏字段
  146. previous := doc.Find(`input[name="__go_admin_previous_"]`).AttrOr("value", "默认值")
  147. t := doc.Find(`input[name="__go_admin_t_"]`).AttrOr("value", "默认值")
  148. return map[string]string{
  149. "previous": previous,
  150. "t": t,
  151. }, nil
  152. }
  153. // 发送 POST 请求
  154. func (service *crawlerService) SendFormData(ctx context.Context, url string, cookie string, formData map[string]interface{}) ([]byte, error) {
  155. var buf bytes.Buffer
  156. writer := multipart.NewWriter(&buf)
  157. // 遍历字段添加到 multipart 表单中
  158. for key, val := range formData {
  159. valueStr := fmt.Sprintf("%v", val) // 转为字符串
  160. if err := writer.WriteField(key, valueStr); err != nil {
  161. return nil, fmt.Errorf("写入字段失败: %v", err)
  162. }
  163. }
  164. // 关闭 writer 以完成结尾边界写入
  165. if err := writer.Close(); err != nil {
  166. return nil, fmt.Errorf("关闭 multipart writer 失败: %v", err)
  167. }
  168. // 构造请求
  169. req, err := http.NewRequestWithContext(ctx, "POST", url, &buf)
  170. if err != nil {
  171. return nil, fmt.Errorf("创建请求失败: %v", err)
  172. }
  173. // 设置请求头
  174. req.Header.Set("Content-Type", writer.FormDataContentType())
  175. req.Header.Set("X-PJAX", "true")
  176. req.Header.Set("X-PJAX-Container", "#pjax-container")
  177. req.Header.Set("X-Requested-With", "XMLHttpRequest")
  178. req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64)")
  179. req.Header.Set("Accept", "text/html, */*; q=0.01")
  180. req.Header.Set("Accept-Encoding", "gzip, deflate")
  181. req.Header.Set("Accept-Language", "zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7")
  182. req.Header.Set("Cookie", cookie)
  183. req.Header.Set("Expect", "")
  184. // 跳过 SSL 验证的 HTTP 客户端
  185. client := &http.Client{
  186. Transport: &http.Transport{
  187. TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
  188. },
  189. }
  190. // 发起请求
  191. resp, err := client.Do(req)
  192. if err != nil {
  193. return nil, fmt.Errorf("请求发送失败: %v", err)
  194. }
  195. defer resp.Body.Close()
  196. // 读取响应
  197. res, err := io.ReadAll(resp.Body)
  198. if err != nil {
  199. return nil, fmt.Errorf("读取响应失败: %v", err)
  200. }
  201. return res, nil
  202. }
  203. // 获取 rule_id
  204. func (service *crawlerService) GetField(ctx context.Context, appName string) (map[string]interface{}, error) {
  205. keyURL := service.config.KeyURL + appName
  206. resp, err := http.Get(keyURL)
  207. if err != nil {
  208. return nil, fmt.Errorf("请求失败:%w", err)
  209. }
  210. defer resp.Body.Close()
  211. body, err := io.ReadAll(resp.Body)
  212. if err != nil {
  213. return nil, fmt.Errorf("读取响应体失败:%w", err)
  214. }
  215. // 先用 gjson 拿到 data.raw 对应的原始 JSON
  216. result := gjson.GetBytes(body, "data.raw")
  217. if !result.Exists() {
  218. return nil, fmt.Errorf("响应中缺少 data.raw")
  219. }
  220. // 再把这一段反序列化到 map
  221. var rawMap map[string]interface{}
  222. if err := json.Unmarshal([]byte(result.Raw), &rawMap); err != nil {
  223. return nil, fmt.Errorf("解析 data.raw 失败:%w", err)
  224. }
  225. return rawMap, nil
  226. }
  227. func (service *crawlerService) GetKey(ctx context.Context, appName string) (string, error) {
  228. resp, err := http.Get(service.config.KeyURL + appName)
  229. if err != nil {
  230. return "", fmt.Errorf("请求失败:%w", err)
  231. }
  232. defer resp.Body.Close()
  233. body, err := io.ReadAll(resp.Body)
  234. if err != nil {
  235. return "", fmt.Errorf("读取响应体失败:%w", err)
  236. }
  237. // 2. 直接从 JSON 路径 data.key 拿字符串
  238. result := gjson.GetBytes(body, "data.key")
  239. if !result.Exists() {
  240. return "", fmt.Errorf("响应中缺少 data.key")
  241. }
  242. return result.String(), nil
  243. }
  244. func (service *crawlerService) DeleteRule(ctx context.Context, ruleID int, ruleUrl string) (string, error) {
  245. // 1. 登录,拿到 Cookie
  246. cookie, err := service.GetLoginCookie(ctx)
  247. if err != nil {
  248. return "", fmt.Errorf("login failed: %w", err)
  249. }
  250. // 2. 构造删除请求 URL 和表单
  251. deleteURL := service.config.URL + ruleUrl
  252. formData := map[string]interface{}{
  253. "id": ruleID,
  254. }
  255. // 3. 发表单(multipart 也支持 x-www-form-urlencoded,你这里用已有的 SendFormData)
  256. respBody, err := service.SendFormData(ctx, deleteURL, cookie, formData)
  257. if err != nil {
  258. return "", err
  259. }
  260. res, err := service.parser.GetMessage(ctx, respBody)
  261. if err != nil {
  262. return "", err
  263. }
  264. return res, nil
  265. }
  266. func (service *crawlerService) FetchPageContent(ctx context.Context, url string, cookie string) ([]byte, error) {
  267. fetchUrl := service.config.URL + url
  268. // 配置 HTTP 客户端
  269. client := &http.Client{
  270. Transport: &http.Transport{
  271. TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
  272. MaxIdleConns: 100,
  273. MaxIdleConnsPerHost: 100,
  274. IdleConnTimeout: 90 * time.Second,
  275. },
  276. Timeout: 30 * time.Second,
  277. }
  278. // 构造请求
  279. req, err := http.NewRequestWithContext(ctx, "GET", fetchUrl, nil)
  280. if err != nil {
  281. return nil, fmt.Errorf("创建请求失败: %v", err)
  282. }
  283. // 设置请求头
  284. req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36")
  285. req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8")
  286. req.Header.Set("Accept-Encoding", "gzip, deflate, br")
  287. req.Header.Set("Accept-Language", "zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7")
  288. req.Header.Set("Cookie", cookie)
  289. // 发起请求
  290. resp, err := client.Do(req)
  291. if err != nil {
  292. return nil, fmt.Errorf("请求发送失败: %v", err)
  293. }
  294. defer resp.Body.Close()
  295. // 检查响应状态码
  296. if resp.StatusCode != http.StatusOK {
  297. return nil, fmt.Errorf("请求失败,状态码: %d", resp.StatusCode)
  298. }
  299. // 处理压缩响应
  300. var reader io.Reader = resp.Body
  301. switch resp.Header.Get("Content-Encoding") {
  302. case "gzip":
  303. gzipReader, err := gzip.NewReader(resp.Body)
  304. if err != nil {
  305. return nil, fmt.Errorf("解压 gzip 响应失败: %v", err)
  306. }
  307. defer gzipReader.Close()
  308. reader = gzipReader
  309. case "deflate":
  310. reader = flate.NewReader(resp.Body)
  311. }
  312. // 读取响应内容
  313. content, err := io.ReadAll(reader)
  314. if err != nil {
  315. return nil, fmt.Errorf("读取响应内容失败: %v", err)
  316. }
  317. return content, nil
  318. }