|
@@ -2,6 +2,8 @@ package service
|
|
|
|
|
|
import (
|
|
|
"bytes"
|
|
|
+ "compress/flate"
|
|
|
+ "compress/gzip"
|
|
|
"context"
|
|
|
"crypto/rand"
|
|
|
"crypto/tls"
|
|
@@ -16,6 +18,7 @@ import (
|
|
|
"net/http"
|
|
|
"net/url"
|
|
|
"strings"
|
|
|
+ "time"
|
|
|
)
|
|
|
|
|
|
type CrawlerService interface {
|
|
@@ -25,6 +28,7 @@ type CrawlerService interface {
|
|
|
GetField(ctx context.Context, appName string) (map[string]interface{}, error)
|
|
|
GetKey(ctx context.Context, appName string) (string, error)
|
|
|
DeleteRule(ctx context.Context, ruleID int, ruleUrl string) (string, error)
|
|
|
+ FetchPageContent(ctx context.Context, url string, cookie string) ([]byte, error)
|
|
|
}
|
|
|
|
|
|
type CrawlerConfig struct {
|
|
@@ -297,3 +301,64 @@ func (service *crawlerService) DeleteRule(ctx context.Context, ruleID int, ruleU
|
|
|
|
|
|
return res, nil
|
|
|
}
|
|
|
+
|
|
|
+func (service *crawlerService) FetchPageContent(ctx context.Context, url string, cookie string) ([]byte, error) {
|
|
|
+ fetchUrl := service.config.URL + url
|
|
|
+ // 配置 HTTP 客户端
|
|
|
+ client := &http.Client{
|
|
|
+ Transport: &http.Transport{
|
|
|
+ TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
|
|
|
+ MaxIdleConns: 100,
|
|
|
+ MaxIdleConnsPerHost: 100,
|
|
|
+ IdleConnTimeout: 90 * time.Second,
|
|
|
+ },
|
|
|
+ Timeout: 30 * time.Second,
|
|
|
+ }
|
|
|
+
|
|
|
+ // 构造请求
|
|
|
+ req, err := http.NewRequestWithContext(ctx, "GET", fetchUrl, nil)
|
|
|
+ if err != nil {
|
|
|
+ return nil, fmt.Errorf("创建请求失败: %v", err)
|
|
|
+ }
|
|
|
+
|
|
|
+ // 设置请求头
|
|
|
+ req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36")
|
|
|
+ req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8")
|
|
|
+ req.Header.Set("Accept-Encoding", "gzip, deflate, br")
|
|
|
+ req.Header.Set("Accept-Language", "zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7")
|
|
|
+ req.Header.Set("Cookie", cookie)
|
|
|
+
|
|
|
+ // 发起请求
|
|
|
+ resp, err := client.Do(req)
|
|
|
+ if err != nil {
|
|
|
+ return nil, fmt.Errorf("请求发送失败: %v", err)
|
|
|
+ }
|
|
|
+ defer resp.Body.Close()
|
|
|
+
|
|
|
+ // 检查响应状态码
|
|
|
+ if resp.StatusCode != http.StatusOK {
|
|
|
+ return nil, fmt.Errorf("请求失败,状态码: %d", resp.StatusCode)
|
|
|
+ }
|
|
|
+
|
|
|
+ // 处理压缩响应
|
|
|
+ var reader io.Reader = resp.Body
|
|
|
+ switch resp.Header.Get("Content-Encoding") {
|
|
|
+ case "gzip":
|
|
|
+ gzipReader, err := gzip.NewReader(resp.Body)
|
|
|
+ if err != nil {
|
|
|
+ return nil, fmt.Errorf("解压 gzip 响应失败: %v", err)
|
|
|
+ }
|
|
|
+ defer gzipReader.Close()
|
|
|
+ reader = gzipReader
|
|
|
+ case "deflate":
|
|
|
+ reader = flate.NewReader(resp.Body)
|
|
|
+ }
|
|
|
+
|
|
|
+ // 读取响应内容
|
|
|
+ content, err := io.ReadAll(reader)
|
|
|
+ if err != nil {
|
|
|
+ return nil, fmt.Errorf("读取响应内容失败: %v", err)
|
|
|
+ }
|
|
|
+
|
|
|
+ return content, nil
|
|
|
+}
|