我在本地 MacBook Air 上使用标准 Golang http 库从网站(https://avito.ru)获取数据时遇到问题:请求总是被拒绝,返回状态 403 Forbidden。在浏览器中(禁用 JavaScript)打开该站点没有问题,使用标准 curl 命令(curl -v https://www.avito.ru/moscow)获取数据也没有问题,始终返回状态 200。

Golang 代码如下:

package main

import (
    "crypto/tls"
    "crypto/x509"
    "flag"
    "fmt"
    "io/ioutil"
    "net/http/httputil"
    "time"
    "database/sql"
    "log"
    "net/http"

    "golang.org/x/net/http2"
    "github.com/PuerkitoBio/goquery"
)


func main() {

    request, err := http.NewRequest("GET", "https://www.avito.ru/moscow", nil)
    if err != nil {
        log.Fatal(err)
    }
    request.Header.Set("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36")
    request.Header.Add("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8")
    request.Header.Add("Accept-Language", "en-us")
    request.Header.Add("Connection", "keep-alive")
    request.Header.Add("Host", "www.avito.ru")
    fmt.Println(request.Header)

    // Do something with the request
    client := &http.Client{}
    caCert, err := ioutil.ReadFile("/etc/ssl/cert.pem")
    if err != nil {
        log.Fatalf("Reading server certificate: %s", err)
    }
    caCertPool := x509.NewCertPool()
    caCertPool.AppendCertsFromPEM(caCert)

    var httpVersion = flag.Int("version", 2, "HTTP version")

    fmt.Println(string(*httpVersion))
    client.Transport = &http2.Transport{
        TLSClientConfig: &tls.Config{
            RootCAs:            caCertPool,
            // InsecureSkipVerify: true,
        },
    }

    requestDump, err := httputil.DumpRequest(request, true)
    if err != nil {
        fmt.Println(err)
    }
    fmt.Println(string(requestDump))

    response, err := client.Do(request)
    fmt.Println(string(response.Proto))

    if err != nil {
        log.Fatal(err)
    }

    defer response.Body.Close()

    fmt.Println(response.Status)

    doc, err := goquery.NewDocumentFromReader(response.Body)
    if err != nil {
        fmt.Println("here2")
        log.Fatal(err)
    }
    pageTitle := doc.Find("title").Contents().Text()

    fmt.Println(pageTitle)
}

有两张照片:



GET / HTTP/1.1
Host: www.avito.ru
User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36
Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8
Accept-Language: en-us
Connection: keep-alive
Accept-Encoding: gzip

HTTP/2.0
map[Content-Type:[text/html; charset=utf-8] Date:[Sat, 14 Nov 2020 13:14:10 GMT] Server:[nginx] X-Content-Type-Options:[nosniff] X-Envoy-Upstream-Service-Time:[14] X-Xss-Protection:[1; mode=block]]
403 Forbidden
here3
Доступ с вашего IP-адреса временно ограничен — Авито
here4
Доступ с вашего IP-адреса временно ограничен — Авито

curl -v 的输出如下:


Alexanders-MacBook-Air:~ alexchistyakov$ curl -v https://www.avito.ru/moscow
*   Trying 185.89.12.132...
* TCP_NODELAY set
* Connected to www.avito.ru (185.89.12.132) port 443 (#0)
* ALPN, offering h2
* ALPN, offering http/1.1
* successfully set certificate verify locations:
*   CAfile: /etc/ssl/cert.pem
  CApath: none
* TLSv1.2 (OUT), TLS handshake, Client hello (1):
* TLSv1.2 (IN), TLS handshake, Server hello (2):
* TLSv1.2 (IN), TLS handshake, Certificate (11):
* TLSv1.2 (IN), TLS handshake, Server key exchange (12):
* TLSv1.2 (IN), TLS handshake, Server finished (14):
* TLSv1.2 (OUT), TLS handshake, Client key exchange (16):
* TLSv1.2 (OUT), TLS change cipher, Change cipher spec (1):
* TLSv1.2 (OUT), TLS handshake, Finished (20):
* TLSv1.2 (IN), TLS change cipher, Change cipher spec (1):
* TLSv1.2 (IN), TLS handshake, Finished (20):
* SSL connection using TLSv1.2 / ECDHE-RSA-AES128-GCM-SHA256
* ALPN, server accepted to use h2
* Server certificate:
*  subject: C=RU; ST=Moscow; L=Moscow; O=Limited Liability Company KEH eCommerce; CN=*.avito.ru
*  start date: Feb  4 09:48:54 2019 GMT
*  expire date: Feb  4 09:48:54 2021 GMT
*  subjectAltName: host "www.avito.ru" matched cert's "*.avito.ru"
*  issuer: C=BE; O=GlobalSign nv-sa; CN=GlobalSign Organization Validation CA - SHA256 - G2
*  SSL certificate verify ok.
* Using HTTP2, server supports multi-use
* Connection state changed (HTTP/2 confirmed)
* Copying HTTP/2 data in stream buffer to connection buffer after upgrade: len=0
* Using Stream ID: 1 (easy handle 0x7f7feb00ba00)
> GET /moscow HTTP/2
> Host: www.avito.ru
> User-Agent: curl/7.64.1
> Accept: */*
> 
* Connection state changed (MAX_CONCURRENT_STREAMS == 128)!
< HTTP/2 200 
< server: nginx
< date: Sat, 14 Nov 2020 13:28:28 GMT
< content-type: text/html; charset=UTF-8
< set-cookie: u=2ke25u1o.n9oms3.9cgwbvpc4sw0; path=/; expires=Tue, 10-Nov-37 13:28:28 GMT; HttpOnly; Max-Age=536112000; secure; domain=.avito.ru
< set-cookie: v=1605360508; path=/; expires=Sat, 14-Nov-20 13:58:28 GMT; HttpOnly; Max-Age=1800; secure; domain=.avito.ru; SameSite=Lax
< cache-control: no-store, no-cache, must-revalidate, post-check=0, pre-check=0
< set-cookie: buyer_location_id=621540; expires=Sun, 14-Nov-2021 13:28:28 GMT; Max-Age=31536000; path=/; domain=.avito.ru; secure; HttpOnly; SameSite=Lax
< set-cookie: luri=rossiya; expires=Sun, 15-Nov-2020 13:28:28 GMT; Max-Age=86400; path=/; domain=.avito.ru; secure; HttpOnly; SameSite=Lax
< set-cookie: buyer_selected_search_radius4=0_general; expires=Thu, 18-Mar-3019 13:28:28 GMT; Max-Age=31504464000; path=/; domain=.avito.ru; secure; SameSite=Lax
< set-cookie: tmpBannerHash=; expires=Thu, 01-Jan-1970 00:00:01 GMT; Max-Age=0; path=/; secure; HttpOnly; SameSite=Lax
< set-cookie: tmpSideBlockHash=; expires=Thu, 01-Jan-1970 00:00:01 GMT; Max-Age=0; path=/; secure; HttpOnly; SameSite=Lax
< set-cookie: tmpSearchBottomHash=; expires=Thu, 01-Jan-1970 00:00:01 GMT; Max-Age=0; path=/; secure; HttpOnly; SameSite=Lax
< set-cookie: sx=H4sIAAAAAAACAw3EwQqAIAwA0H%2FZucOiJcu%2FyYiRA4UWDhL%2Fvd7hdVjSu0tpT0lVSQ5CY%2FJ%2Fh9ihQYQr31osaMskfpCIV0UUFkOuajDBCXEOuBKFDXmMD9eve2RUAAAA; expires=Sat, 21-Nov-2020 13:28:28 GMT; Max-Age=604800; path=/; domain=.avito.ru; secure; HttpOnly; SameSite=Lax
< set-cookie: so=1605360508; expires=Sat, 14-Nov-2020 13:43:28 GMT; Max-Age=900; path=/; domain=.avito.ru; secure; HttpOnly; SameSite=Lax
< set-cookie: dfp_group=52; expires=Sat, 14-Nov-2020 13:38:28 GMT; Max-Age=600; path=/; domain=.avito.ru; secure; HttpOnly; SameSite=Lax
< set-cookie: buyer_laas_tooltip=; expires=Thu, 01-Jan-1970 00:00:01 GMT; Max-Age=0; path=/; domain=.avito.ru; secure; HttpOnly; SameSite=Lax
< set-cookie: tmpSrcFromBp=ijrkns6kvj4gwc4ggwok00g8gs08oks; expires=Sat, 14-Nov-2020 14:28:28 GMT; Max-Age=3600; path=/; domain=.avito.ru; secure; HttpOnly; SameSite=Lax
< set-cookie: sessid=e33049edd828b91d73300175d802c306.1605360508; expires=Sun, 15-Nov-2020 13:28:28 GMT; Max-Age=86400; path=/; domain=.avito.ru; secure; HttpOnly; SameSite=Lax
< set-cookie: buyer_from_page=; expires=Thu, 01-Jan-1970 00:00:01 GMT; Max-Age=0; path=/; secure; HttpOnly; SameSite=Lax
< x-request-url: /moscow
< x-envoy-upstream-service-time: 532
< x-xss-protection: 1; mode=block
< x-content-type-options: nosniff
< 

<!DOCTYPE html>
   
<html> <head> <script>

要想在 Golang 中像 curl 一样获得 200 状态,我需要做些什么?谢谢。