// Package gofish is the first version of a Go web crawler.
package gofish
import (
"fmt"
"io"
"net/http"
"net/url"
)
// Request bundles everything needed to issue one HTTP request and hand
// the response to a handler.
type Request struct {
	// Url is the address the request is sent to.
	Url string
	// Method is the HTTP method, GET or POST.
	Method string
	// Headers points to the HTTP header set of the request.
	Headers *http.Header
	// Body is the content of the request body.
	Body io.Reader
	// Handle is the handler used for processing.
	Handle Handle
	// Client is the HTTP client held by the request.
	Client http.Client
}
// NewRequest checks that Url parses and builds a Request from its arguments.
//
// method is the HTTP method, Url the target address, userAgent the value
// placed in the User-Agent header, handle the handler stored on the Request,
// and body the request payload stored on the Request.
//
// It returns a nil Request and the url.Parse error when Url cannot be parsed.
func NewRequest(method, Url, userAgent string, handle Handle, body io.Reader) (*Request, error) {
	// Only the error matters here; the parsed URL itself is not kept.
	if _, err := url.Parse(Url); err != nil {
		return nil, err
	}

	// The header is set even when userAgent is empty.
	// NOTE(review): a default User-Agent was probably intended for the empty
	// case — confirm with the author before changing it.
	hdr := http.Header{}
	hdr.Set("User-Agent", userAgent)

	client := http.Client{
		// This policy always returns nil, so it never rejects a redirect.
		CheckRedirect: func(req *http.Request, via []*http.Request) error {
			return nil
		},
	}

	return &Request{
		Url:     Url,
		Method:  method,
		Headers: &hdr,
		Body:    body, // previously dropped, which lost the payload of POST requests
		Handle:  handle,
		Client:  client,
	}, nil
}
// Do sends the request through r.Client and, when the response status is
// http.StatusOK, passes the response body and r.Url to r.Handle.Worker.
//
// It returns the error from building or sending the request, or an error
// carrying the status code when the status is not http.StatusOK. The response
// body is closed on every path once a response has been received.
func (r *Request) Do() error {
	request, err := http.NewRequest(r.Method, r.Url, r.Body)
	if err != nil {
		return err
	}
	// Headers can be nil when the Request was built as a plain literal; keep
	// the empty header set created by http.NewRequest in that case.
	if r.Headers != nil {
		request.Header = *r.Headers
	}

	resp, err := r.Client.Do(request)
	if err != nil {
		return err
	}
	// Registered before the status check so that non-200 responses are
	// closed as well instead of leaking the body.
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("error status code: %d", resp.StatusCode)
	}

	r.Handle.Worker(resp.Body, r.Url)
	return nil
}