Written in Go, with a ChromeDP dependency.
Install the dependencies with: go get -t -d github.com/chromedp/chromedp github.com/chromedp/cdproto
The next version will include improved headless functionality, background (headless=false) functionality, and blocking pagination. If you have a feature request, add it to the comments below.
Install the dependencies with: go get -t -d github.com/chromedp/chromedp github.com/chromedp/cdproto
The next version will include improved headless functionality, background (headless=false) functionality, and blocking pagination. If you have a feature request, add it to the comments below.
Код:
package main
import (
	"bufio"
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"log"
	"net/url"
	"os"
	"path/filepath"
	"time"

	"github.com/chromedp/cdproto/cdp"
	"github.com/chromedp/chromedp"
)
// Result is a single scraped search hit. It is serialized to JSON as an
// object with one key, "link".
type Result struct {
	// Link holds the href attribute value read from a result anchor.
	Link string `json:"link"`
}
func main() {
const (
port = 9515
)
opts := append(chromedp. DefaultExecAllocatorOptions[:],
chromedp. Flag("headless", false),
chromedp. Flag("no-sandbox", true),
chromedp. Flag("disable-dev-shm-usage", true),
)
baseCtx, cancel := context. WithTimeout(context. Background(), 99*time. Second)
defer cancel()
browserCtx, cancel := chromedp. NewExecAllocator(baseCtx, opts...)
defer cancel()
tabCtx, cancel := chromedp. NewContext(browserCtx)
defer cancel()
.log. Print("Enter search query: ")
scanner := bufio. NewScanner(os. Stdin)
scanner. Scan()
input := scanner. Text()
googleURL := url. URL{
Scheme: "https",
Host: "www.google.com",
Path: "/search",
RawQuery: url. Values{"q": {input}, "filter": {"0"}, "num": {"1000"}}. Encode(),
}
err := chromedp. Run(tabCtx,
chromedp. Navigate(googleURL.String()),
chromedp. WaitReady("div#search"),
)
if err != nil {
.log. Fatal(err)
}
var doc []*cdp. Node
err = chromedp. Run(tabCtx,
chromedp. Nodes("div#search div.g a", &doc),
)
if err != nil {
.log. Fatal(err)
}
results := []Result{}
for _, r := range doc {
deadline, ok1 := baseCtx.Deadline()
if ok1 && time. Now(). After(deadline) {
.log. Println("Context deadline exceeded")
break
}
if err := baseCtx.Err(); err != nil {
.log. Println("Context canceled")
break
}
var href string
var ok2 bool
err = chromedp. Run(tabCtx,
chromedp. AttributeValue(r.FullXPath(), "href", &href, &ok2),
)
if err != nil {
.log. Println(err)
continue
}
if ok2 && href != "" {
results = append(results, Result{Link: href})
}
}
.log. Print("Enter output file path (including filename): ")
scanner. Scan()
filePath := scanner. Text()
filePath, err = filepath. Abs(filePath)
if err != nil {
.log. Fatal(err)
}
dir := filepath. Dir(filePath)
err = os. MkdirAll(dir, os. ModePerm)
if err != nil {
.log. Fatal(err)
}
file, err := os. Create(filePath)
if err != nil {
.log. Fatal(err)
}
defer file. Close()
encoder := json. NewEncoder(file)
encoder. SetIndent("", " ")
err = encoder. Encode(results)
if err != nil {
.log. Fatal(err)
}
.log. Printf("Results saved to %s", filePath)
time. Sleep(10 * time. Second)
cancel()
}
[/code]