Скрапер результатов выдачи гугла
Без стороннего API; долго искал решение — нашёл его в undetected-chromedriver.
Python:
import argparse
import os
import random
import time
from urllib.parse import quote_plus

import undetected_chromedriver as uc
from selenium.webdriver.common.by import By
def get_google_links(query, max_pages=10):
    """Scrape outbound result links from Google search pages for *query*.

    Drives a headless Chrome (undetected_chromedriver) through up to
    ``max_pages`` result pages, collecting every ``<a href>`` that points
    outside google.com.

    Args:
        query: Search phrase; it is URL-encoded before being put in the URL.
        max_pages: Maximum number of result pages to visit (default 10).

    Returns:
        A list of unique external http(s) links, in first-seen order.
    """
    # URL-encode the query: raw interpolation breaks on spaces, '&', '#',
    # and non-ASCII characters.
    search_url = f'https://www.google.com/search?q={quote_plus(query)}'

    options = uc.ChromeOptions()
    options.add_argument('--headless=new')
    options.add_argument('--no-sandbox')
    options.add_argument('--disable-blink-features=AutomationControlled')
    options.add_argument('--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36')

    driver = uc.Chrome(options=options)
    all_links = []
    try:
        driver.get(search_url)
        time.sleep(random.uniform(2, 4))  # randomized pause to look less bot-like
        for _page in range(max_pages):
            for anchor in driver.find_elements(By.CSS_SELECTOR, 'a'):
                href = anchor.get_attribute('href')
                # Keep only external absolute http(s) links; skip Google's own.
                if href and href.startswith('http') and 'google.com' not in href:
                    all_links.append(href)
            try:
                # 'pnnext' is the "Next page" anchor; absent on the last page.
                driver.find_element(By.ID, 'pnnext').click()
            except Exception:
                break  # no "Next" button -> last result page reached
            time.sleep(random.uniform(2, 4))
    finally:
        # Always shut the browser down, even if scraping raised.
        driver.quit()
    # Dedupe while preserving first-seen order (the original list(set(...))
    # returned links in nondeterministic order).
    return list(dict.fromkeys(all_links))
if __name__ == "__main__":
    # CLI entry point: scrape result links for a query and dump them,
    # one per line, to the requested output file.
    arg_parser = argparse.ArgumentParser(description="Google SERP parser.")
    arg_parser.add_argument('-q', '--query', required=True)
    arg_parser.add_argument('--max-pages', type=int, default=10)
    arg_parser.add_argument('--output', required=True, help='Output file')
    cli = arg_parser.parse_args()

    found_links = get_google_links(cli.query, cli.max_pages)
    with open(cli.output, "w", encoding="utf-8") as out_file:
        out_file.writelines(link + "\n" for link in found_links)