用 selenium 调用桌面浏览器

需要配合 headless browser 使用。PhantomJS 已经比较老了,这里不再介绍,只记录 Firefox 和 Chrome 的用法。

Firefox

# Install the gtk3 and libXt packages (listed here as prerequisites for Firefox)
yum install gtk3 libXt
# Get the Linux build from the official site
wget https://download-ssl.firefox.com.cn/releases/firefox/63.0/zh-CN/Firefox-latest-x86_64.tar.bz2
# Get the driver from GitHub (search for "geckodriver")
wget https://github.com/mozilla/geckodriver/releases/download/v0.23.0/geckodriver-v0.23.0-linux64.tar.gz

Chrome

# Get Chrome with the help of puppeteer (installed globally via npm)
npm i -g puppeteer
# Symlink the chrome binary from puppeteer's .local-chromium directory into the
# current directory; the path embeds the node version and Chromium revision.
ln -s ~/node-v8.12.0-linux-x64/lib/node_modules/puppeteer/.local-chromium/linux-594312/chrome-linux/chrome .
# Download the driver (search for "chromedriver")
wget https://chromedriver.storage.googleapis.com/2.43/chromedriver_linux64.zip

中文字体

# List the font packages available from the configured repositories
yum search fonts
# Install the Chinese font packages (-y answers the confirmation prompt)
yum install -y wqy-microhei-fonts
yum install -y bitmap-fangsongti-fonts
# Install fontconfig as well
yum install -y fontconfig

部分例子

import time

from selenium import webdriver

# Firefox
# NOTE(review): `options.headless` and the `find_element_by_*` helpers are the
# Selenium 3 style API; newer Selenium releases use
# `find_element(By.CSS_SELECTOR, ...)` -- confirm against the installed version.
options = webdriver.FirefoxOptions()
options.headless = True
browser = webdriver.Firefox(options=options)

# Everything after the browser is started runs inside try/finally so the
# browser is always shut down, even when a lookup or click raises.
try:
    url = "https://login.10086.cn/"
    browser.get(url)

    # Type the phone number into the "#p_name" input.
    p_name = browser.find_element_by_css_selector("#p_name")
    no = "13344445555"
    p_name.clear()
    p_name.send_keys(no)

    label = browser.find_element_by_css_selector("#service_login_main label")
    label.click()

    btn = browser.find_element_by_css_selector("#getSMSPwd")
    # Poll up to 9 times with a growing delay (0.1s, 0.2s, ...) until the
    # button is displayed. The click below is attempted either way.
    for i in range(1, 10):
        print(i)
        time.sleep(0.1 * i)
        if btn.is_displayed():
            break
    btn.click()

    # Short pause after the click before reading cookies.
    time.sleep(0.2)

    # cookies
    cookies = browser.get_cookies()
    print(cookies)

    # screenshot
    fn = '10086.png'
    browser.save_screenshot(fn)
finally:
    # do not forget it
    browser.quit()


# Chrome: headless session started with a fixed set of command-line switches.
options = webdriver.ChromeOptions()
options.headless = True
for flag in ('--disable-extensions', '--disable-gpu', '--no-sandbox'):
    options.add_argument(flag)
browser = webdriver.Chrome(options=options)

清理孤儿浏览器进程

# Kill orphaned processes (parent PID 1) whose `ps` line matches a pattern.
#   $1 - grep pattern matched against each "pid ppid comm" line
# Prints the date and the PIDs before killing them. Returns 1 without killing
# anything when no pattern is given.
function clean_orphan() {
    # With the pattern quoted below, an empty one would match every process
    # whose parent is PID 1, so refuse it explicitly.
    if [ -z "$1" ]; then
        echo "usage: clean_orphan PATTERN" >&2
        return 1
    fi
    # Keep rows whose PPID column is 1, filter by pattern, then keep only the PID.
    # "$1" is quoted (no word splitting / globbing) and passed via -e so a
    # pattern starting with '-' is not parsed as a grep option.
    JUNK=$(ps axo pid,ppid,comm | awk '$2==1' | grep -e "$1" | awk '{print $1}')
    if [ -n "${JUNK}" ]; then
        date && echo kill $JUNK
        # JUNK is deliberately unquoted: each PID must be a separate argument.
        kill $JUNK
    fi
}

# Sweep forever: clean up orphaned geckodriver, then firefox, pausing
# 3 seconds after each sweep.
while :; do
    for target in geckodriver firefox; do
        clean_orphan "$target"
        sleep 3
    done
done

# Not reached while the endless loop above keeps running. Nothing after this
# `exit` is ever executed, so the lines below are only manual one-liners.
exit

# List geckodriver processes whose parent PID is 1.
ps axo pid,ppid,comm | grep geckodriver | awk '$2==1'
# NOTE(review): nothing is piped into this grep; it looks like the tail of a
# longer `ps ... |` pipeline that kills the matching firefox PIDs -- confirm.
grep firefox | awk '{print $1}' | xargs kill