scrapy + selenium 登录企查查
项目在获取企查查私密数据时需要先登录企查查,所以单独部署一个登录节点,专门负责登录企查查,并将登录后的 cookie 保存到 Redis 中,供其他爬取节点复用。
主要代码如下:
# Login spider: logs into qcc.com once per configured account and pushes the
# resulting session cookies into Redis so the crawling nodes can reuse them.
class LoginSpider(scrapy.Spider):
    name = 'loginQcc'
    allowed_domains = ['qcc.com']
    start_urls = ['https://www.qcc.com']

    def parse(self, response):
        """Log in with every account in ``settings.USER_ACCOUNT``.

        Each entry is a ``"phone-password"`` string. For each account a
        Selenium-driven login is performed and the browser cookies are
        serialized to JSON and pushed onto the Redis list ``qcc_cookies``.

        :param response: ignored; the spider only uses parse() as a trigger.
        """
        redis_pool = RedisUtil()
        for user in settings.USER_ACCOUNT:
            # Split only on the FIRST '-' so that passwords which themselves
            # contain '-' are not truncated (plain split('-') broke them).
            phone, pwd = user.split('-', 1)
            # Drive a browser through the qcc login flow to obtain cookies.
            su = SelenuimUitl()
            listcookies = su.login(phone, pwd)
            # Persist the cookie set for the crawler nodes.
            redis_pool.lpush('qcc_cookies', json.dumps(listcookies))
# Log in to qichacha and collect the session cookies.
def login(self, phone, pwd):
    """Log in to qichacha with the given phone number and password.

    Fills the normal (account/password) login form, drags the slider
    captcha, submits, and returns the browser's cookies.

    :param phone: account phone number
    :param pwd: account password
    :return: list of cookie dicts from ``browser.get_cookies()``
    """
    self.browser.get("https://www.qichacha.com/user_login")
    time.sleep(3)
    # Switch to the account/password login tab.
    self.browser.find_element_by_xpath('//*[@id = "normalLogin"]').click()
    self.browser.find_element_by_xpath('//*[@id = "nameNormal"]').send_keys(phone)  # account
    self.browser.find_element_by_xpath('//*[@id = "pwdNormal"]').send_keys(pwd)  # password
    time.sleep(1)
    # Drag the slider captcha. Build the whole gesture as ONE chain and call
    # perform() once: the original code called perform() twice on the same
    # ActionChains, which replays the stored click_and_hold a second time,
    # and it never release()d the mouse button.
    slider = self.browser.find_element_by_id("nc_1_n1z")
    actions = ActionChains(self.browser)
    actions.click_and_hold(slider).move_by_offset(308, 0).release().perform()
    time.sleep(5)
    # Submit the login form.
    self.browser.find_element_by_xpath('//*[@id="user_login_normal"]/button').click()
    time.sleep(5)
    return self.browser.get_cookies()
def area(self):
    """Log in with a fixed test account and fetch the new-company page.

    :return: an ``HtmlResponse`` wrapping the rendered page source of
        ``https://www.qcc.com/web/elib/newcompany``.
    """
    # SECURITY NOTE(review): credentials are hard-coded below; move them to
    # configuration (e.g. settings.USER_ACCOUNT, as LoginSpider does).
    self.browser.get("https://www.qichacha.com/user_login")
    time.sleep(1)
    # Switch to the account/password login tab.
    self.browser.find_element_by_xpath('//*[@id = "normalLogin"]').click()
    self.browser.find_element_by_xpath('//*[@id = "nameNormal"]').send_keys('15200223978')  # account
    self.browser.find_element_by_xpath('//*[@id = "pwdNormal"]').send_keys('15200223978abc')  # password
    # Drag the slider captcha as a single gesture: one perform() only —
    # a second perform() on the same ActionChains replays click_and_hold —
    # and release the mouse button at the end.
    slider = self.browser.find_element_by_id("nc_1_n1z")
    actions = ActionChains(self.browser)
    actions.click_and_hold(slider).move_by_offset(348, 0).release().perform()
    time.sleep(1)
    # Submit the login form.
    self.browser.find_element_by_xpath('//*[@id="user_login_normal"]/button').click()
    time.sleep(3)
    self.browser.get('https://www.qcc.com/web/elib/newcompany')
    time.sleep(2)
    # Bug fix: `request` was never defined in this scope (NameError at
    # runtime); use the browser's current URL for the response instead.
    return HtmlResponse(self.browser.current_url, body=self.browser.page_source)