const webdriver = require('selenium-webdriver');
const chrome = require('selenium-webdriver/chrome');
const firefox = require('selenium-webdriver/firefox');
const { saveFile } = require('./tools/save-file');
const cheerio = require('cheerio');

const { By, until } = webdriver;

// Shop search page that lists the top-level categories.
const SHOP_SEARCH_URL = 'https://lumn.tmall.com/search.htm?spm=a1z10.3-b-s.w5002-15438998724.1.248c3b51L3CXmJ&search=y';
// Selector for category links; the same selector is used on the shop page
// (top-level categories) and on each category page (second-level categories).
const CATEGORY_LINK_SELECTOR = '.cateAttrs li a';
// How long to wait for the logged-in user name element to appear.
const LOGIN_TIMEOUT_MS = 100000;

/**
 * Closes the verification dialog (`.baxia-dialog`) if one is on the page.
 *
 * Uses `findElements` so that an absent dialog is an empty result rather than
 * an exception. Closing is best-effort: a failed click is logged, not thrown.
 *
 * @param {webdriver.WebDriver} driver - Active WebDriver session.
 * @returns {Promise<boolean>} `true` when a dialog was found, else `false`.
 */
async function dismissVerifyDialog(driver) {
  const dialogs = await driver.findElements(By.className('baxia-dialog'));
  if (dialogs.length === 0) {
    return false;
  }
  console.log('有验证框');

  const closeButtons = await driver.findElements(By.css('.baxia-dialog .baxia-dialog-close'));
  if (closeButtons.length === 0) {
    console.warn('verification dialog has no close button');
    return true;
  }
  try {
    await closeButtons[0].click();
  } catch (err) {
    console.warn('failed to close verification dialog:', err);
  }
  return true;
}

/**
 * Reads the second-level category names from the page currently loaded.
 *
 * The names are extracted from the page source with cheerio (as opposed to
 * `WebElement#getText`), matching how the data has always been collected.
 *
 * @param {webdriver.WebDriver} driver - Active WebDriver session.
 * @returns {Promise<Array<{name: string}>>} One entry per category link; empty
 *   when the page has no such links.
 */
async function readSubCategories(driver) {
  const links = await driver.findElements(By.css(CATEGORY_LINK_SELECTOR));
  await driver.sleep(500);
  if (links.length === 0) {
    return [];
  }
  const $ = cheerio.load(await driver.getPageSource());
  return $(CATEGORY_LINK_SELECTOR)
    .toArray()
    .map((el) => ({ name: $(el).text() }));
}

/**
 * Opens one category in a new tab, collects its second-level categories, then
 * closes the tab and returns focus to the original window.
 *
 * @param {webdriver.WebDriver} driver - Active WebDriver session.
 * @param {string} originalWindow - Handle of the window to return to.
 * @param {{name: string, url: ?string}} category - Category name and link.
 * @returns {Promise<{name: string, subType: Array<{name: string}>}>}
 */
async function scrapeCategory(driver, originalWindow, category) {
  if (!category.url) {
    console.warn(`category "${category.name}" has no link, skipping its sub-categories`);
    return { name: category.name, subType: [] };
  }

  await driver.switchTo().newWindow('tab');
  try {
    await driver.get(category.url);
    await driver.sleep(2000);
    await dismissVerifyDialog(driver);
    const subType = await readSubCategories(driver);
    await driver.sleep(2000);
    return { name: category.name, subType };
  } finally {
    // Always close the tab and restore focus, even when scraping failed,
    // so the next category starts from the original window.
    await driver.close();
    await driver.switchTo().window(originalWindow);
  }
}

/**
 * Entry point: waits for the user to log in, walks every top-level category of
 * the shop and writes the category tree to `/cate/cate.json` via `saveFile`.
 */
(async function example() {
  const driver = new webdriver.Builder()
    .forBrowser('firefox')
    .build();

  try {
    await driver.get(SHOP_SEARCH_URL);

    // Check login: the user name element only shows up once logged in.
    await driver.wait(until.elementLocated(By.className('j_Username')), LOGIN_TIMEOUT_MS, '超时未登录');
    console.log('登陆成功');
    await driver.sleep(10000);
    console.log('等待');

    // Remember the original window so each category tab can return to it.
    const originalWindow = await driver.getWindowHandle();
    await driver.sleep(1500);

    await dismissVerifyDialog(driver);

    // Read name and link of every category up front, while the shop page is
    // the active window, so no element handles are needed after tab switches.
    const links = await driver.findElements(By.css(CATEGORY_LINK_SELECTOR));
    const categories = [];
    for (const link of links) {
      const name = await link.getText();
      const url = await link.getAttribute('href');
      categories.push({ name, url });
    }

    if (categories.length === 0) {
      console.warn('no categories found, nothing to save');
      return;
    }

    // Visit categories strictly one at a time: they share a single browser.
    const cateArr = [];
    for (const [index, category] of categories.entries()) {
      const cateObj = await scrapeCategory(driver, originalWindow, category);
      cateArr.push(cateObj);
      console.log(`开始${index + 1}`, categories.length, JSON.stringify(cateObj));
    }

    // `await` is harmless if saveFile is synchronous and required if it is not.
    await saveFile('/cate/cate.json', JSON.stringify(cateArr, null, 4));
  } finally {
    // Release the browser and driver process whether or not scraping succeeded.
    await driver.quit();
  }
})().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});