const webdriver = require('selenium-webdriver');
// NOTE(review): `chrome` and `firefox` are not referenced anywhere below;
// the requires are kept so the file-level dependency list is unchanged.
const chrome = require('selenium-webdriver/chrome');
const firefox = require('selenium-webdriver/firefox');
const { saveFile } = require('./tools/save-file');
const cheerio = require('cheerio');

const { By, until } = webdriver;

// Shop search page that lists the top-level categories.
const START_URL =
  'https://lumn.tmall.com/search.htm?spm=a1z10.3-b-s.w5002-15438998724.1.248c3b51L3CXmJ&search=y';
// Selector for category links; the same selector is used on the start page
// (top-level categories) and on each category page (sub-categories).
const CATE_LINK_SELECTOR = '.cateAttrs li a';
const DIALOG_CLASS = 'baxia-dialog';
const DIALOG_CLOSE_SELECTOR = '.baxia-dialog .baxia-dialog-close';

/**
 * Closes the verification dialog on the current page if one is present.
 *
 * Uses `findElements` (which yields an empty list when nothing matches)
 * instead of `findElement` + try/catch, so a missing dialog is not treated as
 * an exception and unrelated errors are no longer swallowed silently.
 *
 * @param {object} driver - Selenium WebDriver session.
 * @returns {Promise<boolean>} true when a dialog was found, false otherwise.
 */
async function dismissVerifyDialog(driver) {
  const dialogs = await driver.findElements(By.className(DIALOG_CLASS));
  if (dialogs.length === 0) {
    return false;
  }
  console.log('有验证框');

  const closeButtons = await driver.findElements(By.css(DIALOG_CLOSE_SELECTOR));
  if (closeButtons.length === 0) {
    return true;
  }
  try {
    await closeButtons[0].click();
  } catch (err) {
    // Best effort: a dialog that cannot be closed should not abort the run,
    // but the failure must be visible.
    console.warn('Failed to close verification dialog:', err.message);
  }
  return true;
}

/**
 * Reads the names of all category links from the current page's HTML source.
 *
 * @param {object} driver - Selenium WebDriver session.
 * @returns {Promise<Array<{name: string}>>} one entry per matched link.
 */
async function readSubCategories(driver) {
  const $ = cheerio.load(await driver.getPageSource());
  return $(CATE_LINK_SELECTOR)
    .map((i, el) => ({ name: $(el).text() }))
    .get();
}

/**
 * Opens one category in a new tab, collects its sub-categories, then closes
 * the tab and returns focus to the original window.
 *
 * @param {object} driver - Selenium WebDriver session.
 * @param {string} originalWindow - handle of the window holding the category list.
 * @param {object} link - WebElement of the category link in the original window.
 * @returns {Promise<{name: string, subType: Array<{name: string}>}>}
 */
async function scrapeCategory(driver, originalWindow, link) {
  // Read everything needed from the link before focus moves to the new tab.
  const name = await link.getText();
  const url = await link.getAttribute('href');

  await driver.switchTo().newWindow('tab');
  try {
    await driver.get(url);
    await driver.sleep(2000);
    await dismissVerifyDialog(driver);

    // Only parse the page source when the page actually has sub-category links.
    const subLinks = await driver.findElements(By.css(CATE_LINK_SELECTOR));
    await driver.sleep(500);
    const subType = subLinks.length > 0 ? await readSubCategories(driver) : [];
    await driver.sleep(2000);

    return { name, subType };
  } finally {
    // Always close the tab and restore focus, even when scraping failed,
    // so the next category starts from the original window.
    await driver.close();
    await driver.switchTo().window(originalWindow);
  }
}

/**
 * Visits every category listed in the original window, one after another.
 *
 * @param {object} driver - Selenium WebDriver session.
 * @param {string} originalWindow - handle of the window holding the category list.
 * @returns {Promise<Array<{name: string, subType: Array<{name: string}>}>>}
 * @throws {Error} when the start page exposes no category links.
 */
async function collectCategories(driver, originalWindow) {
  const categories = [];
  for (let index = 0; ; index += 1) {
    // Re-query on every pass, as the original did, so the element references
    // are fresh after returning from the category tab.
    const cateList = await driver.findElements(By.css(CATE_LINK_SELECTOR));
    if (cateList.length === 0) {
      throw new Error(`No category links matched "${CATE_LINK_SELECTOR}"`);
    }
    if (index >= cateList.length) {
      break;
    }

    const cateObj = await scrapeCategory(driver, originalWindow, cateList[index]);
    categories.push(cateObj);
    console.log('开始' + (index + 1), cateList.length, JSON.stringify(cateObj));
  }
  return categories;
}

/**
 * Entry point: opens the shop page in Firefox, waits for the user to log in,
 * scrapes every category with its sub-categories and saves them as JSON.
 * The browser session is always quit at the end, on success or failure.
 */
(async function example() {
  const driver = await new webdriver.Builder().forBrowser('firefox').build();

  try {
    await driver.get(START_URL);

    // Wait (up to 100 s) for the element that only appears once logged in.
    await driver.wait(until.elementLocated(By.className('j_Username')), 100000, '超时未登录');
    console.log('登陆成功');
    await driver.sleep(10000);
    console.log('等待');

    // Remember the original window so each category tab can return to it.
    const originalWindow = await driver.getWindowHandle();
    await driver.sleep(1500);

    await dismissVerifyDialog(driver);

    const cateArr = await collectCategories(driver, originalWindow);
    // `await` is harmless if saveFile is synchronous and required if it
    // returns a promise — TODO confirm against ./tools/save-file.
    await saveFile('/cate.json', JSON.stringify(cateArr, null, 4), 'cate/');
  } catch (err) {
    console.error('Category scrape failed:', err);
    process.exitCode = 1;
  } finally {
    // Release the browser and driver process regardless of the outcome.
    await driver.quit();
  }
})();