get-cate.js 3.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116
  const webdriver = require('selenium-webdriver');
  const chrome = require('selenium-webdriver/chrome');
  const firefox = require('selenium-webdriver/firefox');
  const { saveFile } = require('./tools/save-file');
  const cheerio = require('cheerio');

  const { By, until } = webdriver;

  // Shop search page whose category links are walked.
  const SHOP_SEARCH_URL =
    'https://lumn.tmall.com/search.htm?spm=a1z10.3-b-s.w5002-15438998724.1.248c3b51L3CXmJ&search=y';
  // Selector shared by the top-level category links and the sub-category links.
  const CATE_LINK_SELECTOR = '.cateAttrs li a';
  // Close button of the verification dialog that may cover the page.
  const VERIFY_DIALOG_CLOSE_SELECTOR = '.baxia-dialog .baxia-dialog-close';

  /**
   * Closes the verification dialog if its close button is present in the
   * current window; does nothing when it is absent.
   *
   * Uses `findElements` (which yields an empty list instead of throwing) so a
   * missing dialog is not treated as an error and real WebDriver failures are
   * no longer swallowed.
   *
   * @param {object} driver - Selenium WebDriver instance.
   * @returns {Promise<void>}
   */
  async function dismissVerifyDialog(driver) {
    const closeButtons = await driver.findElements(By.css(VERIFY_DIALOG_CLOSE_SELECTOR));
    if (closeButtons.length === 0) {
      return;
    }
    try {
      await closeButtons[0].click();
      console.log('有验证框');
    } catch (error) {
      // Best effort: a dialog that cannot be closed should not abort the run.
      console.warn('Failed to close the verification dialog:', error);
    }
  }

  /**
   * Opens `url` in a new tab, reads the names of the links matching
   * CATE_LINK_SELECTOR on that page, then closes the tab and switches back to
   * `originalWindow` (also when reading fails).
   *
   * @param {object} driver - Selenium WebDriver instance.
   * @param {string} url - Category page to open.
   * @param {string} originalWindow - Handle of the window to return to.
   * @returns {Promise<Array<{name: string}>>} Sub-category entries; empty when
   *   the page has no matching links.
   */
  async function readSubCategories(driver, url, originalWindow) {
    await driver.switchTo().newWindow('tab');
    try {
      await driver.get(url);
      await driver.sleep(2000);
      await dismissVerifyDialog(driver);

      const subLinks = await driver.findElements(By.css(CATE_LINK_SELECTOR));
      await driver.sleep(500);

      let subTypes = [];
      if (subLinks.length > 0) {
        // Names are read from a snapshot of the page source via cheerio.
        const $ = cheerio.load(await driver.getPageSource());
        subTypes = $(CATE_LINK_SELECTOR)
          .toArray()
          .map((link) => ({ name: $(link).text() }));
      }
      await driver.sleep(2000);
      return subTypes;
    } finally {
      // Always leave the browser with only the original window focused.
      await driver.close();
      await driver.switchTo().window(originalWindow);
    }
  }

  /**
   * Entry point: opens the shop search page in Firefox, waits for the user to
   * log in, visits every category link in its own tab to collect sub-category
   * names, and writes the result to '/cate/cate.json'.
   */
  (async function example() {
    const driver = await new webdriver.Builder()
      .forBrowser('firefox')
      .build();

    try {
      await driver.get(SHOP_SEARCH_URL);

      // Wait until the logged-in user name element shows up.
      await driver.wait(until.elementLocated(By.className('j_Username')), 100000, '超时未登录');
      console.log('登陆成功');
      await driver.sleep(10000);
      console.log('等待');

      // Remember the original window so each category tab can return to it.
      const originalWindow = await driver.getWindowHandle();
      await driver.sleep(1500);

      await dismissVerifyDialog(driver);

      const cateArr = [];
      let cateIndex = 0;
      // The list is re-queried after every visit, as element handles are only
      // looked up against the currently focused window.
      let cateList = await driver.findElements(By.css(CATE_LINK_SELECTOR));
      while (cateIndex < cateList.length) {
        const curCate = cateList[cateIndex];
        const name = await curCate.getText();
        const url = await curCate.getAttribute('href');

        const cateObj = {
          name,
          subType: await readSubCategories(driver, url, originalWindow),
        };
        cateArr.push(cateObj);

        cateIndex += 1;
        console.log(`开始${cateIndex}`, cateList.length, JSON.stringify(cateObj));

        cateList = await driver.findElements(By.css(CATE_LINK_SELECTOR));
      }

      if (cateArr.length === 0) {
        throw new Error(`No category links matched "${CATE_LINK_SELECTOR}"; nothing to save`);
      }

      // NOTE(review): saveFile is project-local; awaiting it is harmless if it
      // is synchronous and required before quitting if it returns a promise.
      await saveFile('/cate/cate.json', JSON.stringify(cateArr, null, 4));
    } finally {
      // Release the browser session even when scraping fails.
      await driver.quit();
    }
  })().catch((error) => {
    console.error(error);
    process.exitCode = 1;
  });