Browse Source

增加分类

cr 4 years ago
parent
commit
24574416b3
9 changed files with 358 additions and 198 deletions
  1. 86 0
      app-cate.js
  2. 63 47
      app-list.js
  3. 12 150
      debug.log
  4. 115 0
      get-cate.js
  5. 22 0
      mock_data/分类模型.json
  6. BIN
      msedgedriver.exe
  7. 5 1
      tools/base-data.js
  8. 54 0
      tools/each-list.js
  9. 1 0
      tools/save-file.js

+ 86 - 0
app-cate.js

@@ -0,0 +1,86 @@
1
const webdriver = require('selenium-webdriver');
const chrome = require('selenium-webdriver/chrome');
const firefox = require('selenium-webdriver/firefox');

const { eachList } = require('./tools/each-list');

const { By, until } = webdriver;

// Shop search page used as the entry point; it lists the first-level categories.
const SHOP_SEARCH_URL = 'https://lumn.tmall.com/search.htm?spm=a1z10.3-b-s.w5002-15438998724.1.248c3b51L3CXmJ&search=y';
// Category links. The same selector is used on a category page to detect
// second-level categories.
const CATE_LINK_SELECTOR = '.cateAttrs li a';
// How long the operator has to log in manually before the script gives up.
const LOGIN_TIMEOUT_MS = 100000;
// Fixed pause that gives a freshly loaded page time to render.
const PAGE_SETTLE_MS = 1500;

/**
 * Opens `url` in a new browser tab, waits for it to settle and leaves the
 * driver focused on that tab.
 *
 * @param {webdriver.WebDriver} driver - Active WebDriver session.
 * @param {string} url - Address to load in the new tab.
 * @returns {Promise<string>} Window handle of the newly opened tab.
 */
async function openInNewTab(driver, url) {
    await driver.switchTo().newWindow('tab');
    const handle = await driver.getWindowHandle();
    await driver.get(url);
    await driver.sleep(PAGE_SETTLE_MS);
    return handle;
}

/**
 * Script entry point.
 *
 * Waits for a manual login, opens the first first-level category in a new tab,
 * opens its first second-level category (when one exists) in another tab, and
 * then hands the driver to `eachList` to walk the product list.
 */
(async function example() {
    const driver = new webdriver.Builder()
        .forBrowser('chrome')
        .build();

    // Crawl state published on `global`; presumably read by the modules under
    // ./tools (not visible from this file) - confirm before renaming any key.
    const configObj = {
        originalWindow: '',

        cateName: '',
        cateWindow: '',
        cateIndex: 0,
        cateListNum: 0,

        cate2Exist: false,
        cate2Name: '',
        // Was misspelled `catewWindow`, so the key assigned below never
        // matched the declared one.
        cate2Window: '',
        cate2Index: 0,
        cate2ListNum: 0,
    };
    global.configObj = configObj;

    // Awaited so a navigation failure surfaces here instead of as an
    // unhandled rejection racing the login wait.
    await driver.get(SHOP_SEARCH_URL);

    // Block until the logged-in user name element appears.
    await driver.wait(until.elementLocated(By.className('j_Username')), LOGIN_TIMEOUT_MS, '超时未登录');
    console.log('登陆成功');

    // Remember the window that holds the search page.
    configObj.originalWindow = await driver.getWindowHandle();

    await driver.sleep(PAGE_SETTLE_MS);

    // Collect the first-level categories.
    const cateList = await driver.findElements(By.css(CATE_LINK_SELECTOR));
    configObj.cateIndex = 0;
    configObj.cateListNum = cateList.length;
    if (cateList.length === 0) {
        throw new Error(`No category links matched "${CATE_LINK_SELECTOR}" on the search page`);
    }

    // Select the current first-level category.
    const curCate = cateList[configObj.cateIndex];
    configObj.cateName = await curCate.getText();
    const url = await curCate.getAttribute('href');

    console.log(url);

    // Open the category in its own tab so the search page stays untouched.
    configObj.cateWindow = await openInNewTab(driver, url);

    // Check whether the category page exposes second-level categories.
    const cate2List = await driver.findElements(By.css(CATE_LINK_SELECTOR));
    configObj.cate2Exist = cate2List.length > 0;
    if (configObj.cate2Exist) {
        configObj.cate2Index = 0;
        // Previously written to `cateListNum`, which clobbered the first-level
        // count and left `cate2ListNum` at 0.
        configObj.cate2ListNum = cate2List.length;

        // Select the current second-level category.
        const curCate2 = cate2List[configObj.cate2Index];
        configObj.cate2Name = await curCate2.getText();
        const url2 = await curCate2.getAttribute('href');

        configObj.cate2Window = await openInNewTab(driver, url2);
    }

    await eachList(driver);
})().catch((error) => {
    // Report failures explicitly instead of leaving an unhandled rejection.
    console.error(error);
    process.exitCode = 1;
});

+ 63 - 47
app-list.js

@@ -2,11 +2,7 @@ const webdriver = require('selenium-webdriver');
2 2
 const chrome = require('selenium-webdriver/chrome');
3 3
 const firefox = require('selenium-webdriver/firefox');
4 4
 
5
-const { saveFile } = require('./tools/save-file');
6
-const { baseData } = require('./tools/base-data');
7
-const { skuData } = require('./tools/sku-data');
8
-const { videoData } = require('./tools/video-data');
9
-const { pageInit } = require('./tools/page-init');
5
+const { eachList } = require('./tools/each-list');
10 6
 
11 7
 
12 8
 (async function example() {
@@ -15,56 +11,76 @@ const { pageInit } = require('./tools/page-init');
15 11
         .forBrowser('chrome')
16 12
         .build();
17 13
 
18
-    driver.get('https://lumn.tmall.com/search.htm?spm=a220o.1000855.w5002-15438998724.1.121957deBGk3Lm&search=y');
14
+    driver.get('https://lumn.tmall.com/search.htm?spm=a1z10.3-b-s.w4011-15438998729.1.31ba3b51KXJsTQ');
19 15
 
16
+    let configObj = {
17
+        originalWindow: '',
18
+
19
+        cateName: '',
20
+        cateWindow: '',
21
+        cateIndex: 0,
22
+        cateListNum: 0,
23
+
24
+        cate2Exist: false,
25
+        cate2Name: '',
26
+        catewWindow: '',
27
+        cate2Index: 0,
28
+        cate2ListNum: 0,
29
+    }
30
+
31
+    global.configObj = configObj;
20 32
 
21 33
     // 检查登录
22 34
     await driver.wait(webdriver.until.elementLocated(webdriver.By.className('j_Username')), 100000, '超时未登录');
23 35
     console.log('登陆成功');
24 36
 
25 37
     // 存储原始窗口的 ID
26
-    const originalWindow = await driver.getWindowHandle();
27
-
28
-    await driver.wait(webdriver.until.elementLocated(webdriver.By.js(() => document.querySelectorAll('#J_ShopSearchResult .item4line1 .item'))));
29
-    // let list = await driver.findElements(webdriver.By.js(() => document.querySelectorAll('#J_ShopSearchResult .item4line1')));
30
-
31
-    const index = 0;
32
-
33
-    async function eachList(index) {
34
-        let list = await driver.findElements(webdriver.By.js(() => document.querySelectorAll('#J_ShopSearchResult .item4line1 .item')));
35
-        console.log(['获取list ok', list.length])
36
-        await list[index].findElement(webdriver.By.tagName('a')).click();
37
-
38
-        // 等待新窗口或标签页
39
-        await driver.wait(async () => (await driver.getAllWindowHandles()).length === 2,
40
-            10000
41
-        );
42
-        // 循环执行,直到找到一个新的窗口句柄
43
-        const windows = await driver.getAllWindowHandles();
44
-        windows.forEach(async handle => {
45
-            if (handle !== originalWindow) {
46
-                console.log('切换详情tab');
47
-                await driver.switchTo().window(handle);
48
-            }
49
-        });
50
-        // 等待新标签页完成加载内容
51
-        await driver.sleep(2000);
52
-        pageInit(driver).then(res => {
53
-            index++;
54
-            if (index === list.length) {
55
-                console.log('end')
56
-            } else {
57
-                console.log(['\n\n', '开始', index + 1, '---------------------------------------------------------------------'])
58
-                driver.close();
59
-                setTimeout(() => {
60
-                    driver.switchTo().window(originalWindow).then(() => {
61
-                        eachList(index);
62
-                    })
63
-                }, 300)
64
-            }
65
-        })
38
+    configObj.originalWindow = await driver.getWindowHandle();
39
+
40
+    await driver.sleep(1500)
41
+
42
+    // 获取分类列表
43
+    const cateList = await driver.findElements(webdriver.By.js(() => document.querySelectorAll('.cateAttrs li a')));
44
+    configObj.cateIndex = 0;
45
+    configObj.cateListNum = cateList.length;
46
+
47
+    // 确定当前分类
48
+    const curCate = cateList[configObj.cateIndex];
49
+    configObj.cateName = await curCate.getText();
50
+    const url = await curCate.getAttribute('href');
51
+
52
+    console.log(url)
53
+
54
+    // 切换新窗口打开分类
55
+    await driver.switchTo().newWindow('tab');
56
+    configObj.cateWindow = await driver.getWindowHandle();
57
+
58
+    await driver.get(url);
59
+
60
+    await driver.sleep(1500)
61
+    // 判断是否有二级分类
62
+    const cate2List = await driver.findElements(webdriver.By.js(() => document.querySelectorAll('.cateAttrs li a')));
63
+    if (cate2List.length) {
64
+        configObj.cate2Exist = true;
65
+        configObj.cate2Index = 0;
66
+        configObj.cateListNum = cate2List.length;
67
+
68
+        // 确定当前分类
69
+        const curCate2 = cate2List[configObj.cate2Index];
70
+        configObj.cate2Name = await curCate2.getText();
71
+        const url2 = await curCate2.getAttribute('href');
72
+
73
+        // 切换新窗口打开分类
74
+        await driver.switchTo().newWindow('tab');
75
+        configObj.cate2Window = await driver.getWindowHandle();
76
+        await driver.get(url2);
77
+
78
+        await driver.sleep(1500)
79
+    } else {
80
+        configObj.cate2Exist = false;
66 81
     }
67 82
 
68
-    eachList(index);
83
+    await eachList(driver);
84
+
69 85
 
70 86
 })();

+ 12 - 150
debug.log

@@ -1,150 +1,12 @@
1
-[0822/105025.772:ERROR:process_reader_win.cc(123)] NtOpenThread: {无法访问} 过程已请求访问一对象,但未给访问权限。 (0xc0000022)
2
-[0822/105025.774:ERROR:exception_snapshot_win.cc(98)] thread ID 14100 not found in process
3
-[0822/105025.791:ERROR:process_reader_win.cc(123)] NtOpenThread: {无法访问} 过程已请求访问一对象,但未给访问权限。 (0xc0000022)
4
-[0822/105025.791:ERROR:exception_snapshot_win.cc(98)] thread ID 8812 not found in process
5
-[0822/105041.688:ERROR:process_reader_win.cc(123)] NtOpenThread: {无法访问} 过程已请求访问一对象,但未给访问权限。 (0xc0000022)
6
-[0822/105041.689:ERROR:exception_snapshot_win.cc(98)] thread ID 15944 not found in process
7
-[0822/105109.894:ERROR:process_reader_win.cc(123)] NtOpenThread: {无法访问} 过程已请求访问一对象,但未给访问权限。 (0xc0000022)
8
-[0822/105109.895:ERROR:exception_snapshot_win.cc(98)] thread ID 11972 not found in process
9
-[0822/105109.919:ERROR:process_reader_win.cc(123)] NtOpenThread: {无法访问} 过程已请求访问一对象,但未给访问权限。 (0xc0000022)
10
-[0822/105109.919:ERROR:exception_snapshot_win.cc(98)] thread ID 14452 not found in process
11
-[0822/105109.935:ERROR:process_reader_win.cc(123)] NtOpenThread: {无法访问} 过程已请求访问一对象,但未给访问权限。 (0xc0000022)
12
-[0822/105109.935:ERROR:exception_snapshot_win.cc(98)] thread ID 11136 not found in process
13
-[0822/110022.349:ERROR:process_reader_win.cc(123)] NtOpenThread: {无法访问} 过程已请求访问一对象,但未给访问权限。 (0xc0000022)
14
-[0822/110022.350:ERROR:exception_snapshot_win.cc(98)] thread ID 5372 not found in process
15
-[0822/110022.366:ERROR:process_reader_win.cc(123)] NtOpenThread: {无法访问} 过程已请求访问一对象,但未给访问权限。 (0xc0000022)
16
-[0822/110022.367:ERROR:exception_snapshot_win.cc(98)] thread ID 1484 not found in process
17
-[0822/111324.501:ERROR:process_reader_win.cc(123)] NtOpenThread: {无法访问} 过程已请求访问一对象,但未给访问权限。 (0xc0000022)
18
-[0822/111324.502:ERROR:exception_snapshot_win.cc(98)] thread ID 14580 not found in process
19
-[0822/111324.517:ERROR:process_reader_win.cc(123)] NtOpenThread: {无法访问} 过程已请求访问一对象,但未给访问权限。 (0xc0000022)
20
-[0822/111324.517:ERROR:exception_snapshot_win.cc(98)] thread ID 13720 not found in process
21
-[0823/145024.507:ERROR:process_reader_win.cc(123)] NtOpenThread: {无法访问} 过程已请求访问一对象,但未给访问权限。 (0xc0000022)
22
-[0823/145024.509:ERROR:exception_snapshot_win.cc(98)] thread ID 5328 not found in process
23
-[0823/145024.510:ERROR:process_reader_win.cc(123)] NtOpenThread: {无法访问} 过程已请求访问一对象,但未给访问权限。 (0xc0000022)
24
-[0823/145024.511:ERROR:exception_snapshot_win.cc(98)] thread ID 11152 not found in process
25
-[0823/145024.516:ERROR:process_reader_win.cc(123)] NtOpenThread: {无法访问} 过程已请求访问一对象,但未给访问权限。 (0xc0000022)
26
-[0823/145024.516:ERROR:exception_snapshot_win.cc(98)] thread ID 17576 not found in process
27
-[0823/145024.519:ERROR:process_reader_win.cc(123)] NtOpenThread: {无法访问} 过程已请求访问一对象,但未给访问权限。 (0xc0000022)
28
-[0823/145024.519:ERROR:exception_snapshot_win.cc(98)] thread ID 17448 not found in process
29
-[0823/163029.666:ERROR:process_reader_win.cc(123)] NtOpenThread: {无法访问} 过程已请求访问一对象,但未给访问权限。 (0xc0000022)
30
-[0823/163029.667:ERROR:exception_snapshot_win.cc(98)] thread ID 3128 not found in process
31
-[0823/163029.670:ERROR:process_reader_win.cc(123)] NtOpenThread: {无法访问} 过程已请求访问一对象,但未给访问权限。 (0xc0000022)
32
-[0823/163029.672:ERROR:exception_snapshot_win.cc(98)] thread ID 17304 not found in process
33
-[0823/163029.683:ERROR:process_reader_win.cc(123)] NtOpenThread: {无法访问} 过程已请求访问一对象,但未给访问权限。 (0xc0000022)
34
-[0823/163029.685:ERROR:exception_snapshot_win.cc(98)] thread ID 17556 not found in process
35
-[0823/163029.689:ERROR:process_reader_win.cc(123)] NtOpenThread: {无法访问} 过程已请求访问一对象,但未给访问权限。 (0xc0000022)
36
-[0823/163029.691:ERROR:exception_snapshot_win.cc(98)] thread ID 8572 not found in process
37
-[0823/163029.733:ERROR:process_reader_win.cc(123)] NtOpenThread: {无法访问} 过程已请求访问一对象,但未给访问权限。 (0xc0000022)
38
-[0823/163029.733:ERROR:exception_snapshot_win.cc(98)] thread ID 20012 not found in process
39
-[0823/163029.742:ERROR:process_reader_win.cc(123)] NtOpenThread: {无法访问} 过程已请求访问一对象,但未给访问权限。 (0xc0000022)
40
-[0823/163029.743:ERROR:exception_snapshot_win.cc(98)] thread ID 2876 not found in process
41
-[0823/163029.747:ERROR:process_reader_win.cc(123)] NtOpenThread: {无法访问} 过程已请求访问一对象,但未给访问权限。 (0xc0000022)
42
-[0823/163029.747:ERROR:exception_snapshot_win.cc(98)] thread ID 16220 not found in process
43
-[0823/163029.763:ERROR:process_reader_win.cc(123)] NtOpenThread: {无法访问} 过程已请求访问一对象,但未给访问权限。 (0xc0000022)
44
-[0823/163029.764:ERROR:exception_snapshot_win.cc(98)] thread ID 18364 not found in process
45
-[0823/164337.555:ERROR:process_reader_win.cc(123)] NtOpenThread: {无法访问} 过程已请求访问一对象,但未给访问权限。 (0xc0000022)
46
-[0823/164337.556:ERROR:exception_snapshot_win.cc(98)] thread ID 18744 not found in process
47
-[0823/164337.559:ERROR:process_reader_win.cc(123)] NtOpenThread: {无法访问} 过程已请求访问一对象,但未给访问权限。 (0xc0000022)
48
-[0823/164337.562:ERROR:exception_snapshot_win.cc(98)] thread ID 20128 not found in process
49
-[0823/164337.561:ERROR:process_reader_win.cc(123)] NtOpenThread: {无法访问} 过程已请求访问一对象,但未给访问权限。 (0xc0000022)
50
-[0823/164337.562:ERROR:exception_snapshot_win.cc(98)] thread ID 20380 not found in process
51
-[0823/164337.568:ERROR:process_reader_win.cc(123)] NtOpenThread: {无法访问} 过程已请求访问一对象,但未给访问权限。 (0xc0000022)
52
-[0823/164337.570:ERROR:exception_snapshot_win.cc(98)] thread ID 840 not found in process
53
-[0823/164337.642:ERROR:process_reader_win.cc(123)] NtOpenThread: {无法访问} 过程已请求访问一对象,但未给访问权限。 (0xc0000022)
54
-[0823/164337.642:ERROR:exception_snapshot_win.cc(98)] thread ID 16540 not found in process
55
-[0823/164337.646:ERROR:process_reader_win.cc(123)] NtOpenThread: {无法访问} 过程已请求访问一对象,但未给访问权限。 (0xc0000022)
56
-[0823/164337.646:ERROR:exception_snapshot_win.cc(98)] thread ID 20028 not found in process
57
-[0823/164337.647:ERROR:process_reader_win.cc(123)] NtOpenThread: {无法访问} 过程已请求访问一对象,但未给访问权限。 (0xc0000022)
58
-[0823/164337.647:ERROR:process_reader_win.cc(123)] NtOpenThread: {无法访问} 过程已请求访问一对象,但未给访问权限。 (0xc0000022)
59
-[0823/164337.647:ERROR:exception_snapshot_win.cc(98)] thread ID 32 not found in process
60
-[0823/164337.647:ERROR:exception_snapshot_win.cc(98)] thread ID 8764 not found in process
61
-[0823/165900.802:ERROR:process_reader_win.cc(123)] NtOpenThread: {无法访问} 过程已请求访问一对象,但未给访问权限。 (0xc0000022)
62
-[0823/165900.805:ERROR:exception_snapshot_win.cc(98)] thread ID 11964 not found in process
63
-[0823/165900.829:ERROR:process_reader_win.cc(123)] NtOpenThread: {无法访问} 过程已请求访问一对象,但未给访问权限。 (0xc0000022)
64
-[0823/165900.829:ERROR:exception_snapshot_win.cc(98)] thread ID 17296 not found in process
65
-[0823/165900.829:ERROR:process_reader_win.cc(123)] NtOpenThread: {无法访问} 过程已请求访问一对象,但未给访问权限。 (0xc0000022)
66
-[0823/165900.830:ERROR:exception_snapshot_win.cc(98)] thread ID 18588 not found in process
67
-[0823/165900.837:ERROR:process_reader_win.cc(123)] NtOpenThread: {无法访问} 过程已请求访问一对象,但未给访问权限。 (0xc0000022)
68
-[0823/165900.839:ERROR:exception_snapshot_win.cc(98)] thread ID 16468 not found in process
69
-[0823/165900.861:ERROR:process_reader_win.cc(123)] NtOpenThread: {无法访问} 过程已请求访问一对象,但未给访问权限。 (0xc0000022)
70
-[0823/165900.861:ERROR:exception_snapshot_win.cc(98)] thread ID 8600 not found in process
71
-[0823/165900.864:ERROR:process_reader_win.cc(123)] NtOpenThread: {无法访问} 过程已请求访问一对象,但未给访问权限。 (0xc0000022)
72
-[0823/165900.864:ERROR:exception_snapshot_win.cc(98)] thread ID 15436 not found in process
73
-[0823/165900.867:ERROR:process_reader_win.cc(123)] NtOpenThread: {无法访问} 过程已请求访问一对象,但未给访问权限。 (0xc0000022)
74
-[0823/165900.867:ERROR:exception_snapshot_win.cc(98)] thread ID 15780 not found in process
75
-[0823/170200.737:ERROR:process_reader_win.cc(123)] NtOpenThread: {无法访问} 过程已请求访问一对象,但未给访问权限。 (0xc0000022)
76
-[0823/170200.738:ERROR:exception_snapshot_win.cc(98)] thread ID 8548 not found in process
77
-[0823/170200.752:ERROR:process_reader_win.cc(123)] NtOpenThread: {无法访问} 过程已请求访问一对象,但未给访问权限。 (0xc0000022)
78
-[0823/170200.753:ERROR:exception_snapshot_win.cc(98)] thread ID 18364 not found in process
79
-[0823/170429.007:ERROR:process_reader_win.cc(123)] NtOpenThread: {无法访问} 过程已请求访问一对象,但未给访问权限。 (0xc0000022)
80
-[0823/170429.009:ERROR:exception_snapshot_win.cc(98)] thread ID 18300 not found in process
81
-[0823/170429.033:ERROR:process_reader_win.cc(123)] NtOpenThread: {无法访问} 过程已请求访问一对象,但未给访问权限。 (0xc0000022)
82
-[0823/170429.033:ERROR:exception_snapshot_win.cc(98)] thread ID 16016 not found in process
83
-[0823/170429.048:ERROR:process_reader_win.cc(123)] NtOpenThread: {无法访问} 过程已请求访问一对象,但未给访问权限。 (0xc0000022)
84
-[0823/170429.048:ERROR:exception_snapshot_win.cc(98)] thread ID 17788 not found in process
85
-[0823/171014.426:ERROR:process_reader_win.cc(123)] NtOpenThread: {无法访问} 过程已请求访问一对象,但未给访问权限。 (0xc0000022)
86
-[0823/171014.428:ERROR:exception_snapshot_win.cc(98)] thread ID 17624 not found in process
87
-[0823/171014.427:ERROR:process_reader_win.cc(123)] NtOpenThread: {无法访问} 过程已请求访问一对象,但未给访问权限。 (0xc0000022)
88
-[0823/171014.429:ERROR:exception_snapshot_win.cc(98)] thread ID 16940 not found in process
89
-[0823/171014.466:ERROR:process_reader_win.cc(123)] NtOpenThread: {无法访问} 过程已请求访问一对象,但未给访问权限。 (0xc0000022)
90
-[0823/171014.466:ERROR:process_reader_win.cc(123)] NtOpenThread: {无法访问} 过程已请求访问一对象,但未给访问权限。 (0xc0000022)
91
-[0823/171014.466:ERROR:exception_snapshot_win.cc(98)] thread ID 5140 not found in process
92
-[0823/171014.466:ERROR:exception_snapshot_win.cc(98)] thread ID 17688 not found in process
93
-[0823/171312.122:ERROR:process_reader_win.cc(123)] NtOpenThread: {无法访问} 过程已请求访问一对象,但未给访问权限。 (0xc0000022)
94
-[0823/171312.123:ERROR:exception_snapshot_win.cc(98)] thread ID 12504 not found in process
95
-[0823/171312.132:ERROR:process_reader_win.cc(123)] NtOpenThread: {无法访问} 过程已请求访问一对象,但未给访问权限。 (0xc0000022)
96
-[0823/171312.133:ERROR:exception_snapshot_win.cc(98)] thread ID 18252 not found in process
97
-[0823/172626.409:ERROR:process_reader_win.cc(123)] NtOpenThread: {无法访问} 过程已请求访问一对象,但未给访问权限。 (0xc0000022)
98
-[0823/172626.411:ERROR:exception_snapshot_win.cc(98)] thread ID 18600 not found in process
99
-[0823/172626.457:ERROR:process_reader_win.cc(123)] NtOpenThread: {无法访问} 过程已请求访问一对象,但未给访问权限。 (0xc0000022)
100
-[0823/172626.463:ERROR:exception_snapshot_win.cc(98)] thread ID 18424 not found in process
101
-[0823/172626.499:ERROR:process_reader_win.cc(123)] NtOpenThread: {无法访问} 过程已请求访问一对象,但未给访问权限。 (0xc0000022)
102
-[0823/172626.499:ERROR:exception_snapshot_win.cc(98)] thread ID 19980 not found in process
103
-[0823/172626.511:ERROR:process_reader_win.cc(123)] NtOpenThread: {无法访问} 过程已请求访问一对象,但未给访问权限。 (0xc0000022)
104
-[0823/172626.511:ERROR:exception_snapshot_win.cc(98)] thread ID 1396 not found in process
105
-[0823/175240.200:ERROR:process_reader_win.cc(123)] NtOpenThread: {无法访问} 过程已请求访问一对象,但未给访问权限。 (0xc0000022)
106
-[0823/175240.201:ERROR:exception_snapshot_win.cc(98)] thread ID 19548 not found in process
107
-[0823/175240.218:ERROR:process_reader_win.cc(123)] NtOpenThread: {无法访问} 过程已请求访问一对象,但未给访问权限。 (0xc0000022)
108
-[0823/175240.219:ERROR:exception_snapshot_win.cc(98)] thread ID 17048 not found in process
109
-[0823/180241.199:ERROR:process_reader_win.cc(123)] NtOpenThread: {无法访问} 过程已请求访问一对象,但未给访问权限。 (0xc0000022)
110
-[0823/180241.200:ERROR:exception_snapshot_win.cc(98)] thread ID 5520 not found in process
111
-[0823/180241.218:ERROR:process_reader_win.cc(123)] NtOpenThread: {无法访问} 过程已请求访问一对象,但未给访问权限。 (0xc0000022)
112
-[0823/180241.218:ERROR:exception_snapshot_win.cc(98)] thread ID 1340 not found in process
113
-[0823/203435.284:ERROR:process_reader_win.cc(123)] NtOpenThread: {无法访问} 过程已请求访问一对象,但未给访问权限。 (0xc0000022)
114
-[0823/203435.284:ERROR:process_reader_win.cc(123)] NtOpenThread: {无法访问} 过程已请求访问一对象,但未给访问权限。 (0xc0000022)
115
-[0823/203435.287:ERROR:exception_snapshot_win.cc(98)] thread ID 5292 not found in process
116
-[0823/203435.287:ERROR:exception_snapshot_win.cc(98)] thread ID 18336 not found in process
117
-[0823/203435.302:ERROR:process_reader_win.cc(123)] NtOpenThread: {无法访问} 过程已请求访问一对象,但未给访问权限。 (0xc0000022)
118
-[0823/203435.303:ERROR:exception_snapshot_win.cc(98)] thread ID 18672 not found in process
119
-[0823/203435.303:ERROR:process_reader_win.cc(123)] NtOpenThread: {无法访问} 过程已请求访问一对象,但未给访问权限。 (0xc0000022)
120
-[0823/203435.303:ERROR:exception_snapshot_win.cc(98)] thread ID 20172 not found in process
121
-[0823/203435.311:ERROR:process_reader_win.cc(123)] NtOpenThread: {无法访问} 过程已请求访问一对象,但未给访问权限。 (0xc0000022)
122
-[0823/203435.311:ERROR:exception_snapshot_win.cc(98)] thread ID 12228 not found in process
123
-[0823/211736.971:ERROR:process_reader_win.cc(123)] NtOpenThread: {无法访问} 过程已请求访问一对象,但未给访问权限。 (0xc0000022)
124
-[0823/211736.971:ERROR:exception_snapshot_win.cc(98)] thread ID 9800 not found in process
125
-[0823/211736.990:ERROR:process_reader_win.cc(123)] NtOpenThread: {无法访问} 过程已请求访问一对象,但未给访问权限。 (0xc0000022)
126
-[0823/211736.991:ERROR:exception_snapshot_win.cc(98)] thread ID 3508 not found in process
127
-[0823/212508.784:ERROR:process_reader_win.cc(123)] NtOpenThread: {无法访问} 过程已请求访问一对象,但未给访问权限。 (0xc0000022)
128
-[0823/212508.785:ERROR:exception_snapshot_win.cc(98)] thread ID 13360 not found in process
129
-[0823/212508.803:ERROR:process_reader_win.cc(123)] NtOpenThread: {无法访问} 过程已请求访问一对象,但未给访问权限。 (0xc0000022)
130
-[0823/212508.803:ERROR:exception_snapshot_win.cc(98)] thread ID 3152 not found in process
131
-[0823/212854.130:ERROR:process_reader_win.cc(123)] NtOpenThread: {无法访问} 过程已请求访问一对象,但未给访问权限。 (0xc0000022)
132
-[0823/212854.131:ERROR:exception_snapshot_win.cc(98)] thread ID 11928 not found in process
133
-[0823/212854.157:ERROR:process_reader_win.cc(123)] NtOpenThread: {无法访问} 过程已请求访问一对象,但未给访问权限。 (0xc0000022)
134
-[0823/212854.157:ERROR:exception_snapshot_win.cc(98)] thread ID 19112 not found in process
135
-[0823/221959.541:ERROR:process_reader_win.cc(123)] NtOpenThread: {无法访问} 过程已请求访问一对象,但未给访问权限。 (0xc0000022)
136
-[0823/221959.542:ERROR:exception_snapshot_win.cc(98)] thread ID 15844 not found in process
137
-[0823/221959.573:ERROR:process_reader_win.cc(123)] NtOpenThread: {无法访问} 过程已请求访问一对象,但未给访问权限。 (0xc0000022)
138
-[0823/221959.573:ERROR:exception_snapshot_win.cc(98)] thread ID 6560 not found in process
139
-[0824/103604.894:ERROR:process_reader_win.cc(123)] NtOpenThread: {无法访问} 过程已请求访问一对象,但未给访问权限。 (0xc0000022)
140
-[0824/103604.896:ERROR:exception_snapshot_win.cc(98)] thread ID 14380 not found in process
141
-[0824/103604.919:ERROR:process_reader_win.cc(123)] NtOpenThread: {无法访问} 过程已请求访问一对象,但未给访问权限。 (0xc0000022)
142
-[0824/103604.920:ERROR:exception_snapshot_win.cc(98)] thread ID 6956 not found in process
143
-[0824/103604.935:ERROR:process_reader_win.cc(123)] NtOpenThread: {无法访问} 过程已请求访问一对象,但未给访问权限。 (0xc0000022)
144
-[0824/103604.935:ERROR:exception_snapshot_win.cc(98)] thread ID 1392 not found in process
145
-[0824/103604.940:ERROR:process_reader_win.cc(123)] NtOpenThread: {无法访问} 过程已请求访问一对象,但未给访问权限。 (0xc0000022)
146
-[0824/103604.940:ERROR:exception_snapshot_win.cc(98)] thread ID 19596 not found in process
147
-[0824/103729.788:ERROR:process_reader_win.cc(123)] NtOpenThread: {无法访问} 过程已请求访问一对象,但未给访问权限。 (0xc0000022)
148
-[0824/103729.789:ERROR:exception_snapshot_win.cc(98)] thread ID 1372 not found in process
149
-[0824/103729.808:ERROR:process_reader_win.cc(123)] NtOpenThread: {无法访问} 过程已请求访问一对象,但未给访问权限。 (0xc0000022)
150
-[0824/103729.808:ERROR:exception_snapshot_win.cc(98)] thread ID 18740 not found in process
1
+[0825/183359.289:ERROR:process_reader_win.cc(123)] NtOpenThread: {无法访问} 过程已请求访问一对象,但未给访问权限。 (0xc0000022)
2
+[0825/183359.291:ERROR:exception_snapshot_win.cc(98)] thread ID 1992 not found in process
3
+[0825/183359.315:ERROR:process_reader_win.cc(123)] NtOpenThread: {无法访问} 过程已请求访问一对象,但未给访问权限。 (0xc0000022)
4
+[0825/183359.316:ERROR:exception_snapshot_win.cc(98)] thread ID 18672 not found in process
5
+[0825/183359.322:ERROR:process_reader_win.cc(123)] NtOpenThread: {无法访问} 过程已请求访问一对象,但未给访问权限。 (0xc0000022)
6
+[0825/183359.322:ERROR:exception_snapshot_win.cc(98)] thread ID 21352 not found in process
7
+[0825/185506.847:ERROR:process_reader_win.cc(123)] NtOpenThread: {无法访问} 过程已请求访问一对象,但未给访问权限。 (0xc0000022)
8
+[0825/185506.850:ERROR:exception_snapshot_win.cc(98)] thread ID 16348 not found in process
9
+[0825/185506.861:ERROR:process_reader_win.cc(123)] NtOpenThread: {无法访问} 过程已请求访问一对象,但未给访问权限。 (0xc0000022)
10
+[0825/185506.862:ERROR:exception_snapshot_win.cc(98)] thread ID 11524 not found in process
11
+[0825/185506.885:ERROR:process_reader_win.cc(123)] NtOpenThread: {无法访问} 过程已请求访问一对象,但未给访问权限。 (0xc0000022)
12
+[0825/185506.886:ERROR:exception_snapshot_win.cc(98)] thread ID 8212 not found in process

+ 115 - 0
get-cate.js

@@ -0,0 +1,115 @@
1
const webdriver = require('selenium-webdriver');
const chrome = require('selenium-webdriver/chrome');
const firefox = require('selenium-webdriver/firefox');
const { saveFile } = require('./tools/save-file');
const cheerio = require('cheerio');

const { By, until } = webdriver;

// Shop search page that lists the first-level categories.
const SHOP_SEARCH_URL = 'https://lumn.tmall.com/search.htm?spm=a1z10.3-b-s.w5002-15438998724.1.248c3b51L3CXmJ&search=y';
// Category links. The same selector is used on a category page to read its
// second-level categories.
const CATE_LINK_SELECTOR = '.cateAttrs li a';

/**
 * Closes the "baxia" verification dialog when one is present on the current
 * page. This is best effort: a dialog that cannot be closed is reported but
 * does not abort the crawl.
 *
 * @param {webdriver.WebDriver} driver - Active WebDriver session.
 * @returns {Promise<void>}
 */
async function dismissVerifyDialog(driver) {
    // findElements returns an empty array instead of throwing, so "no dialog"
    // is not handled through an exception.
    const dialogs = await driver.findElements(By.className('baxia-dialog'));
    if (dialogs.length === 0) {
        return;
    }
    console.log('有验证框');
    try {
        const closeButton = await driver.findElement(By.css('.baxia-dialog .baxia-dialog-close'));
        // Awaited so a failed click is caught here rather than becoming an
        // unhandled rejection.
        await closeButton.click();
    } catch (error) {
        console.warn('Could not close the verification dialog:', error.message);
    }
}

/**
 * Reads the second-level category names from the page currently loaded in the
 * driver.
 *
 * @param {webdriver.WebDriver} driver - Active WebDriver session.
 * @returns {Promise<Array<{name: string}>>} One entry per category link; empty
 *   when the page has none.
 */
async function readSubCategories(driver) {
    const links = await driver.findElements(By.css(CATE_LINK_SELECTOR));
    await driver.sleep(500);

    const subTypes = [];
    if (links.length === 0) {
        return subTypes;
    }

    // Names are taken from the page source so the saved text is exactly what
    // the markup contains.
    const $ = cheerio.load(await driver.getPageSource());
    $(CATE_LINK_SELECTOR).each((i, e) => {
        subTypes.push({
            name: $(e).text()
        });
    });
    return subTypes;
}

/**
 * Script entry point.
 *
 * Waits for a manual login, visits every first-level category in a temporary
 * tab, records its second-level category names, and saves the resulting tree
 * through `saveFile` as '/cate/cate.json'.
 */
(async function example() {
    const driver = new webdriver.Builder()
        .forBrowser('firefox')
        .build();

    // Awaited so a navigation failure is reported instead of being lost.
    await driver.get(SHOP_SEARCH_URL);

    // Block until the logged-in user name element appears.
    await driver.wait(until.elementLocated(By.className('j_Username')), 100000, '超时未登录');
    console.log('登陆成功');

    await driver.sleep(10000);
    console.log('等待');

    // Remember the window that holds the search page.
    const originalWindow = await driver.getWindowHandle();

    await driver.sleep(1500);

    await dismissVerifyDialog(driver);

    const cateArr = [];
    // A sequential loop replaces the former un-awaited recursion, so every
    // failure propagates to the handler at the bottom of the file.
    for (let cateIndex = 0; ; cateIndex += 1) {
        // Re-query on every pass: the driver has just returned from another tab.
        const cateList = await driver.findElements(By.css(CATE_LINK_SELECTOR));
        if (cateIndex >= cateList.length) {
            break;
        }

        const curCate = cateList[cateIndex];
        const cateObj = {
            name: await curCate.getText(),
            subType: []
        };
        const url = await curCate.getAttribute('href');

        // Visit the category in its own tab so the search page stays loaded.
        await driver.switchTo().newWindow('tab');
        try {
            await driver.get(url);
            await driver.sleep(2000);

            await dismissVerifyDialog(driver);

            cateObj.subType = await readSubCategories(driver);
            await driver.sleep(2000);
        } finally {
            // Always drop the temporary tab and return to the search page,
            // even when reading the category failed.
            await driver.close();
            await driver.switchTo().window(originalWindow);
        }

        cateArr.push(cateObj);
        console.log('开始' + (cateIndex + 1), cateList.length, JSON.stringify(cateObj));
    }

    if (cateArr.length === 0) {
        // Refuse to write an empty result; the page most likely did not load.
        throw new Error(`No category links matched "${CATE_LINK_SELECTOR}" on the search page`);
    }

    await saveFile('/cate/cate.json', JSON.stringify(cateArr, null, 4));
})().catch((error) => {
    // Report failures explicitly instead of leaving an unhandled rejection.
    console.error(error);
    process.exitCode = 1;
});

+ 22 - 0
mock_data/分类模型.json

@@ -0,0 +1,22 @@
1
+[{
2
+    "name": "一级分类名称",
3
+    "subType": [{
4
+            "name": "二级分类名称",
5
+            "subType": [{
6
+                "name": "三级分类名称",
7
+                "subType": []
8
+            }]
9
+        },
10
+        {}
11
+    ]
12
+},
13
+{
14
+    "name": "",
15
+    "subType": [{
16
+            "name": "",
17
+            "subType": []
18
+        },
19
+        {}
20
+    ]
21
+}
22
+]

BIN
msedgedriver.exe


+ 5 - 1
tools/base-data.js

@@ -4,7 +4,7 @@ var entities = require('html-entities').XmlEntities;
4 4
 
5 5
 function baseData(driver) {
6 6
     return new Promise((resolve, reject) => {
7
-        driver.getPageSource().then(htmlStr => {
7
+        driver.getPageSource().then(async htmlStr => {
8 8
             const data = {};
9 9
             const $ = cheerio.load(htmlStr);
10 10
 
@@ -34,6 +34,10 @@ function baseData(driver) {
34 34
                 data.details.push($(e).attr('src'));
35 35
             });
36 36
 
37
+            // id
38
+            const url = await driver.getCurrentUrl();
39
+            data.id = url.replace(/^.+id=(\d+)&.+$/igm,'$1');
40
+            
37 41
             resolve(data);
38 42
         });
39 43
 

+ 54 - 0
tools/each-list.js

@@ -0,0 +1,54 @@
1
// Walks the product list of the current category page and runs pageInit on
// each product's detail page.
const fs = require('fs');
const path = require('path');
const webdriver = require('selenium-webdriver');
const { pageInit } = require('./page-init');

const { By, until } = webdriver;

// Product cards inside the shop search result.
const ITEM_SELECTOR = '#J_ShopSearchResult .item4line1 .item';
// Pause that lets a detail page finish loading before it is processed.
const DETAIL_SETTLE_MS = 2000;
// Short pause between closing a detail tab and refocusing the list tab.
const TAB_SWITCH_DELAY_MS = 300;

/**
 * Opens every product of the list page currently loaded in `driver` in a
 * temporary tab and runs `pageInit` on it, one product at a time.
 *
 * The driver must be focused on the list page when this is called. It is
 * focused on that same window again when the returned promise settles, and
 * every detail tab (including the last one) has been closed.
 *
 * @param {webdriver.WebDriver} driver - Active WebDriver session.
 * @param {number} [recommendedCount=8] - Number of trailing list entries to
 *   skip. The page appends recommended products to the result list and there
 *   is currently no selector that tells them apart.
 * @returns {Promise<void>} Resolves once the category has been processed;
 *   rejects with the first error raised while processing a product.
 */
async function eachList(driver, recommendedCount = 8) {
    // NOTE(review): no timeout is passed, so this waits indefinitely for the
    // first product card - kept as in the original; confirm that is intended.
    await driver.wait(until.elementLocated(By.css(ITEM_SELECTOR)));

    const listWindow = await driver.getWindowHandle();

    let index = 0;
    for (;;) {
        const list = await driver.findElements(By.css(ITEM_SELECTOR));

        // Clamp at zero: a list shorter than `recommendedCount` used to yield
        // a negative end index that the loop could never reach.
        const endNum = Math.max(0, list.length - recommendedCount);

        console.log(['获取list ok', endNum]);
        if (index >= endNum) {
            break;
        }

        const listItem = await list[index].findElement(By.tagName('a'));
        const url = await listItem.getAttribute('href');

        // Process the product in its own tab so the list page stays loaded.
        await driver.switchTo().newWindow('tab');
        try {
            await driver.get(url);
            await driver.sleep(DETAIL_SETTLE_MS);
            await pageInit(driver);
        } finally {
            // Always close the detail tab and refocus the list, even when
            // processing the product failed.
            await driver.close();
            await driver.sleep(TAB_SWITCH_DELAY_MS);
            await driver.switchTo().window(listWindow);
        }

        index += 1;
        if (index >= endNum) {
            break;
        }
        console.log(['\n\n', '开始', index + 1, '---------------------------------------------------------------------']);
    }

    console.log('一个分类结束');
}

exports.eachList = eachList;

+ 1 - 0
tools/save-file.js

@@ -2,6 +2,7 @@
2 2
 const fs = require('fs');
3 3
 const path = require('path');
4 4
 const { log_w } = require('./log-w');
5
+const { config } = require('process');
5 6
 
6 7
 function saveFile(fileName, con) {
7 8