这是一个很有趣的爬虫代码,可以爬取指定地区的91论坛帖子,你会得到一个yp信息集锦 So cool ooOOOO

发布时间: 2023-09-25 15:12:17 作者: lambertlt
var Nightmare = require('nightmare');

// https://duckduckgo.com

/**
 * Load one listing page of the forum in a headless Nightmare browser and
 * log the entries whose first link text contains a keyword.
 *
 * @param {number} page - Page number substituted into the listing URL.
 * @param {string} [keyword="北京"] - Text the link must contain to be kept.
 * @returns {Promise<Array<{href: string, text: string, page: number}>>}
 *   Resolves with the matching entries (possibly empty). A failure is logged
 *   and resolves with an empty array instead of rejecting.
 */
function open(page, keyword = "北京") {
    const nightmare = Nightmare({ show: false });

    // Return the chain so callers can wait for this page to finish.
    return nightmare
        .goto(`https://t0904.91zuixindizhi.com/forumdisplay.php?fid=19&page=${page}`)
        .wait('.datatable .subject,.new')
        // The callback runs inside the loaded page, so it cannot close over
        // local variables; the keyword is handed over as an explicit argument.
        .evaluate(function (needle) {
            const matches = [];
            document.querySelectorAll('.datatable .subject,.new').forEach(function (node) {
                const link = node.querySelector('a');
                if (link && link.innerText.includes(needle)) {
                    matches.push({ href: link.href, text: link.innerText });
                }
            });
            return matches;
        }, keyword)
        .end()
        .then(function (result) {
            // Tag every hit with the page it came from.
            result.forEach(function (entry) {
                entry.page = page;
            });
            if (result.length > 0) {
                console.log(result);
            } else {
                console.log(`第${page}页没有所需数据`);
            }
            return result;
        })
        .catch(function (error) {
            console.error('Search failed:', error);
            return [];
        });
}

/**
 * Create a promise that settles after a delay.
 *
 * @param {number} time - Delay in milliseconds, passed straight to setTimeout.
 * @returns {Promise<undefined>} Resolves with no value once the timer fires.
 */
function sleep(time) {
    return new Promise(function (wake) {
        setTimeout(wake, time);
    });
}

/**
 * Walk a range of listing pages, calling open() for each one and pausing
 * between pages, while timing the whole loop with console.time.
 *
 * @param {number} [startPage=462] - First page to process (inclusive).
 * @param {number} [endPage=1000] - Page at which to stop (exclusive).
 * @param {number} [delayMs=5000] - Pause between pages, in milliseconds.
 * @returns {Promise<void>} Resolves once every page in the range was handled.
 */
async function run(startPage = 462, endPage = 1000, delayMs = 5000) {
    console.time('爬取耗时:');
    for (let page = startPage; page < endPage; page++) {
        // Awaiting the call means that, when open() hands back a promise, the
        // page is finished before the pause starts and the timer covers it.
        // If open() returns nothing, the await is a harmless no-op.
        await open(page);
        await sleep(delayMs);
    }
    console.timeEnd('爬取耗时:');
}

// Start the crawl. Report anything that escapes run() rather than leaving
// the returned promise unhandled, and signal the failure via the exit code.
run().catch(function (error) {
    console.error('Crawl aborted:', error);
    process.exitCode = 1;
});