您的位置 首页 php

讲解thinkphp5.1如何实现多线程爬虫

下面 thinkphp 框架教程栏目将给大家讲解如何在 thinkphp5.1 中利用 cli 命令行 + Guzzle 类库实现“多线程”爬虫(严格来说是基于 Guzzle 请求池的并发异步请求,并非真正的多线程),希望对需要的朋友有所帮助!

创建一个cli命令

php think make:command Thread thread

测试能否成功执行

php think thread

安装Guzzle类库(在项目根目录执行:composer require guzzlehttp/guzzle)

文档地址:guzzle文档地址(https://guzzle-cn.readthedocs.io/zh_CN/latest/quickstart.html)

实现代码

<?php
/**
 * Created by.
 * User: Jim
 * Date: 2020/9/29
 * Time: 14:31
 */

namespace app\command;

use GuzzleHttp\Client;
use GuzzleHttp\Pool;
use think\console\Command;
use think\console\Input;
use think\console\Output;

/**
 * Console command that fires a fixed number of POST requests through a
 * Guzzle request pool, keeping a bounded number of them in flight at once.
 *
 * Class Thread
 * @package app\command
 * Documentation: https://guzzle-cn.readthedocs.io/zh_CN/latest/quickstart.html
 */
class Thread extends Command
{
    /**
     * Total number of requests to send.
     * @var int
     */
    protected $totalPageCount = 50;

    /**
     * Ordinal of the request currently being settled (starts at 1).
     * @var int
     */
    protected static $counter = 1;

    /**
     * Value passed to the pool's 'concurrency' option, i.e. how many
     * requests may be in flight at the same time.
     * @var int
     */
    protected $threads = 20;

    /**
     * Registers the command under the name "thread".
     */
    protected function configure()
    {
        // Command configuration
        $this->setName('thread');
    }

    /**
     * Sends $totalPageCount POST requests through a Guzzle pool and writes
     * every response body (or failure reason) to the console.
     *
     * @param Input  $input  Console input (not read by this command).
     * @param Output $output Console output the results are written to.
     */
    protected function execute(Input $input, Output $output)
    {
        // The counter is static, so reset it; otherwise a second run in the
        // same process would announce completion on its very first response.
        self::$counter = 1;

        $client = new Client();
        $uri = 'https://apinew.juejin.im/content_api/v1/short_msg/detail';

        // Lazily yields one request factory per request. A counted loop is
        // used instead of range(1, $total) because range(1, 0) yields [1, 0]
        // and would send two requests when the total is zero.
        $requests = function ($total) use ($client, $uri) {
            for ($i = 0; $i < $total; $i++) {
                yield function () use ($client, $uri) {
                    return $client->postAsync($uri, [
                        // NOTE(review): TLS certificate verification is disabled here;
                        // confirm this is intended before using outside a demo.
                        'verify' => false,
                        'json' => [
                            'msg_id' => '6845185452727599118'
                        ]
                    ]);
                };
            }
        };

        $pool = new Pool($client, $requests($this->totalPageCount), [
            'concurrency' => $this->threads,
            // Request succeeded
            'fulfilled' => function ($response, $index) use ($output) {
                $res = $response->getBody()->getContents();
                $output->writeln($res);
                $output->writeln("正在执行第{$index}个·····");
                $this->reportIfFinished($output);
            },
            // Request failed
            'rejected' => function ($reason, $index) use ($output) {
                $output->writeln("执行失败,{$reason}");
                // Failed requests count as settled too; otherwise a single
                // failure would prevent the end-of-run message from appearing.
                $this->reportIfFinished($output);
            },
        ]);

        // Start the transfers and block until every request has settled.
        $promise = $pool->promise();
        $promise->wait();
    }

    /**
     * Advances the settled-request counter and prints the end-of-run banner
     * once the last request has settled.
     *
     * @param Output $output Console output the banner is written to.
     */
    private function reportIfFinished(Output $output)
    {
        if ($this->checkThreadIsEnd()) {
            $output->writeln("------------请求结束---------");
        }
    }

    /**
     * Checks whether the request being settled is the last one; when it is
     * not, the counter is advanced to the next request.
     *
     * @return bool true when the counter has reached $totalPageCount.
     */
    private function checkThreadIsEnd()
    {
        if (self::$counter < $this->totalPageCount) {
            self::$counter++;
            return false;
        }

        return true;
    }
}

执行命令

php think thread

效果

文章来源:智云一二三科技

文章标题:讲解thinkphp5.1如何实现多线程爬虫

文章地址:https://www.zhihuclub.com/24502.shtml

关于作者: 智云科技

热门文章

网站地图