PHP 识别搜索引擎爬虫蜘蛛

云游道人 2025-08-01 1533 阅读 0评论

直接上代码:

/**
 * Identify a known crawler from a User-Agent string.
 *
 * Rules are tried from top to bottom using a case-insensitive substring match
 * (stripos) and the first rule that matches decides the result. Because of
 * that, a specific token must be listed BEFORE any shorter token it contains
 * (e.g. "msnbot-media" before "msnbot"), otherwise the specific rule can
 * never fire. Rules that differ only in letter case are redundant.
 *
 * @param mixed $tmp Raw User-Agent value; anything that is not a non-empty
 *                   string is treated as "no crawler".
 * @return string Label of the first matching rule, or '' when nothing matches.
 */
function isbot($tmp): string {
    // Nothing to match against: behave like "not a crawler".
    if (!is_string($tmp) || $tmp === '') {
        return '';
    }

    // Crawler rule set. Order matters: specific tokens first, generic last.
    $rules = [
        // Google: specialised crawlers before the generic "Googlebot" token.
        ['patterns' => ['compatible; Googlebot/2.1'], 'label' => '谷歌蜘蛛'],
        ['patterns' => ['Googlebot-Mobile'], 'label' => '谷歌蜘蛛'],
        ['patterns' => ['Googlebot-Image'], 'label' => '谷歌图片蜘蛛'],
        ['patterns' => ['Mediapartners-Google'], 'label' => '谷歌广告蜘蛛'],
        ['patterns' => ['Adsbot-Google'], 'label' => '谷歌质量蜘蛛'],
        ['patterns' => ['Googlebot'], 'label' => '谷歌蜘蛛'],
        ['patterns' => ['GoogleOther'], 'label' => '谷歌蜘蛛'],

        // Baidu: "-mobile-gate" contains "-mobile", so it has to come first.
        ['patterns' => ['baiduspider-mobile-gate'], 'label' => '百度移动蜘蛛'],
        ['patterns' => ['Baiduspider-mobile'], 'label' => '百度蜘蛛'],
        ['patterns' => ['Baidu-Thumbnail'], 'label' => '百度图片蜘蛛'],
        ['patterns' => ['Baiduspider-image'], 'label' => '百度图片蜘蛛'],
        ['patterns' => ['Baiduspider-news'], 'label' => '百度新闻蜘蛛'],
        ['patterns' => ['Baiduspider-video'], 'label' => '百度视频蜘蛛'],
        ['patterns' => ['Baidu-Transcoder'], 'label' => '百度音乐蜘蛛'],
        ['patterns' => ['Baiduspider'], 'label' => '百度蜘蛛'],

        // Soso
        ['patterns' => ['Sosospider'], 'label' => '搜搜蜘蛛'],
        ['patterns' => ['Sosoimagespider'], 'label' => '搜搜图片蜘蛛'],

        // Yahoo: "Slurp China" before plain "Slurp".
        ['patterns' => ['Yahoo! Slurp China'], 'label' => '雅虎中文蜘蛛'],
        ['patterns' => ['Yahoo ContentMatch Crawler'], 'label' => '雅虎竞价蜘蛛'],
        ['patterns' => ['Yahoo-MMCrawler'], 'label' => '雅虎图片蜘蛛'],
        ['patterns' => ['Yahoo! Slurp'], 'label' => '雅虎英文蜘蛛'],

        // MSN: every "msnbot-*" variant before the generic "msnbot" token.
        ['patterns' => ['msnbot-media'], 'label' => '微软媒体蜘蛛'],
        ['patterns' => ['MSNBot-NewsBlogs'], 'label' => '微软新闻及blog蜘蛛'],
        ['patterns' => ['MSNBot-Academic'], 'label' => '微软学术蜘蛛'],
        ['patterns' => ['msnbot'], 'label' => '微软蜘蛛'],

        // 360
        ['patterns' => ['360Spider'], 'label' => '360蜘蛛'],

        // Youdao / Sogou
        ['patterns' => ['YodaoBot', 'OutfoxBot'], 'label' => '有道蜘蛛'],
        ['patterns' => ['Sogou web spider', 'Sogou Orion spider'], 'label' => '搜狗蜘蛛'],
        ['patterns' => ['Sogou inst spider'], 'label' => '搜狗蜘蛛'],
        ['patterns' => ['Sogou News Spider'], 'label' => '搜狗新闻蜘蛛'],
        ['patterns' => ['Sogou spider2'], 'label' => '搜狗蜘蛛'],
        ['patterns' => ['Sogou blog'], 'label' => '搜狗blog蜘蛛'],
        ['patterns' => ['sogou spider'], 'label' => '搜狗蜘蛛'],

        // Other search engines, SEO tools and social-network fetchers.
        ['patterns' => ['bingbot'], 'label' => '必应蜘蛛'],
        ['patterns' => ['EtaoSpider'], 'label' => '一淘网蜘蛛'],
        ['patterns' => ['Scooter'], 'label' => 'Altavista蜘蛛'],
        ['patterns' => ['Lycos_Spider'], 'label' => 'Lycos蜘蛛'],
        ['patterns' => ['FAST-WebCrawler'], 'label' => 'Alltheweb蜘蛛'],
        ['patterns' => ['Slurp ASPSeek ASPSeek'], 'label' => 'INKTOMI蜘蛛'],
        ['patterns' => ['lanshanbot'], 'label' => '东方网景爬虫'],
        ['patterns' => ['BSpider'], 'label' => '日本爬虫'],
        ['patterns' => ['Gaisbot'], 'label' => 'Gaisbot'],
        ['patterns' => ['ia_archiver'], 'label' => 'Alexa蜘蛛'],
        ['patterns' => ['altavista'], 'label' => 'altavista爬虫'],
        ['patterns' => ['Inktomi slurp'], 'label' => 'Inktomi slurp'],
        ['patterns' => ['YandexBot'], 'label' => 'Yandex蜘蛛'],
        ['patterns' => ['AhrefsBot'], 'label' => 'AhrefsBot'],
        ['patterns' => ['ezooms.bot'], 'label' => 'ezooms.bot'],
        ['patterns' => ['YisouSpider'], 'label' => '神马搜索'],
        ['patterns' => ['MJ12bot'], 'label' => 'Majestic爬虫'],
        ['patterns' => ['SemrushBot'], 'label' => 'Semrush爬虫'],
        ['patterns' => ['DuckDuckBot'], 'label' => 'DuckDuckGo蜘蛛'],
        ['patterns' => ['facebookexternalhit'], 'label' => 'Facebook爬虫'],
        ['patterns' => ['Twitterbot'], 'label' => 'Twitter爬虫'],
        ['patterns' => ['LinkedInBot'], 'label' => 'LinkedIn爬虫'],
        ['patterns' => ['Pinterestbot'], 'label' => 'Pinterest爬虫'],
        ['patterns' => ['DotBot'], 'label' => 'DotNet爬虫'],
        ['patterns' => ['PetalBot'], 'label' => 'Petal爬虫'],
        ['patterns' => ['Exabot'], 'label' => 'Exalead爬虫'],
        ['patterns' => ['SeznamBot'], 'label' => 'Seznam爬虫'],
        // Bare "Slurp" is the catch-all for the Yahoo/Inktomi rules above.
        ['patterns' => ['Slurp'], 'label' => 'Yahoo爬虫'],
        ['patterns' => ['rogerbot'], 'label' => 'Moz爬虫'],
        ['patterns' => ['Nimbostratus'], 'label' => 'CloudFlare爬虫'],
    ];

    foreach ($rules as $rule) {
        foreach ($rule['patterns'] as $pattern) {
            // Case-insensitive substring match; the first hit wins.
            if (stripos($tmp, $pattern) !== false) {
                return $rule['label'];
            }
        }
    }

    // No known crawler token found.
    return '';
}

以下是判断客户端访问类型:

/**
 * Classify the current HTTP request by who appears to be making it.
 *
 * Reads the User-Agent and Accept request headers from $_SERVER and checks,
 * in this order:
 *   1. isbot() recognises the User-Agent  -> "蜘蛛:" followed by its label.
 *   2. The Accept header mentions text/html -> "访客".
 *   3. The User-Agent contains a known HTTP tool/library token -> its label.
 *   4. Otherwise -> "未知".
 *
 * @return string One of the labels described above.
 */
function ClientType(): string {
    // Missing headers are treated as empty strings.
    $userAgent = $_SERVER['HTTP_USER_AGENT'] ?? '';
    $httpAccept = $_SERVER['HTTP_ACCEPT'] ?? '';

    // Known search-engine crawlers take priority over everything else.
    $botType = isbot($userAgent);
    if ($botType) {
        return "蜘蛛:" . $botType;
    }

    // A client that asks for HTML is counted as a regular visitor.
    if (stripos($httpAccept, 'text/html') !== false) {
        return "访客";
    }

    // HTTP tools and libraries, matched as case-insensitive substrings with
    // the first hit winning. Library tokens whose real User-Agent also names
    // the language ("libwww-perl/6.x", "Apache-HttpClient/4.x (Java/11)") are
    // listed before the bare language token so they are not shadowed by it.
    $crawlerTools = [
        'curl' => 'cURL命令行工具',
        'wget' => 'Wget下载工具',
        'libwww' => 'libwww-perl工具',
        'apache-httpclient' => 'Apache HTTP客户端',
        'python' => 'Python爬虫',
        'java' => 'Java爬虫',
        'php' => 'PHP爬虫',
        'perl' => 'Perl爬虫',
        'ruby' => 'Ruby爬虫',
        'go-http-client' => 'Go爬虫',
        'node-fetch' => 'Node.js爬虫',
        'okhttp' => 'OkHttp客户端',
        'http-client' => 'HTTP客户端',
        'axios' => 'Axios HTTP客户端',
    ];

    foreach ($crawlerTools as $key => $name) {
        if (stripos($userAgent, $key) !== false) {
            return $name;
        }
    }

    // Nothing recognised.
    return "未知";
}


喜欢就支持一下吧
点赞 0

发表评论

快捷回复: 表情:
aoman baiyan bishi bizui cahan ciya dabing daku deyi doge fadai fanu fendou ganga guzhang haixiu hanxiao zuohengheng zhuakuang zhouma zhemo zhayanjian zaijian yun youhengheng yiwen yinxian xu xieyanxiao xiaoku xiaojiujie xia wunai wozuimei weixiao weiqu tuosai tu touxiao tiaopi shui se saorao qiudale qinqin qiaoda piezui penxue nanguo liulei liuhan lenghan leiben kun kuaikule ku koubi kelian keai jingya jingxi jingkong jie huaixiao haqian aini OK qiang quantou shengli woshou gouyin baoquan aixin bangbangtang xiaoyanger xigua hexie pijiu lanqiu juhua hecai haobang caidao baojin chi dan kulou shuai shouqiang yangtuo youling
提交
评论列表 (有 0 条评论, 1533人围观)

最近发表

热门文章

最新留言

热门推荐

标签列表