ThinkChat2.0新版上线,更智能更精彩,支持会话、画图、阅读、搜索等,送10W Token,即刻开启你的AI之旅 广告
# 1. 说明

组件集成Guzzle,可以登录网站,并下集网站指定链接代码,然后利用phpQuery分析代码,提取想要信息

## 采用库

* 分析库
  1. simple_html_dom
  2. phpQuery
* 采集库
  1. Guzzle
  2. Requests

## 代码结构

>[info] Zsnoopy.php 实例 集成采用QueryList类的简化操作方法

```
代码结构
Zsnoopy.php
-chsys
------| snoopy.php
```

### Zsnoopy.php

```php
<?php
namespace app\common\snoopy;

use think\Exception;
use think\facade\Cache;

// Load the bundled scraping libraries unless they are already available.
if (!class_exists('Requests')) {
    include app('env')->get('extend_path') . 'libs/Requests.php';
}
if (!class_exists('phpQuery')) {
    include app('env')->get('extend_path') . 'libs/phpQuery.php';
}
// 2017-07-28: simple_html_dom was added as a workaround because phpQuery could
// not parse product-detail HTML after a ueeshop upgrade. The include is
// currently disabled:
/*
if (!function_exists('file_get_html')) {
    include EXTEND_PATH . '/libs/simple_html_dom.php';
}
*/

\Requests::register_autoloader();

/**
 * Base class for site-specific scrapers.
 *
 * Provides a factory (engine()) that instantiates
 * \app\common\snoopy\<type>\Snoopy, plus shared helpers for fetching pages,
 * probing URLs, printing timestamped messages and tagged caching.
 *
 * @package app\common\builder
 * @author 岩 <63453409@qq.com>
 */
class ZSnoopy
{
    /** @var mixed Connection object obtained after logging in. */
    protected $conn;

    /** @var bool Whether cache() reads and writes the cache; enabled by default. */
    protected $cache = true;

    /** @var mixed Not used by this class itself. */
    protected $html = null;

    /** @var bool Whether to print log output so that execution can be followed. */
    protected $log = false;

    /**
     * Aborts when the running ThinkPHP version is older than 5.1.
     *
     * The original comparison had its operands swapped, so it aborted on
     * every version newer than "5.1" (including all 5.1.x releases) instead
     * of on older ones.
     *
     * @author 岩 <63453409@qq.com>
     */
    public function __construct()
    {
        $version = app('app')->version();

        // Versions below ThinkPHP 5.1 are not supported.
        if (version_compare($version, '5.1', '<')) {
            die('Snoopy is not supert thinkphp version < 5.1 cur thinkphp version is ' . $version);
        }
    }

    /**
     * Reserved for automatic configuration / front-end installation.
     * Currently does nothing.
     */
    protected function install()
    {
    }

    /**
     * Factory entry point: creates the scraper for the given engine type.
     *
     * The type is lower-cased and used as the namespace segment of
     * \app\common\snoopy\<type>\Snoopy.
     *
     * @param string $type   Engine name (namespace segment of the scraper class).
     * @param string $action Accepted for compatibility; not used here.
     * @return object New instance of the resolved scraper class.
     * @throws Exception Code 8001 when $type is empty, 8002 when the class does not exist.
     * @author 岩 <63453409@qq.com>
     */
    public static function engine($type = '', $action = '')
    {
        if ($type == '') {
            throw new Exception('未指定构建器名称', 8001);
        }
        $type = strtolower($type);

        // Fully-qualified class name of the requested scraper.
        $class = '\\app\\common\\snoopy\\' . $type . '\\Snoopy';
        if (!class_exists($class)) {
            throw new Exception($type . '构建器不存在', 8002);
        }

        return new $class;
    }

    /**
     * Runs the task. Empty in the base class.
     *
     * @author 岩 <63453409@qq.com>
     * @return mixed
     */
    public function run()
    {
    }

    //**********************************************//
    //*************** Helper methods ***************//
    //**********************************************//

    /**
     * Fetches the body of a URL, going through cache() for 30 seconds.
     *
     * @param string|null $url   Address to fetch.
     * @param bool        $cache When false the cached copy is ignored and the
     *                           page is fetched again (the fresh body is still
     *                           stored afterwards).
     * @return mixed Response body, or the cached value.
     */
    protected function getUrl($url = null, $cache = true)
    {
        $key = md5((string) $url);

        // Only consult the cache when the caller allows a cached answer.
        $data = $cache ? $this->cache($key) : null;
        if (empty($data)) {
            $result = \Requests::get($url);
            $data = $result->body;
            $this->cache($key, $data, 30);
        }

        return $data;
    }

    /**
     * Checks that a URL answers a body-less (HEAD-style) request with HTTP 200.
     *
     * @param string $url Address to probe.
     * @return bool True only when the request succeeds with status 200.
     */
    public function checkUrl($url)
    {
        $curl = curl_init($url);
        if ($curl === false) {
            return false;
        }

        curl_setopt($curl, CURLOPT_NOBODY, true);
        $statusCode = 0;
        if (curl_exec($curl) !== false) {
            $statusCode = curl_getinfo($curl, CURLINFO_HTTP_CODE);
        }
        // Release the handle; the original never closed it.
        curl_close($curl);

        return $statusCode == 200;
    }

    /**
     * Echoes a message prefixed with the current timestamp.
     *
     * @param string $mess Message text.
     */
    public function put($mess)
    {
        // "s" (seconds) replaces the original "m", which printed the month.
        echo '[' . date('Y-m-d H:i:s') . ']' . $mess . PHP_EOL;
    }

    /**
     * Reads or writes a value under the "Zsnoopy" cache tag.
     *
     * - Empty $key: clears everything under the tag and returns true.
     * - Caching disabled ($this->cache is falsy): returns $data untouched.
     * - Otherwise: stores $data when it is set, then returns what the cache
     *   holds for the key.
     *
     * @param string|null $key     Cache key (stored with a "snoopy_" prefix).
     * @param mixed       $data    Value to store; null means "read only".
     * @param int         $timeout Lifetime in seconds.
     * @return mixed Cached value, $data, or true after clearing.
     */
    public function cache($key = null, $data = null, $timeout = 3600)
    {
        // No key given: clear the whole tag.
        if (empty($key)) {
            Cache::clear('Zsnoopy');
            return true;
        }

        if (!$this->cache) {
            return $data;
        }

        $cacheKey = 'snoopy_' . (string) $key;
        if (isset($data)) {
            Cache::tag('Zsnoopy')->set($cacheKey, $data, $timeout);
        }
        $result = Cache::tag('Zsnoopy')->get($cacheKey);

        return isset($result) ? $result : $data;
    }
}
```

### snoopy.php

```php
<?php
namespace app\common\snoopy\chsys;

use app\common\snoopy\ZSnoopy;

/**
 * Scraper for MRTG pages: extracts port entries and graph image links.
 */
class Snoopy extends ZSnoopy
{
    /** @var mixed Cookie/authentication data kept after logging in to the system. */
    protected $_cookie = null;

    /** @var bool Caching is enabled by default. */
    protected $cache = true;

    /** @var bool */
    protected $log = false;

    /** @var array Configuration; per the original note, logging in is required by default. */
    protected $config = [];

    /**
     * Intended to initialise the back-end login; currently empty.
     *
     * The parent constructor is not called, so the framework version check
     * in ZSnoopy::__construct() is not performed here.
     *
     * @param array $loginConf Login configuration (unused).
     */
    public function __construct(array $loginConf = [])
    {
    }

    /**
     * Enables or disables caching.
     *
     * @param bool $value
     * @return $this
     */
    public function setCache($value = false)
    {
        $this->cache = $value;

        return $this;
    }

    /**
     * Loads an MRTG device page and returns the trimmed text of its first
     * body-level h1.
     *
     * NOTE(review): in the flattened source it is ambiguous whether the
     * pq('body > h1') lookup was commented out in favour of the literal
     * 'chsysname'; confirm against the original file.
     *
     * @param string $url MRTG device page address.
     * @return string
     */
    public function checkMrtgUrl($url)
    {
        \phpQuery::newDocumentHTML($this->getUrl($url, false));
        $setName = pq('body > h1')->text();

        return trim($setName);
    }

    /**
     * Collects the port entries listed on a device page.
     *
     * Each table cell whose bold text looks like "<port> -- <host>" yields
     * one row with the keys setname, portname, hostname, url and picUrl.
     *
     * @param string $url     Device page address; also prefixed to image paths.
     * @param string $setName Device name copied into every row.
     * @return array List of rows.
     */
    public function getSetPost($url, $setName)
    {
        \phpQuery::newDocumentHTML($this->getUrl($url, false));

        $list = [];
        // The loop below depends on $tds, so the selector must be live code.
        $tds = pq('body > table:nth-child(2)')->find('td');
        foreach ($tds as $_td) {
            $cell = pq($_td);
            $name = $cell->find('b')->text();
            if (!preg_match('/([\w\/\.]+) -- ([\w\/\-]+)/i', $name, $names)) {
                continue;
            }

            $picUrl = $cell->find('img')->attr('src');
            $list[] = [
                'setname'  => $setName, // the data table needs this name
                'portname' => $names[1],
                'hostname' => $names[2],
                'url'      => $cell->find('a')->attr('href'),
                'picUrl'   => empty($picUrl) ? null : $url . $picUrl,
            ];
        }

        return $list;
    }

    /**
     * Collects the image links of every ".graph" element on an MRTG page.
     *
     * @param string $url MRTG page address; its directory part is the base
     *                    for the image links.
     * @return array List of rows with the keys picName and picUrl.
     */
    public function getPics($url)
    {
        $baseUrl = pathinfo($url)['dirname'];
        \phpQuery::newDocumentHTML($this->getUrl($url, false));

        $list = [];
        // The loop below depends on $tds, so the selector must be live code.
        $tds = pq('.graph');
        foreach ($tds as $_td) {
            $picName = pq($_td)->find('img')->attr('src');
            $list[] = [
                'picName' => $picName,
                'picUrl'  => empty($picName) ? null : $baseUrl . '/' . $picName,
            ];
        }

        return $list;
    }

    /**
     * Returns the body of the given URL, bypassing the cached copy.
     *
     * @param string $url
     * @return mixed
     */
    public function viewSet($url)
    {
        return $this->getUrl($url, false);
    }

    /**
     * Returns the body of the given URL, bypassing the cached copy.
     *
     * @param string $url
     * @return mixed
     */
    public function getPost($url)
    {
        return $this->getUrl($url, false);
    }

    /**
     * Downloads an image to a local file unless a fresh copy already exists.
     *
     * A failed download is no longer written out as an empty file (which
     * the original then treated as a valid cached image).
     *
     * @param string $url       Image address.
     * @param string $fileName  Full path of the local file.
     * @param int    $cacheTime Seconds an existing file is considered fresh.
     * @return bool True when a usable file is in place after the call.
     */
    public static function getUrlToPic($url, $fileName, $cacheTime = 300)
    {
        if (self::checkFile($fileName, $cacheTime)) {
            return true;
        }

        $content = file_get_contents($url);
        if ($content === false) {
            return false;
        }

        return file_put_contents($fileName, $content) !== false;
    }

    /**
     * Removes a file whose age exceeds the cache lifetime.
     *
     * @param string $fileName  Full path of the file.
     * @param int    $cacheTime Lifetime in seconds; 300 (5 minutes) by default.
     * @return bool True when the file exists and is still fresh, false when
     *              it is missing or has just been removed.
     */
    public static function checkFile($fileName, $cacheTime = 300)
    {
        if (!file_exists($fileName)) {
            return false;
        }

        if ((time() - filectime($fileName)) > $cacheTime) {
            unlink($fileName);
            return false;
        }

        return true;
    }
}
```

# 2.应用实例