你想找的微信文章数据采集工具,简单说就是“输入文章链接,就能挖出超多隐藏信息”的神奇接口!
比如你随便扔个微信公众号文章的网址进去,它能立刻给你扒出——
阅读量、点赞数、推荐数、分享次数这些基础数据,还有留言区的总评论条数、每条评论的完整内容、用户头像和昵称(当然会隐去敏感信息)。
更厉害的是,它还能顺藤摸瓜拿到文章本身的信息:标题、正文文字、发布时间、作者名字、所在地区、公众号名称,甚至封面图的高清链接。
连公众号的“隐藏身份证号”都能挖到——比如 appid(公众号在微信平台上的唯一编码)、原始 gh 参数(账号的原始标识值),这些技术党爱用的数据也一应俱全!
总共能捞到上百个维度的数据,从表面互动量到深层内容细节,再到账号身份信息,全给你打包得明明白白。
简单说,就是“一个链接喂进去,一串数据吐出来”,不管是做内容分析、竞品调研,还是研究用户互动规律,都能靠它轻松搞定!
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<meta name="author" content="www.zhiwu55.com">
<meta name="viewport" content="width=device-width,initial-scale=1.0,maximum-scale=1.0,user-scalable=0,viewport-fit=cover">
</head>
<body>
<?php
/*
 * Single-page demo client for a WeChat-article collection HTTP API.
 *
 * Flow, as implemented below:
 *   1. If ./sk.txt does not exist, POST an install request and store the
 *      returned 32-character key (or an empty key) serialized in ./sk.txt.
 *   2. Print the stored appid and key.
 *   3. Without ?send: render the input form.
 *   4. ?send=yes with a posted link containing "mp.weixin.qq.com": submit the
 *      link for collection, then redirect (after 6 s) to the polling URL.
 *   5. ?appid=<32 chars>&sign=<32 chars>: poll for the result; reload every
 *      2 s while the response is shorter than 200 bytes, otherwise decode and
 *      print it.
 *
 * NOTE(review): ./sk.txt holds the API key; if the script directory is served
 * by the web server the file may be downloadable. Consider moving it outside
 * the document root or denying access to it.
 */

/**
 * POST form fields to a URL and return the raw response body.
 *
 * On a cURL failure the error is printed and the script terminates, which is
 * what every call site in this page did inline before.
 *
 * @param string $url    Endpoint to call.
 * @param array  $fields Form fields sent as the POST body.
 * @return string Raw response body.
 */
function hzw_collect_post($url, $fields)
{
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_POST, true);
    curl_setopt($ch, CURLOPT_POSTFIELDS, $fields);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    $response = curl_exec($ch);
    if ($response === false || curl_errno($ch)) {
        echo 'cURL 错误: ' . curl_error($ch);
        curl_close($ch);
        exit;
    }
    curl_close($ch);
    return $response;
}

/**
 * unserialize() wrapper that refuses to instantiate objects where supported.
 *
 * SECURITY: the polling payload comes from a remote server over plain HTTP,
 * so it must be treated as untrusted. unserialize() on untrusted data allows
 * object injection; the "allowed_classes" option (PHP >= 7.0) blocks that.
 * On older runtimes the plain call is kept so the page still works — prefer
 * a JSON payload if the API ever offers one.
 *
 * @param string $payload Serialized string.
 * @return mixed Decoded value, or false when the payload is not valid.
 */
function hzw_collect_unserialize($payload)
{
    if (PHP_VERSION_ID >= 70000) {
        return unserialize($payload, array('allowed_classes' => false));
    }
    return unserialize($payload);
}

/**
 * Escape a value for safe output inside HTML text or attribute context.
 *
 * @param string $text Raw text.
 * @return string HTML-escaped text.
 */
function hzw_collect_escape($text)
{
    return htmlspecialchars((string) $text, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
}

$skFile = './sk.txt';

if (!file_exists($skFile)) {
    // No stored credential yet: derive an appid from host + machine info and
    // ask the install endpoint for the matching key.
    $appid = md5($_SERVER['HTTP_HOST'] . php_uname('s') . php_uname('n') . php_uname('m'));

    $installArr = array();
    $installArr['appid'] = $appid;
    $installArr['addons_name'] = 'hzw_pay_user';
    $installArr['domain_url'] = 'http://' . $_SERVER['HTTP_HOST'];

    $installResponse = trim(hzw_collect_post(
        'http://userid.zhiwu55.com.cn/hzw_api/collect/install.php',
        $installArr
    ));

    // Store the TRIMMED key: the length check was always done on the trimmed
    // value, and stray whitespace in the key would break every signature.
    $installArr['appid_key'] = (strlen($installResponse) === 32) ? $installResponse : '';

    if (file_put_contents($skFile, serialize($installArr)) === false) {
        echo '无法写入 sk.txt,请检查目录写入权限';
        exit;
    }
}

// Defaults keep later reads defined even if sk.txt is missing or corrupt.
$skArr = array('appid' => '', 'appid_key' => '');
if (file_exists($skFile)) {
    $stored = hzw_collect_unserialize((string) file_get_contents($skFile));
    if (is_array($stored)) {
        $skArr = array_merge($skArr, $stored);
    }
    echo 'appid:' . hzw_collect_escape($skArr['appid']) . "<hr>";
    echo '密钥:' . hzw_collect_escape($skArr['appid_key']) . "<hr>";
}

// Read request parameters defensively: the polling redirect arrives as a GET
// without any POST body, and array-valued parameters must not reach
// stripos()/strlen().
$send      = (isset($_GET['send']) && is_string($_GET['send'])) ? $_GET['send'] : '';
$linkHref  = (isset($_POST['link_href']) && is_string($_POST['link_href'])) ? trim($_POST['link_href']) : '';
$pollAppid = (isset($_GET['appid']) && is_string($_GET['appid'])) ? $_GET['appid'] : '';
$pollSign  = (isset($_GET['sign']) && is_string($_GET['sign'])) ? $_GET['sign'] : '';

if (empty($_GET['send'])) {
    // Initial view: the input form.
    echo '<form action="?send=yes" method="post">';
    echo '<input type="text" name="link_href" style="width:600px;padding:12px;" placeholder="请输入微信文章地址">';
    echo '<br>';
    echo '<br>';
    echo '<input type="submit" value="确定采集" style="font-size:18px;" onClick="this.value=\'稍等……\'">';
    echo '</form>';
} elseif ($send === 'yes' && stripos($linkHref, 'mp.weixin.qq.com') !== false) {
    // "!== false" (not "!= false"): a match at offset 0 is still a match.
    echo '获取中……';

    $ApiCollect_appid = $skArr['appid'];
    $ApiCollect_appid_key = $skArr['appid_key'];
    $t = time();

    $dataArr = array();
    $dataArr['appid'] = $ApiCollect_appid;
    $dataArr['link_href'] = $linkHref;
    // [Recommended] URL that receives the collection result: once collection
    // finishes the result is POSTed to this address. Reference example:
    // http://discuz.zhiwu55.vip/admin.php?action=plugins&operation=config&do=10&identifier=zhiwu55_wxcollect&pmod=hzw_run
    $dataArr['callback_post_link'] = '';
    $dataArr['t'] = $t;

    // Signature = md5(appid + key + link + callback + timestamp).
    $sign = md5(
        $ApiCollect_appid
        . $ApiCollect_appid_key
        . $dataArr['link_href']
        . $dataArr['callback_post_link']
        . $t
    );
    $dataArr['sign'] = $sign;

    $response = hzw_collect_post('http://ai.zhiwu55.com/ApiCollectPostData', $dataArr);

    // Anything that does not START with "hzw_success" is shown as-is and ends
    // the request.
    // NOTE(review): the body is echoed unescaped, as before, in case the
    // service returns HTML error text — confirm and escape if it is plain text.
    if (stripos($response, 'hzw_success') !== 0) {
        echo $response;
        exit;
    }

    // Give the service 6 s, then switch to the polling URL.
    echo '<script>setTimeout(function(){window.location.href="?send=yes&appid='
        . rawurlencode($skArr['appid']) . '&sign=' . $sign
        . '";},6000);</script>';
} elseif (strlen($pollAppid) == 32 && strlen($pollSign) == 32) {
    $response = hzw_collect_post(
        'http://ai.zhiwu55.com/ApiCollectHandUpdateData',
        array('appid' => $pollAppid, 'sign' => $pollSign)
    );

    if (strlen($response) < 200) {
        // Short response: treated as "not ready yet"; poll again in 2 s.
        echo '采集中……,稍等一下……';
        echo '<script>setTimeout(function(){window.location.reload();},2000);</script>';
    } else {
        // Payload is base64-encoded serialized data (see SECURITY note on
        // hzw_collect_unserialize).
        $responseArr = hzw_collect_unserialize((string) base64_decode($response));

        if (!is_array($responseArr)) {
            echo '返回数据解析失败';
        } else {
            unset($responseArr['content']); // main body of the WeChat article

            if (!empty($responseArr['discuss']) && is_string($responseArr['discuss'])) {
                // 'discuss' is decoded from a JSON string into an array.
                $responseArr['discuss'] = json_decode(trim($responseArr['discuss']), true);
            }

            // Escape before printing: the data holds third-party article and
            // comment text and must not be interpreted as HTML.
            echo '<pre>';
            echo hzw_collect_escape(print_r($responseArr, true));
            echo '</pre>';
        }
    }
} else {
    // Previously this case rendered a blank page.
    echo '请输入有效的微信文章地址(需包含 mp.weixin.qq.com)';
    echo '<br><a href="?">返回</a>';
}
?>
</body>
</html>