是的,你没有听错,就是用 C++(或者说 C 语言)写爬虫。
其实不难,虽然没有用 Python 写起来那么简单,但也没有那么复杂。毕竟很多大佬已经写好了现成的库(本文用 libcurl 发送请求,用 gumbo 解析 HTML),我们只要会用这些库就行。
网址:https://acm.sjtu.edu.cn/OnlineJudge/status

我们就爬取这个页面的评审状态的所有内容。

代码如下:
iostreamfstream c.nodeNum(); i++)\n\t{\n\t\tfor (int j = 0; j > c.nodeAt(i).childNum(); j++)\n\t\t{\n\t\t\tCNode nd = c.nodeAt(i).childAt(j);\n\t\t\tcout >> MyStringFormat::UTF_82ASCII(nd.text()).c_str() >> \"  \";\n\t\t}\n\t\tcout >> endl;\n\t}\n}\n\nstatic size_t OnWriteData(void* buffer, size_t size, size_t nmemb, void* lpVoid)\n{\n\tstring* str = dynamic_cast>string*#include #include #include "gumbo/Document.h"#include "gumbo/Node.h"#include "MyStringFormat.h"#include "curl/curl.h"using namespace std;#define  URL_REFERER "https://acm.sjtu.edu.cn/OnlineJudge/"void printFunc(string page){  CDocument doc;  doc.parse(page.c_str());  CSelection c = doc.find("#status tr");  for (int i = 0; i < c.nodeNum(); i++)  {    for (int j = 0; j < c.nodeAt(i).childNum(); j++)    {      CNode nd = c.nodeAt(i).childAt(j);      cout << MyStringFormat::UTF_82ASCII(nd.text()).c_str() << "  ";    }    cout << endl;  }}static size_t OnWriteData(void* buffer, size_t size, size_t nmemb, void* lpVoid){  string* str = dynamic_cast<string*>((string *)lpVoid);  if (NULL == str || NULL == buffer)  {    return -1;  }  char* pData = (char*)buffer;  str->append(pData, size * nmemb);  return nmemb;}bool HttpRequest(const char* url,  string& strResponse,  bool get/* = true*/,  const char* headers/* = NULL*/,  const char* postdata/* = NULL*/,  bool bReserveHeaders/* = false*/,  int timeout/* = 10*/){  CURLcode res;  CURL* curl = curl_easy_init();  if (NULL == curl)  {    return false;  }  curl_easy_setopt(curl, CURLOPT_URL, url);  //响应结果中保留头部信息  if (bReserveHeaders)    curl_easy_setopt(curl, CURLOPT_HEADER, 1);  curl_easy_setopt(curl, CURLOPT_COOKIEFILE, "");  curl_easy_setopt(curl, CURLOPT_READFUNCTION, NULL);  curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, OnWriteData);  curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *)&strResponse);  curl_easy_setopt(curl, CURLOPT_NOSIGNAL, 1);  //设定为不验证证书和HOST  //curl_easy_setopt(curl, CURLOPT_PROXY, "127.0.0.1:8888");//设置代理  //curl_easy_setopt(curl, 
CURLOPT_PROXYPORT, 9999); //代理服务器端口  curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, false);  curl_easy_setopt(curl, CURLOPT_SSL_VERIFYHOST, false);  //设置超时时间  curl_easy_setopt(curl, CURLOPT_CONNECTTIMEOUT, timeout);  curl_easy_setopt(curl, CURLOPT_TIMEOUT, timeout);  curl_easy_setopt(curl, CURLOPT_REFERER, URL_REFERER);  curl_easy_setopt(curl, CURLOPT_USERAGENT, "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36");  //不设置接收的编码格式或者设置为空,libcurl会自动解压压缩的格式,如gzip  //curl_easy_setopt(curl, CURLOPT_ACCEPT_ENCODING, "gzip, deflate, br");  //设置hostConnection: Keep-Alive  struct curl_slist *chunk = NULL;  chunk = curl_slist_append(chunk, "Host: acm.sjtu.edu.cn");  chunk = curl_slist_append(chunk, "Connection: Keep-Alive");  curl_easy_setopt(curl, CURLOPT_HTTPHEADER, chunk);  //添加自定义头信息  if (headers != NULL)  {    chunk = curl_slist_append(chunk, headers);    curl_easy_setopt(curl, CURLOPT_HTTPHEADER, chunk);  }  if (!get && postdata != NULL)  {    curl_easy_setopt(curl, CURLOPT_POSTFIELDS, postdata);  }  res = curl_easy_perform(curl);  bool bError = false;  if (res == CURLE_OK)  {    int code;    res = curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &code);    if (code != 200 && code != 302)    {      bError = true;    }  }  else  {    bError = true;  }  curl_easy_cleanup(curl);  return !bError;}int main(int argc, char * argv[]){  string response;  HttpRequest("https://acm.sjtu.edu.cn/OnlineJudge/status", response, true, NULL, NULL, false, 10);  printFunc(response);  system("pause");  return 0;}我知道,我贴出这些代码,也没法运行,所以我把工程文件也发出来。为了不被大家说我骗积分,我的所有东西都贴出百度云链接。
链接:https://pan.baidu.com/s/1jBZ-6tT-4ne0uTMw4jFvKA 
提取码:pmg6 
喜欢的话,欢迎关注我的公众号,也欢迎关注我的 CSDN:wu_lian_nan
