python

c语言爬虫和python爬虫实现

小樊
83
2024-12-05 14:26:38
栏目: 编程语言

在C语言和Python中实现爬虫的方法有所不同,但基本原理相同。这里分别给出一个简单的C语言和Python爬虫示例。

  1. C语言爬虫示例:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <curl/curl.h>

/*
 * libcurl write callback: appends one received chunk to a growing,
 * NUL-terminated heap buffer.
 *
 * libcurl may deliver a response body in several chunks, calling this
 * function once per chunk, so each chunk is appended after the data that is
 * already stored instead of replacing it.
 *
 * contents: chunk bytes (not NUL-terminated).
 * size, nmemb: the chunk is size * nmemb bytes long.
 * userp: a char ** that is either NULL-valued or points to the
 *        NUL-terminated buffer built by earlier calls. The caller
 *        owns the buffer and must free() it.
 *
 * Returns the number of bytes consumed. On failure returns 0; when that
 * differs from the chunk size libcurl treats it as a write error and aborts
 * the transfer. On failure the previously accumulated buffer is left intact.
 *
 * NOTE: the current length is recovered with strlen(), so the body is
 * treated as text; data containing embedded NUL bytes is not preserved.
 */
static size_t WriteCallback(void *contents, size_t size, size_t nmemb, void *userp) {
    char **response = (char **)userp;

    /* Reject a chunk whose byte count would overflow size_t. */
    if (nmemb != 0 && size > (size_t)-1 / nmemb) {
        return 0;
    }
    size_t realsize = size * nmemb;

    /* Length of what earlier chunks already stored (0 on the first call). */
    size_t used = (*response != NULL) ? strlen(*response) : 0;
    if (realsize >= (size_t)-1 - used) {
        return 0;
    }

    /* Use a temporary so the old buffer is not lost if realloc fails. */
    char *grown = realloc(*response, used + realsize + 1);
    if (grown == NULL) {
        return 0;
    }

    memcpy(grown + used, contents, realsize);
    grown[used + realsize] = '\0';
    *response = grown;
    return realsize;
}

/*
 * Fetches https://example.com with libcurl and prints the response body to
 * stdout.
 *
 * Returns EXIT_SUCCESS when the transfer succeeded and EXIT_FAILURE when
 * libcurl could not be initialised or the transfer failed.
 */
int main(void) {
    int status = EXIT_FAILURE;
    char *response = NULL;  /* filled in by WriteCallback; freed below */

    if (curl_global_init(CURL_GLOBAL_DEFAULT) != CURLE_OK) {
        fprintf(stderr, "curl_global_init() failed\n");
        return EXIT_FAILURE;
    }

    CURL *curl = curl_easy_init();
    if (curl) {
        curl_easy_setopt(curl, CURLOPT_URL, "https://example.com");
        curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteCallback);
        curl_easy_setopt(curl, CURLOPT_WRITEDATA, &response);

        CURLcode res = curl_easy_perform(curl);
        if (res != CURLE_OK) {
            fprintf(stderr, "curl_easy_perform() failed: %s\n", curl_easy_strerror(res));
        } else {
            /*
             * The write callback is never invoked for an empty body, which
             * leaves response NULL; passing NULL to %s is undefined
             * behaviour, so print an empty string instead.
             */
            printf("Response:\n%s\n", response != NULL ? response : "");
            status = EXIT_SUCCESS;
        }
        curl_easy_cleanup(curl);
    } else {
        fprintf(stderr, "curl_easy_init() failed\n");
    }

    curl_global_cleanup();
    free(response);  /* free(NULL) is a no-op, so no guard is needed */
    return status;
}
  2. Python爬虫示例:
import requests
from bs4 import BeautifulSoup

# Page to fetch.
url = "https://example.com"

# requests applies no timeout by default, so an unresponsive server would
# block this script forever; give up after 10 seconds instead.
response = requests.get(url, timeout=10)

# Fail loudly on 4xx/5xx responses rather than parsing an error page as if
# it were the requested content.
response.raise_for_status()

# Parse the HTML with the parser bundled in the Python standard library.
soup = BeautifulSoup(response.text, "html.parser")

print("Response:")
print(soup.prettify())

这两个示例都实现了一个简单的爬虫,从指定的URL获取HTML内容并打印出来。C语言示例使用了libcurl库,而Python示例使用了requests库。在Python示例中,我们还使用了BeautifulSoup库来解析HTML内容。

0
看了该问题的人还看了