# How to Crawl All League of Legends Hero Skins with Python3

## Introduction

League of Legends is one of the world's most popular MOBA games, with more than 150 heroes and thousands of skins. This article walks through using Python3 to crawl skin data for every hero from the official site, including the high-resolution splash art and skin metadata.
---
## Preparing the Tech Stack

### Required Tools

- Python 3.8+
- requests (HTTP requests)
- BeautifulSoup4 (HTML parsing)
- json (data handling)
- os (file operations)
- concurrent.futures (multithreaded downloads)

### Installing Dependencies
```bash
pip install requests beautifulsoup4
```

---

## Analyzing the Data APIs

Inspecting the official data site (https://lol.qq.com/data/) with the browser's developer tools reveals two endpoints:

- Hero list: https://game.gtimg.cn/images/lol/act/img/js/heroList/hero_list.js
- Per-hero skin data: https://game.gtimg.cn/images/lol/act/img/js/hero/{heroId}.js (replace {heroId} with the hero's numeric ID)
## Fetching the Hero List

```python
import requests
import json
import os

def get_hero_list():
    url = "https://game.gtimg.cn/images/lol/act/img/js/heroList/hero_list.js"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"
    }
    response = requests.get(url, headers=headers)
    # The .js file is essentially JSON; slice from the first '{' to be safe
    data = json.loads(response.text[response.text.index('{'):])
    return data["hero"]

hero_list = get_hero_list()
print(f"Fetched data for {len(hero_list)} heroes")
```
## Fetching a Hero's Skin Data

```python
def get_hero_skins(hero_id):
    url = f"https://game.gtimg.cn/images/lol/act/img/js/hero/{hero_id}.js"
    response = requests.get(url)
    skin_data = json.loads(response.text[response.text.index('{'):])
    return skin_data["skins"]

# Example: fetch Yasuo's skins (hero ID 157)
yasuo_skins = get_hero_skins("157")
print(json.dumps(yasuo_skins[:2], indent=2, ensure_ascii=False))
```
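The endpoint 404s for IDs that don't exist, and `json.loads` will choke on any non-JSON error page, so a more defensive variant may be worth having (a sketch; the timeout value and the empty-list fallback are my additions):

```python
def get_hero_skins_safe(hero_id):
    """Like get_hero_skins, but tolerant of bad IDs and network hiccups."""
    url = f"https://game.gtimg.cn/images/lol/act/img/js/hero/{hero_id}.js"
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()  # surfaces 404s for invalid hero IDs
        skin_data = json.loads(response.text[response.text.index('{'):])
        return skin_data.get("skins", [])
    except (requests.RequestException, ValueError) as e:
        # ValueError covers both a missing '{' and malformed JSON
        print(f"Failed to fetch skins for hero {hero_id}: {e}")
        return []
```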
## Downloading Splash Art

```python
def download_skin(skin, save_dir="skins"):
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    hero_name = skin["heroName"]
    skin_name = skin["name"]
    skin_id = skin["skinId"]

    # Build the high-resolution splash art URL (1920x1080)
    img_url = f"https://game.gtimg.cn/images/lol/act/img/skin/big{skin_id}.jpg"

    try:
        response = requests.get(img_url, stream=True)
        if response.status_code == 200:
            file_path = f"{save_dir}/{hero_name}_{skin_name}.jpg"
            with open(file_path, 'wb') as f:
                for chunk in response.iter_content(1024):
                    f.write(chunk)
            print(f"Downloaded: {hero_name} - {skin_name}")
    except Exception as e:
        print(f"Download failed: {skin_name} - {str(e)}")
```
## Multithreaded Batch Download

```python
from concurrent.futures import ThreadPoolExecutor

def batch_download_skins(max_workers=5):
    hero_list = get_hero_list()
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        for hero in hero_list:
            skins = get_hero_skins(hero["heroId"])
            for skin in skins:
                if skin["mainImg"]:  # skip entries without standalone splash art
                    executor.submit(download_skin, skin)
```
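Because download_skin catches its own exceptions, executor.submit gives no feedback on overall progress. If you want a running count, collect the futures and drain them with as_completed (a sketch built on the functions above; the tally logic is my addition):

```python
from concurrent.futures import ThreadPoolExecutor, as_completed

def batch_download_with_progress(max_workers=5):
    futures = []
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        for hero in get_hero_list():
            for skin in get_hero_skins(hero["heroId"]):
                if skin["mainImg"]:
                    futures.append(executor.submit(download_skin, skin))
        for done, future in enumerate(as_completed(futures), 1):
            future.result()  # re-raises anything download_skin didn't catch
            print(f"Progress: {done}/{len(futures)}")
```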
## Complete Spider Code

```python
import requests
import json
import os
from concurrent.futures import ThreadPoolExecutor

class LOLSkinSpider:
    def __init__(self):
        self.base_url = "https://game.gtimg.cn/images/lol/act/img/js"
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"
        }

    def get_hero_list(self):
        url = f"{self.base_url}/heroList/hero_list.js"
        response = requests.get(url, headers=self.headers)
        data = json.loads(response.text[response.text.index('{'):])
        return data["hero"]

    def get_hero_skins(self, hero_id):
        url = f"{self.base_url}/hero/{hero_id}.js"
        response = requests.get(url, headers=self.headers)
        skin_data = json.loads(response.text[response.text.index('{'):])
        return skin_data["skins"]

    def download_skin(self, skin, save_dir="skins"):
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)

        hero_name = skin["heroName"]
        skin_name = skin["name"].replace("/", "_")  # sanitize characters illegal in filenames
        skin_id = skin["skinId"]

        img_url = f"https://game.gtimg.cn/images/lol/act/img/skin/big{skin_id}.jpg"

        try:
            response = requests.get(img_url, headers=self.headers, stream=True)
            if response.status_code == 200:
                file_path = f"{save_dir}/{hero_name}_{skin_name}.jpg"
                with open(file_path, 'wb') as f:
                    for chunk in response.iter_content(1024):
                        f.write(chunk)
                print(f"Downloaded: {hero_name} - {skin_name}")
        except Exception as e:
            print(f"Download failed: {skin_name} - {str(e)}")

    def run(self, max_workers=5):
        heroes = self.get_hero_list()
        print(f"Crawling skin data for {len(heroes)} heroes...")

        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            for hero in heroes:
                skins = self.get_hero_skins(hero["heroId"])
                for skin in skins:
                    if skin["mainImg"] and skin["skinId"] != "0":  # skip entries without standalone splash art, and the default skin
                        executor.submit(self.download_skin, skin)

        print("All skins downloaded!")

if __name__ == "__main__":
    spider = LOLSkinSpider()
    spider.run()
```
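If a long run is interrupted, restarting it re-downloads everything from scratch. One way around that is a subclass that skips files already on disk (a sketch, assuming it lives in the same module as LOLSkinSpider; the ResumableLOLSkinSpider name and early-return guard are my additions):

```python
class ResumableLOLSkinSpider(LOLSkinSpider):
    """Skips splash art that already exists so interrupted runs can resume."""

    def download_skin(self, skin, save_dir="skins"):
        hero_name = skin["heroName"]
        skin_name = skin["name"].replace("/", "_")
        file_path = f"{save_dir}/{hero_name}_{skin_name}.jpg"
        if os.path.exists(file_path):
            return  # already downloaded on a previous run
        super().download_skin(skin, save_dir)
```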
## Saving Skin Metadata

```python
def save_metadata(hero_list):
    all_skins = []
    for hero in hero_list:
        skins = get_hero_skins(hero["heroId"])
        all_skins.extend([{
            "hero_id": hero["heroId"],
            "hero_name": hero["name"],
            "skin_id": skin["skinId"],
            "skin_name": skin["name"],
            "price": skin.get("price", "unknown"),
            "release_date": skin.get("publishTime", "unknown")
        } for skin in skins if skin["mainImg"]])

    with open("lol_skins_metadata.json", "w", encoding="utf-8") as f:
        json.dump(all_skins, f, indent=2, ensure_ascii=False)
```
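Once the metadata file exists it is easy to query; for example, a quick tally of skins per hero using only the fields written above:

```python
import json
from collections import Counter

with open("lol_skins_metadata.json", encoding="utf-8") as f:
    all_skins = json.load(f)

skin_counts = Counter(s["hero_name"] for s in all_skins)
for name, count in skin_counts.most_common(5):
    print(f"{name}: {count} skins")
```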
## Deduplicating Downloaded Images

```python
import hashlib

def remove_duplicates(dir_path):
    unique_files = {}
    for filename in os.listdir(dir_path):
        file_path = os.path.join(dir_path, filename)
        # Hash the file contents so duplicates are caught even under different names
        with open(file_path, "rb") as f:
            file_hash = hashlib.md5(f.read()).hexdigest()
        if file_hash not in unique_files:
            unique_files[file_hash] = file_path
        else:
            os.remove(file_path)
```
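Reading each file whole is fine for splash art, but hashing in fixed-size chunks keeps memory flat regardless of file size; a small variant you could swap into remove_duplicates (the chunk size is an arbitrary choice):

```python
import hashlib

def file_md5(path, chunk_size=1 << 20):
    """MD5 of a file, read 1 MiB at a time instead of all at once."""
    digest = hashlib.md5()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()
```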
## Anti-Crawling Countermeasures

### Set Realistic Request Headers

```python
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
    "Referer": "https://lol.qq.com/data/",
    "Accept-Language": "zh-CN,zh;q=0.9"
}
```

### Throttle Requests with Random Delays

```python
import time
import random

def delayed_request(url):
    time.sleep(random.uniform(0.5, 1.5))  # random delay between requests
    return requests.get(url, headers=headers)
```

### Route Through a Proxy if Needed

```python
proxies = {
    "http": "http://your_proxy:port",
    "https": "https://your_proxy:port"
}
response = requests.get(url, headers=headers, proxies=proxies)
```
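Transient 5xx errors and timeouts are also common on long crawls; rather than hand-rolling retry loops, requests can mount urllib3's Retry on a Session (the parameter values below are illustrative choices, not from the original article):

```python
import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

def make_session():
    retry = Retry(
        total=3,                 # up to 3 attempts per request
        backoff_factor=0.5,      # wait 0.5s, then 1s, then 2s between attempts
        status_forcelist=[429, 500, 502, 503, 504],
    )
    session = requests.Session()
    session.mount("https://", HTTPAdapter(max_retries=retry))
    session.mount("http://", HTTPAdapter(max_retries=retry))
    return session

# session.get(...) can then replace requests.get(...) throughout the spider
```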
---

## Conclusion

With the crawling techniques introduced in this article, we can efficiently collect skin data for every League of Legends hero. The same approach carries over to data collection for other games: just swap in the corresponding API endpoints. Hopefully this article helps you better understand the workflow and practical techniques of web crawler development.

The full project code is available on GitHub: https://github.com/yourname/lol-skin-spider