# How to Crawl All League of Legends Hero Skins with Python3

## Introduction

League of Legends, one of the most popular MOBA games in the world, features more than 150 champions and thousands of finely crafted skins. This article walks through how to use Python3 to crawl all hero skin data from the official League of Legends site, including high-resolution splash art and skin metadata.
---
## Preparing the Tech Stack

### Required Tools

- Python 3.8+
- requests (HTTP requests)
- BeautifulSoup4 (HTML parsing)
- json (data handling)
- os (file operations)
- concurrent.futures (multithreaded downloads)

### Installing Dependencies
```bash
pip install requests beautifulsoup4
```

## API Analysis

Inspecting the official League of Legends data site (https://lol.qq.com/data/) with the browser's developer tools reveals two JSON endpoints:

- Hero list: https://game.gtimg.cn/images/lol/act/img/js/heroList/hero_list.js
- Per-hero skin data: https://game.gtimg.cn/images/lol/act/img/js/hero/{heroId}.js (where `{heroId}` is the hero's numeric ID)
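Before writing any parsing code, it is worth confirming what these endpoints actually return. A minimal sanity check, assuming the hero-list endpoint is still live and serves a JSON object whose top-level `hero` key holds the champion array:

```python
import json
import requests

# Fetch the hero-list endpoint and inspect its structure.
# Slicing from the first '{' strips any JS assignment prefix,
# in case the file is served as "var data = {...};".
resp = requests.get(
    "https://game.gtimg.cn/images/lol/act/img/js/heroList/hero_list.js",
    headers={"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"},
    timeout=10,
)
data = json.loads(resp.text[resp.text.index("{"):])
print(list(data.keys()))        # expected to include "hero"
print(data["hero"][0]["name"])  # name of the first hero in the list
```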
## Fetching the Hero List

```python
import requests
import json
import os

def get_hero_list():
    url = "https://game.gtimg.cn/images/lol/act/img/js/heroList/hero_list.js"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"
    }
    response = requests.get(url, headers=headers)
    # The endpoint serves a JS file; slicing from the first '{' isolates the JSON body
    data = json.loads(response.text[response.text.index('{'):])
    return data["hero"]

hero_list = get_hero_list()
print(f"Fetched data for {len(hero_list)} heroes")
```
## Fetching Skin Data

```python
def get_hero_skins(hero_id):
    url = f"https://game.gtimg.cn/images/lol/act/img/js/hero/{hero_id}.js"
    response = requests.get(url)
    skin_data = json.loads(response.text[response.text.index('{'):])
    return skin_data["skins"]

# Example: fetch Yasuo's skins (hero ID 157)
yasuo_skins = get_hero_skins("157")
print(json.dumps(yasuo_skins[:2], indent=2, ensure_ascii=False))
```
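Each element of the returned `skins` list is a dict. The fields this article relies on later are `skinId`, `name`, `heroName`, and `mainImg` (the splash-art URL, empty for entries without their own splash). They can be spot-checked with a quick loop; note that this field set is inferred from the download code below and may change as the endpoint evolves:

```python
# Spot-check the fields used later in this article.
# Any other fields the endpoint returns are ignored here.
for skin in yasuo_skins:
    print(skin.get("skinId"), skin.get("heroName"), skin.get("name"),
          "has splash" if skin.get("mainImg") else "no splash")
```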
## Downloading Splash Art

```python
def download_skin(skin, save_dir="skins"):
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    hero_name = skin["heroName"]
    skin_name = skin["name"]
    skin_id = skin["skinId"]
    # Build the high-resolution (1920x1080) splash-art URL
    img_url = f"https://game.gtimg.cn/images/lol/act/img/skin/big{skin_id}.jpg"

    try:
        response = requests.get(img_url, stream=True)
        if response.status_code == 200:
            file_path = f"{save_dir}/{hero_name}_{skin_name}.jpg"
            with open(file_path, 'wb') as f:
                for chunk in response.iter_content(1024):
                    f.write(chunk)
            print(f"Downloaded: {hero_name} - {skin_name}")
    except Exception as e:
        print(f"Download failed: {skin_name} - {str(e)}")
```
## Multithreaded Batch Download

```python
from concurrent.futures import ThreadPoolExecutor

def batch_download_skins(max_workers=5):
    hero_list = get_hero_list()
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        for hero in hero_list:
            skins = get_hero_skins(hero["heroId"])
            for skin in skins:
                if skin["mainImg"]:  # skip entries without splash art
                    executor.submit(download_skin, skin)
```
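A full run is then a single call; the worker count below is an illustrative choice, not a tuned value:

```python
# Download every available splash using 8 worker threads.
batch_download_skins(max_workers=8)
```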
## Complete Crawler Code

```python
import requests
import json
import os
from concurrent.futures import ThreadPoolExecutor

class LOLSkinSpider:
    def __init__(self):
        self.base_url = "https://game.gtimg.cn/images/lol/act/img/js"
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"
        }

    def get_hero_list(self):
        url = f"{self.base_url}/heroList/hero_list.js"
        response = requests.get(url, headers=self.headers)
        data = json.loads(response.text[response.text.index('{'):])
        return data["hero"]

    def get_hero_skins(self, hero_id):
        url = f"{self.base_url}/hero/{hero_id}.js"
        response = requests.get(url, headers=self.headers)
        skin_data = json.loads(response.text[response.text.index('{'):])
        return skin_data["skins"]

    def download_skin(self, skin, save_dir="skins"):
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)

        hero_name = skin["heroName"]
        skin_name = skin["name"].replace("/", "_")  # sanitize path separators
        skin_id = skin["skinId"]
        img_url = f"https://game.gtimg.cn/images/lol/act/img/skin/big{skin_id}.jpg"

        try:
            response = requests.get(img_url, headers=self.headers, stream=True)
            if response.status_code == 200:
                file_path = f"{save_dir}/{hero_name}_{skin_name}.jpg"
                with open(file_path, 'wb') as f:
                    for chunk in response.iter_content(1024):
                        f.write(chunk)
                print(f"Downloaded: {hero_name} - {skin_name}")
        except Exception as e:
            print(f"Download failed: {skin_name} - {str(e)}")

    def run(self, max_workers=5):
        heroes = self.get_hero_list()
        print(f"Crawling skin data for {len(heroes)} heroes...")
        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            for hero in heroes:
                skins = self.get_hero_skins(hero["heroId"])
                for skin in skins:
                    if skin["mainImg"] and skin["skinId"] != "0":  # skip base skins
                        executor.submit(self.download_skin, skin)
        print("All skins downloaded!")

if __name__ == "__main__":
    spider = LOLSkinSpider()
    spider.run()
```
## Saving Skin Metadata

```python
def save_metadata(hero_list):
    all_skins = []
    for hero in hero_list:
        skins = get_hero_skins(hero["heroId"])
        all_skins.extend([{
            "hero_id": hero["heroId"],
            "hero_name": hero["name"],
            "skin_id": skin["skinId"],
            "skin_name": skin["name"],
            "price": skin.get("price", "unknown"),
            "release_date": skin.get("publishTime", "unknown")
        } for skin in skins if skin["mainImg"]])

    with open("lol_skins_metadata.json", "w", encoding="utf-8") as f:
        json.dump(all_skins, f, indent=2, ensure_ascii=False)
```
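Fed the hero list from earlier, this writes one consolidated JSON file alongside the images:

```python
# Collect metadata for every hero and dump it to lol_skins_metadata.json
save_metadata(get_hero_list())
```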
## Removing Duplicate Images

```python
import hashlib

def remove_duplicates(dir_path):
    unique_files = {}
    for filename in os.listdir(dir_path):
        file_path = os.path.join(dir_path, filename)
        with open(file_path, "rb") as f:
            file_hash = hashlib.md5(f.read()).hexdigest()
        if file_hash not in unique_files:
            unique_files[file_hash] = file_path
        else:
            os.remove(file_path)  # identical content was already kept once
```
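A sketch of running the deduplication over the download directory, with a before/after count; hashing the full file with MD5 is an acceptable identity check here, since accidental collisions are vanishingly unlikely:

```python
import os

before = len(os.listdir("skins"))
remove_duplicates("skins")
after = len(os.listdir("skins"))
print(f"Removed {before - after} duplicate file(s)")
```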
## Anti-Crawling Countermeasures

### Realistic Request Headers

```python
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
    "Referer": "https://lol.qq.com/data/",
    "Accept-Language": "zh-CN,zh;q=0.9"
}
```

### Random Request Delays

```python
import time
import random

def delayed_request(url):
    time.sleep(random.uniform(0.5, 1.5))  # random delay between requests
    return requests.get(url, headers=headers)
```
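Swapping the throttled helper into the earlier fetch functions is a drop-in change, for example:

```python
# Use the throttled helper in place of a bare requests.get call
response = delayed_request("https://game.gtimg.cn/images/lol/act/img/js/heroList/hero_list.js")
data = json.loads(response.text[response.text.index("{"):])
```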
### Routing Through a Proxy

```python
proxies = {
    "http": "http://your_proxy:port",
    "https": "https://your_proxy:port"
}
response = requests.get(url, headers=headers, proxies=proxies)
```
## Conclusion

With the crawling techniques covered in this article, we can efficiently collect skin data for every League of Legends hero. The same approach applies to data collection for other games; only the endpoint URLs need to change. Hopefully this article helps you better understand the workflow and practical techniques of web crawler development.

The full project code has been uploaded to GitHub: https://github.com/yourname/lol-skin-spider