Scrapy 的 Selector 可以方便地解析 XML 数据;JSON 数据则可以直接使用 Python 标准库的 json 模块来处理。
from scrapy.selector import Selector
# Sample XML document used to demonstrate Selector-based extraction.
xml_data = """
<bookstore>
<book category="cooking">
<title lang="en">Everyday Italian</title>
<author>Giada De Laurentiis</author>
<year>2005</year>
<price>30.00</price>
</book>
<book category="children">
<title lang="en">Harry Potter</title>
<author>J.K. Rowling</author>
<year>2005</year>
<price>29.99</price>
</book>
</bookstore>
"""

# Selector falls back to the HTML parser when only `text` is given, so the
# XML parser has to be requested explicitly for XML input.
selector = Selector(text=xml_data, type="xml")

# Query every <book> relative to itself so that a title is always paired with
# the author of the same book. Collecting two independent document-wide lists
# and zipping them would silently shift the pairs if any book lacked a field.
books = selector.xpath("//book")
titles = [book.xpath("title/text()").get(default="") for book in books]
authors = [book.xpath("author/text()").get(default="") for book in books]

for title, author in zip(titles, authors):
    print(f"Title: {title}, Author: {author}")
import json
# Sample JSON document mirroring the bookstore structure: a top-level
# "bookstore" object holding a "books" array of book records.
json_data = """
{
"bookstore": {
"books": [
{
"title": "Everyday Italian",
"author": "Giada De Laurentiis",
"year": 2005,
"price": 30.00
},
{
"title": "Harry Potter",
"author": "J.K. Rowling",
"year": 2005,
"price": 29.99
}
]
}
}
"""

# json.loads converts the JSON text into nested Python dicts and lists.
response_dict = json.loads(json_data)

# Walk the "books" array and print the title/author of each record.
for book in response_dict['bookstore']['books']:
    print(f"Title: {book['title']}, Author: {book['author']}")
通过以上方法,可以方便地处理 XML 和 JSON 数据,并提取需要的信息。