# Goal: scrape the owner reviews of the Audi A8 from Autohome (汽车之家).
# Source page: https://k.autohome.com.cn/146/
import requests
from pyquery import PyQuery as pq
# Output CSV shared by the whole module; parse_data() appends one row per review.
# Opening in "w" mode truncates any existing file as soon as this line runs.
f = open("奥迪A8.csv", "w", encoding="utf-8")
def download_page_source(url, timeout=10):
    """Download *url* and return its body decoded as GBK text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests.get`` may block indefinitely.

    Returns:
        The response body as ``str``.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with a 4xx/5xx status.
    """
    resp = requests.get(url, timeout=timeout)
    # Fail loudly on an HTTP error instead of parsing an error page as data.
    resp.raise_for_status()
    # Force GBK decoding rather than trusting the charset requests guesses
    # (presumably the site serves GBK-encoded pages -- confirm if it changes).
    resp.encoding = "gbk"
    return resp.text
def parse_data(source):
    """Parse a review-list page and write one CSV row per review to ``f``.

    Every ``div.mt-10`` element in *source* is handled as one review. The
    fields are read from the review's ``dl`` entries by position, so a
    review that lacks the "购车经销商" (purchasing dealer) entry first gets
    an empty placeholder inserted in third position to keep the later
    entries at the same indexes.

    Args:
        source: HTML text of the page.

    Returns:
        None. Rows are written to the module-level file object ``f`` as
        plain comma-joined text (values are not quoted or escaped).
    """
    # Placeholder "purchasing dealer" entry; its link text is blank so it
    # contributes nothing to any extracted value.
    placeholder_html = (
        '<dl class="choose-dl">\n'
        '<dt>购车经销商</dt>\n'
        '<dd>\n'
        '<a href="###" class="js-dearname" dataval=\'5928,47761\' '
        'data-evalid="3651594"target="_blank"> </a>\n'
        '</dd>\n'
        '</dl>'
    )

    query = pq(source)
    for div in query("div.mt-10").items():
        # nth-child(n) matches an element that is the n-th child of its parent;
        # dt:contains(购车经销商) matches a <dt> whose text contains that label.
        if not div("div > dl:nth-child(3) > dt:contains(购车经销商)"):
            # .after() inserts the given node right after the matched node.
            # A fresh node is built each time so every review gets its own.
            div("div > dl:nth-child(2)").after(pq(placeholder_html))

        # Car model: first <dd> of the 1st entry, with all line breaks and
        # spaces removed.
        chexing = (
            div("div > dl:nth-child(1) > dd")
            .eq(0)
            .text()
            .replace("\n", "")
            .replace(" ", "")
        )
        address = div("div > dl:nth-child(2) > dd").text()
        date = div("div > dl:nth-child(4) > dd").text()
        # Price with the "万元" unit suffix stripped.
        price = div("div > dl:nth-child(5) > dd").text().replace("万元", "").strip()
        # The 6th entry holds two paragraphs: fuel consumption, then mileage;
        # the unit suffixes are stripped from both.
        youhao = (
            div("div > dl:nth-child(6) > dd > p:nth-child(1)")
            .text()
            .replace("升/百公里", "")
            .strip()
        )
        gongli = (
            div("div > dl:nth-child(6) > dd > p:nth-child(2)")
            .text()
            .replace("公里", "")
            .strip()
        )
        # Per-category scores, split on whitespace into separate columns.
        others = div("div > div > dl > dd").text().split()

        f.write(
            f"{chexing},{address},{date},{price},{youhao},{gongli},{','.join(others)}\n"
        )
def main(url):
    """Download the page at *url* and pass its HTML to ``parse_data``."""
    parse_data(download_page_source(url))
if __name__ == '__main__':
    # Review-list page to scrape when the file is run as a script.
    url = "https://k.autohome.com.cn/146/"
    try:
        main(url)
    finally:
        # The output file is opened at module level and nothing else closes
        # it; close it here so buffered rows are flushed even on failure.
        f.close()