pyquery实战

目标：抓取汽车之家上奥迪 A8 的口碑测评数据，并保存为 CSV 文件
https://k.autohome.com.cn/146/

import atexit

import requests
from pyquery import PyQuery as pq
# Output CSV that parse_data() writes rows to. Mode "w" truncates any
# existing file each time the module is loaded.
f = open("奥迪A8.csv", mode="w", encoding='utf-8')
# Nothing else in this file closes the handle, so close (and flush) it
# explicitly when the interpreter exits.
atexit.register(f.close)
def download_page_source(url, timeout=10):
    """Fetch *url* with an HTTP GET and return the body decoded as GBK.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before requests gives up.
            Without a timeout, requests.get() can block indefinitely.

    Returns:
        The response body as a str.
    """
    resp = requests.get(url, timeout=timeout)
    # Force GBK decoding rather than letting requests guess the charset
    # (presumably the target page is GBK-encoded -- confirm against the site).
    resp.encoding = "gbk"
    return resp.text
 
def parse_data(source):
    """Parse page HTML and write one CSV line per ``div.mt-10`` element.

    For every matching element the following values are joined with commas
    and written to the module-level file ``f``: the first ``dd`` text under
    the 1st ``dl`` (newlines and spaces removed), the ``dd`` text under the
    2nd, 4th and 5th ``dl`` (with "万元" stripped from the latter), the two
    ``p`` values under the 6th ``dl`` (with "升/百公里" and "公里" stripped),
    and finally the whitespace-split text of ``div > div > dl > dd``.

    Args:
        source: HTML text of the page to parse.
    """
    query = pq(source)
    for div in query("div.mt-10").items():
        # nth-child(n) selects an element that is the n-th child of its parent.
        # dt:contains(购车经销商) selects a dt whose text contains "购车经销商".
        if not div("div > dl:nth-child(3) > dt:contains(购车经销商)"):
            # The third dl is not the dealer entry: insert a placeholder dl
            # after the second one so the positional selectors below
            # (4th, 5th, 6th dl) line up for every element.
            div("div > dl:nth-child(2)").after(pq("""<dl class="choose-dl">
        <dt>购车经销商</dt>
        <dd>
        <a href="###" class="js-dearname" dataval='5928,47761' data-evalid="3651594"target="_blank">&nbsp</a>
        </dd>
        </dl>"""))

        # NOTE(review): the descendant combinator (space) between dl/dd/p is
        # reconstructed from selectors that had lost their whitespace;
        # confirm against the live page markup.
        car_model = (
            div("div > dl:nth-child(1) dd").eq(0).text()
            .replace("\n", "")
            .replace(" ", "")
        )
        address = div("div > dl:nth-child(2) dd").text()
        date = div("div > dl:nth-child(4) dd").text()
        price = div("div > dl:nth-child(5) dd").text().replace("万元", "").strip()
        fuel_use = (
            div("div > dl:nth-child(6) dd p:nth-child(1)").text()
            .replace("升/百公里", "")
            .strip()
        )
        mileage = (
            div("div > dl:nth-child(6) dd p:nth-child(2)").text()
            .replace("公里", "")
            .strip()
        )
        # Individual rating values, one per whitespace-separated token.
        others = div("div > div > dl >dd").text().split()
        f.write(f"{car_model},{address},{date},{price},{fuel_use},{mileage},{','.join(others)}\n")

def main(url):
    """Download the page at *url* and pass its HTML to parse_data()."""
    parse_data(download_page_source(url))
if __name__ == "__main__":
    # Script entry point: process the single hard-coded page.
    main("https://k.autohome.com.cn/146/")

 

posted @ 2022-05-13 15:26  屠魔的少年  阅读(5)  评论(0)    收藏  举报