# encoding: utf-8
import re

import requests
from lxml import html
def get_page_number(num, timeout=10):
    """Return the link targets listed on one index page.

    The page URL is built by appending ``num`` to the site root; the page is
    downloaded and every value matched by ``//ul/li/a/@href`` is collected.

    Args:
        num: Page identifier as a string; it is concatenated to the base URL
            verbatim.
        timeout: Seconds passed to ``requests.get`` so a stalled server
            cannot block the crawl forever.

    Returns:
        A list with the matched ``href`` values, in the order the XPath
        query returns them (empty if nothing matches).
    """
    # NOTE(review): the base URL has no trailing "/" so ``num`` is appended
    # straight onto the host name -- confirm the intended path separator.
    url = "http://www.中国卫国联盟.com" + num
    response = requests.get(url, timeout=timeout).content
    selector = html.fromstring(response)
    return list(selector.xpath("//ul/li/a/@href"))
def get_image_title(url, timeout=10):
    """Return the first text node found under an ``<h2>`` on the page.

    Args:
        url: Address of the page to download.
        timeout: Seconds passed to ``requests.get`` so a stalled server
            cannot block the crawl forever.

    Returns:
        The first result of the ``//h2/text()`` XPath query.

    Raises:
        IndexError: If the page has no ``<h2>`` text at all.
    """
    response = requests.get(url, timeout=timeout).content
    selector = html.fromstring(response)
    return selector.xpath("//h2/text()")[0]
def get_image_amount(url, timeout=10):
    """Return the label of the second-to-last link in the pager block.

    Args:
        url: Address of the page to download.
        timeout: Seconds passed to ``requests.get`` so a stalled server
            cannot block the crawl forever.

    Returns:
        The text of ``//div[@class='page']/a[last()-1]``.  The value is
        returned as text, not converted to ``int``.

    Raises:
        IndexError: If the page has no matching pager link.
    """
    response = requests.get(url, timeout=timeout).content
    selector = html.fromstring(response)
    return selector.xpath("//div[@class='page']/a[last()-1]/text()")[0]
def get_image_detail_website(url, timeout=10):
    """Collect the image source URL from every sub-page of ``url``.

    The number of sub-pages is read from the second-to-last pager link on
    ``url``.  Sub-pages are then fetched as ``<url>/1`` .. ``<url>/<count>``
    and the first ``<img src>`` inside ``div.content`` is taken from each.

    Args:
        url: Address of the first page; also the prefix of every sub-page.
        timeout: Seconds passed to each ``requests.get`` call so a stalled
            server cannot block the crawl forever.

    Returns:
        A list of image ``src`` values, one per sub-page, in page order.

    Raises:
        IndexError: If the pager link or an image is missing on a page.
        ValueError: If the pager label is not an integer.
    """
    first_page = html.fromstring(requests.get(url, timeout=timeout).content)
    image_amount = int(
        first_page.xpath("//div[@class='page']/a[last()-1]/text()")[0]
    )

    image_detail_websites = []
    # Sub-pages are numbered from 1, so iterate over 1..image_amount directly.
    for page_no in range(1, image_amount + 1):
        image_detail_link = '{}/{}'.format(url, page_no)
        detail_page = html.fromstring(
            requests.get(image_detail_link, timeout=timeout).content
        )
        image_download_link = detail_page.xpath(
            "//div[@class='content']/a/img/@src"
        )[0]
        image_detail_websites.append(image_download_link)
    return image_detail_websites
def download_image(image_title, image_detail_websites, timeout=10):
    """Download every image URL and save it as ``<title><n>.jpg``.

    Files are written to the current working directory and numbered from 1
    in the order of ``image_detail_websites``.  A progress line with the
    title, the current number and the total is printed before each download.

    Args:
        image_title: Prefix used for the file names.  It comes from a
            scraped page, so path separators and characters that are
            reserved in file names are replaced with ``_``.
        image_detail_websites: Iterable of image URLs with a known length.
        timeout: Seconds passed to each ``requests.get`` call so a stalled
            server cannot block the crawl forever.

    Returns:
        None.
    """
    # Neutralise "/", "\" and other reserved characters so an untrusted
    # title can neither break open() nor point outside the current directory.
    safe_title = re.sub(r'[\\/:*?"<>|]', '_', '%s' % (image_title,))
    amount = len(image_detail_websites)
    for num, link in enumerate(image_detail_websites, 1):
        filename = '%s%s.jpg' % (safe_title, num)
        # One pre-formatted string prints identically on Python 2 and 3.
        print('%s %s %s' % (image_title, num, amount))
        # Fetch before opening the file so a failed request does not leave
        # an empty file behind.
        data = requests.get(link, timeout=timeout).content
        with open(filename, 'wb') as f:
            f.write(data)
# Script body: read index page "2" and, for every link it lists, print the
# link and download all images reachable from it.
# NOTE(review): this loop runs at import time.  An
# ``if __name__ == '__main__':`` guard that asks the user for the page number
# was sketched here but left disabled -- confirm before enabling it.
for link in get_page_number("2"):
    # Parenthesised single-argument print works on both Python 2 and 3.
    print(link)
    download_image(get_image_title(link), get_image_detail_website(link))