# Code
import time
import requests
from urllib.parse import urljoin
import os
from concurrent.futures import ThreadPoolExecutor, as_completed
def fetch_data(url, timeout=10):
    """Request ``url`` with a browser-like User-Agent and return its JSON body.

    Args:
        url: Address of the JSON endpoint to request.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON payload, or ``None`` when the request fails, the
        status code is not 200, or the body is not valid JSON. A message is
        printed in each failure case.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
    }
    try:
        # Without a timeout, requests can wait on an unresponsive server forever.
        response = requests.get(url, headers=headers, timeout=timeout)
    except requests.RequestException as exc:
        # Connection errors and timeouts follow the same "print and return
        # None" contract as a bad status code.
        print(f"请求失败:{exc}")
        return None

    if response.status_code != 200:
        print(f"请求失败,状态码:{response.status_code}")
        return None

    try:
        return response.json()
    except ValueError as exc:
        # A 200 response can still carry a body that is not JSON.
        print(f"响应不是有效的 JSON:{exc}")
        return None
def save_image(image_url, title, timeout=10):
    """Download ``image_url`` and write it to ``movie_images/<title><ext>``.

    The file extension is taken from the path component of ``image_url``.
    A success or failure message is printed; nothing is returned.

    Args:
        image_url: Address of the image to download.
        title: Name used for the saved file.
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.RequestException: Network errors and timeouts are not caught
            here and propagate to the caller.
    """
    from urllib.parse import urlparse

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
    }
    # Without a timeout, requests can wait on an unresponsive server forever.
    response = requests.get(image_url, headers=headers, timeout=timeout)
    if response.status_code != 200:
        print(f"{title} 图片下载失败")
        return

    # exist_ok avoids the check-then-create race when several threads reach
    # this point at the same time.
    os.makedirs('movie_images', exist_ok=True)

    # Use only the URL path so a query string or fragment cannot leak into
    # the file name.
    file_ext = os.path.splitext(urlparse(image_url).path)[1]

    # A path separator inside the title would otherwise be interpreted as a
    # sub-directory and make open() fail.
    safe_title = str(title).replace('/', '_').replace('\\', '_')

    with open(os.path.join('movie_images', f'{safe_title}{file_ext}'), 'wb') as f:
        f.write(response.content)
    print(f"{title} 图片保存成功")
def download_single_movie(item):
    """Print one movie entry's summary line and save its cover image.

    Args:
        item: Mapping that provides the 'title', 'url', 'score' and
            'cover_url' keys.
    """
    # Read every field up front, in a fixed order, so a missing key fails
    # before anything is printed or downloaded.
    name, link, rating, cover = (
        item[key] for key in ('title', 'url', 'score', 'cover_url')
    )
    print(f"标题: {name}, URL: {link}, 分数: {rating}")
    save_image(cover, name)
def main():
    """Fetch the chart list, process every entry in a thread pool, and print the elapsed time.

    Each entry is handed to ``download_single_movie``. An exception raised
    while processing an entry is printed and does not stop the others.
    """
    chart_url = 'https://movie.douban.com/j/chart/top_list?type=11&interval_id=100%3A90&action='
    movies = fetch_data(chart_url)

    # The clock starts after the list has been fetched, so only the
    # concurrent phase is measured.
    started = time.time()
    if movies:
        with ThreadPoolExecutor(max_workers=20) as pool:
            pending = []
            for movie in movies:
                pending.append(pool.submit(download_single_movie, movie))

            for finished in as_completed(pending):
                try:
                    finished.result()
                except Exception as err:
                    print(f"发生错误: {err}")

    elapsed = time.time() - started
    print(f"并发处理总耗时:{elapsed:.2f} 秒")
# Run main() only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()