Batch downloading images in Python can be done with the following steps; a complete code example is provided below.
Basic approach: using the requests library
```python
import os
import requests
from urllib.parse import urlparse

def download_images(image_urls, save_dir='images'):
    """
    Batch-download images into a target directory.
    :param image_urls: list of image URLs
    :param save_dir: target directory (defaults to an 'images' folder under the current directory)
    """
    # Create the target directory
    os.makedirs(save_dir, exist_ok=True)

    for url in image_urls:
        try:
            # Send the HTTP request
            response = requests.get(url, stream=True, timeout=5)
            response.raise_for_status()  # Raise if the request failed

            # Derive the file name from the URL path
            parsed_url = urlparse(url)
            filename = os.path.basename(parsed_url.path)
            if not filename:
                filename = f"image_{len(os.listdir(save_dir)) + 1}.jpg"

            # Save the file in chunks (streaming keeps memory usage low)
            filepath = os.path.join(save_dir, filename)
            with open(filepath, 'wb') as f:
                for chunk in response.iter_content(1024):
                    f.write(chunk)

            print(f"Downloaded: {filename}")
        except Exception as e:
            print(f"Failed to download {url} - error: {e}")

# Example usage
if __name__ == "__main__":
    # Image URLs (for example, parsed from a web page)
    image_urls = [
        "https://example.com/images/cat.jpg",
        "https://example.com/images/dog.png",
        "https://example.com/images/bird.webp"
    ]
    download_images(image_urls)
```
Advanced approach: scraping images from a web page
```python
from bs4 import BeautifulSoup
from urllib.parse import urljoin
import requests

def scrape_images_from_webpage(url, save_dir='images'):
    """Scrape all images from a web page and download them."""
    try:
        response = requests.get(url)
        soup = BeautifulSoup(response.text, 'html.parser')
        img_tags = soup.find_all('img')
        # urljoin resolves relative src values against the page URL
        image_urls = [urljoin(url, img.get('src')) for img in img_tags if img.get('src')]
        download_images(image_urls, save_dir)
    except Exception as e:
        print(f"Scraping failed: {e}")

# Example usage
scrape_images_from_webpage("https://example.com/gallery")
```
Key points
Installing the dependencies:
```
pip install requests beautifulsoup4
```
Core features:
- Creates the save directory automatically
- Handles file names sensibly (keeps the original name or generates one)
- Streams downloads to avoid loading whole files into memory
- Error handling around every request
Suggested extensions:
```python
# Multithreaded speed-up (using concurrent.futures)
from concurrent.futures import ThreadPoolExecutor

# Replace the loop inside download_images with:
# (download_single_image is a per-URL helper; see the enhanced version below)
with ThreadPoolExecutor(max_workers=8) as executor:
    executor.map(download_single_image, image_urls)
```
Caveats:
- Respect the site's robots.txt rules (see the sketch after this list)
- Send a User-Agent header to avoid being blocked
- Add a delay between downloads to avoid IP bans
- Handle different image formats (detect them via the MIME type)
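As a hedged illustration of the first caveat: Python's standard library ships urllib.robotparser, which can check a URL against a site's robots.txt before you download it. A minimal sketch follows; the helper name `allowed_by_robots` is illustrative, not part of the article's code.

```python
from urllib.parse import urlparse
from urllib.robotparser import RobotFileParser

def allowed_by_robots(url, user_agent='*'):
    """Return True if robots.txt permits user_agent to fetch url."""
    parsed = urlparse(url)
    rp = RobotFileParser()
    rp.set_url(f"{parsed.scheme}://{parsed.netloc}/robots.txt")
    rp.read()  # fetches and parses the site's robots.txt
    return rp.can_fetch(user_agent, url)

# Example: filter out disallowed URLs before downloading
# image_urls = [u for u in image_urls if allowed_by_robots(u)]
```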
Complete enhanced version
```python
import os
import requests
from urllib.parse import urlparse
from concurrent.futures import ThreadPoolExecutor
import time

def download_single_image(url, save_dir):
    try:
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                          'AppleWebKit/537.36 (KHTML, like Gecko) '
                          'Chrome/91.0.4472.124 Safari/537.36'
        }
        response = requests.get(url, headers=headers, stream=True, timeout=10)
        response.raise_for_status()

        # Use the content type to determine the file extension
        content_type = response.headers.get('content-type', 'image/jpeg')
        ext = 'jpg' if 'jpeg' in content_type else content_type.split('/')[-1]

        filename = os.path.basename(urlparse(url).path) or f"img_{int(time.time()*1000)}.{ext}"
        filepath = os.path.join(save_dir, filename)

        with open(filepath, 'wb') as f:
            for chunk in response.iter_content(8192):
                f.write(chunk)

        return True, filename
    except Exception as e:
        return False, str(e)

def batch_download(image_urls, save_dir='images', max_workers=8, delay=0.5):
    os.makedirs(save_dir, exist_ok=True)
    success = []
    failed = []

    def worker(url):
        result, info = download_single_image(url, save_dir)
        if result:
            success.append(info)
        else:
            failed.append((url, info))
        time.sleep(delay)  # Avoid hammering the server

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        executor.map(worker, image_urls)

    print(f"\nDone! Succeeded: {len(success)}  Failed: {len(failed)}")
    return success, failed

# Example usage
if __name__ == "__main__":
    urls = [
        "https://example.com/image1.jpg",
        "https://example.com/image2.png",
        # Add more URLs...
    ]
    success, failed = batch_download(urls)
    print("\nDownloaded:")
    for name in success:
        print(f"  - {name}")
    print("\nFailed:")
    for url, reason in failed:
        print(f"  - {url}: {reason}")
```
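One design detail worth noting: `time.sleep(delay)` runs inside each worker thread, so with `max_workers=8` the pool can still issue up to eight requests per delay window. If you need a strict global rate limit, serialize the requests through a shared lock or semaphore instead.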
Best-practice suggestions:
- Proxy support: pass a `proxies` argument to `requests.get`
- Rate limiting: use `time.sleep()` to control request frequency
- Resumable downloads: check the size of an existing file and skip or resume it
- Automatic retries: use the `tenacity` library (a sketch follows this list)
- Deduplication: compare file hashes to detect duplicate downloads
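As a hedged sketch of the retry suggestion, not part of the original code: tenacity lets you wrap a download helper in a decorator that retries failed requests with exponential backoff. The helper name `fetch_image` is illustrative.

```python
import requests
from tenacity import retry, stop_after_attempt, wait_exponential

@retry(stop=stop_after_attempt(3),            # give up after 3 attempts
       wait=wait_exponential(min=1, max=10),  # back off 1s, 2s, 4s... capped at 10s
       reraise=True)                          # surface the final exception to the caller
def fetch_image(url):
    """Fetch one image, retrying transient failures."""
    response = requests.get(url, timeout=10)
    response.raise_for_status()  # treat HTTP errors as failures worth retrying
    return response.content

# Example usage:
# data = fetch_image("https://example.com/image1.jpg")
```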
This covers the full range from the basic to the enhanced implementation, so you can pick whichever version fits your needs. The code includes detailed error handling and log output, making it a solid starting point for production use.