# -*- coding: utf-8 -*-
"""Download real photos from Wikimedia Commons / Pexels into seed_assets/medias.

For every entry in ``ITEMS`` the script resolves the Wikimedia Commons file
name to a direct URL through the MediaWiki API and downloads it; when that
is not possible it uses the entry's fallback direct URL instead.
"""
import json
import time
import urllib.parse
import urllib.request
from pathlib import Path

ROOT = Path(__file__).resolve().parents[1]
OUT = ROOT / 'seed_assets' / 'medias'
OUT.mkdir(parents=True, exist_ok=True)

# User-Agent header sent with every request made by this script.
UA = 'DockScope/1.0 (educational demo)'

# An existing file must be larger than this to count as already downloaded;
# anything smaller is fetched again.
MIN_VALID_BYTES = 20_000

# Pause after each item that was fetched over the network.
PAUSE_SECONDS = 2.5

# (local file name, Wikimedia file name or None, fallback direct URL or None)
ITEMS = [
    (
        '01_concrete_crack_bridge.jpg',
        'Darmsheim_Brücke03_2010-06-29.jpg',
        None,
    ),
    (
        '02_bridge_concrete_cracks.jpg',
        'Darmsheim_Brücke04_2010-06-29.jpg',
        None,
    ),
    (
        '03_steel_bridge_corrosion.jpg',
        'Nandu_River_Iron_Bridge_corrosion_-_02.jpg',
        None,
    ),
    (
        '04_concrete_bending_cracks.jpg',
        'PHOTO_B_EMC_CemPozz_Feb_13.jpg',
        None,
    ),
    (
        '05_bridge_substructure.jpg',
        'I-35W_bridge_structure_before_collapse.jpg',
        None,
    ),
    (
        # NOTE(review): the Commons source is a .png but it is stored under a
        # .jpg name; the name is left untouched because other seed data may
        # reference it -- confirm before renaming.
        '06_shrinkage_cracks_concrete.jpg',
        'Beton-Schwindrisse.png',
        None,
    ),
    (
        '07_asphalt_crocodile_cracking.jpg',
        'Cracked_asphalt.jpg',
        None,
    ),
    (
        '08_concrete_rebar_corrosion.jpg',
        'Concrete_bridge_surface_reinforcement_corrosion_due_to_chlorides.jpg',
        'https://images.pexels.com/photos/2219024/pexels-photo-2219024.jpeg?auto=compress&cs=tinysrgb&w=1600',
    ),
    (
        '09_steel_beam_site.jpg',
        'Steel_beams.jpg',
        None,
    ),
    (
        '10_rust_metal_texture.jpg',
        None,
        'https://images.pexels.com/photos/1157255/pexels-photo-1157255.jpeg?auto=compress&cs=tinysrgb&w=1600',
    ),
]

API = 'https://commons.wikimedia.org/w/api.php'


def commons_url(file_name: str) -> str | None:
    """Resolve a Wikimedia Commons file name to its direct file URL.

    Args:
        file_name: File name on Commons, without the ``File:`` prefix.

    Returns:
        The ``url`` of the first ``imageinfo`` entry, or ``None`` when the
        page is reported as missing or carries no image info.

    Raises:
        Whatever ``urllib.request.urlopen`` or ``json.loads`` raise on a
        network failure or a malformed response.
    """
    params = urllib.parse.urlencode(
        {
            'action': 'query',
            'titles': f'File:{file_name}',
            'prop': 'imageinfo',
            'iiprop': 'url',
            'format': 'json',
        },
        encoding='utf-8',
    )
    req = urllib.request.Request(f'{API}?{params}', headers={'User-Agent': UA})
    with urllib.request.urlopen(req, timeout=60) as resp:
        data = json.loads(resp.read().decode('utf-8'))
    for page in data.get('query', {}).get('pages', {}).values():
        if 'missing' in page:
            return None
        info = page.get('imageinfo') or []
        if info:
            return info[0].get('url')
    return None


def _skip_if_cached(dest: Path) -> bool:
    """Print a skip notice and return True when *dest* is already downloaded."""
    if dest.exists() and dest.stat().st_size > MIN_VALID_BYTES:
        print(f' skip {dest.name} ({dest.stat().st_size // 1024} KB)')
        return True
    return False


def download(url: str, dest: Path) -> bool:
    """Download *url* to *dest* unless a large-enough copy already exists.

    The payload is first written to a ``.part`` file next to *dest* and only
    renamed onto *dest* once it has been read completely, so a failed or
    interrupted transfer never leaves a truncated file at the final path.

    Args:
        url: Direct URL of the file to fetch.
        dest: Target path; its parent directory must already exist.

    Returns:
        ``True`` when *dest* was skipped as cached or written successfully.

    Raises:
        Whatever ``urllib.request.urlopen`` or the file operations raise;
        the temporary ``.part`` file is removed before the error propagates.
    """
    if _skip_if_cached(dest):
        return True
    req = urllib.request.Request(url, headers={'User-Agent': UA})
    part = dest.with_name(dest.name + '.part')
    try:
        with urllib.request.urlopen(req, timeout=180) as resp:
            part.write_bytes(resp.read())
        part.replace(dest)
    finally:
        # No-op after a successful rename; cleans up after a failure.
        part.unlink(missing_ok=True)
    print(f' ok {dest.name} ({dest.stat().st_size // 1024} KB)')
    return True


def main():
    """Fetch every entry of ``ITEMS`` into ``OUT``, reporting progress on stdout.

    Errors for a single item are printed and do not stop the run.
    """
    for local, wiki, fallback in ITEMS:
        print(local)
        dest = OUT / local
        # Check the cache before any network access, so a re-run neither
        # queries the API nor pauses for files that are already present.
        if _skip_if_cached(dest):
            continue

        candidates = []
        if wiki:
            # Deliberately broad: this is a best-effort batch script and one
            # bad item must not abort the remaining downloads.
            try:
                resolved = commons_url(wiki)
            except Exception as e:
                print(f' api error: {e}')
            else:
                if resolved:
                    candidates.append(resolved)
        if fallback:
            candidates.append(fallback)
        if not candidates:
            print(' no url')
            continue

        # Try the Commons URL first and fall through to the fallback URL
        # when the download itself fails, not only when the lookup fails.
        for url in candidates:
            try:
                download(url, dest)
            except Exception as e:
                print(f' fail: {e}')
            else:
                break
        time.sleep(PAUSE_SECONDS)
    print('saved to', OUT)


if __name__ == '__main__':
    main()