# -*- coding: utf-8 -*-
"""Download real photos from Wikimedia Commons / Pexels into seed_assets/medias."""
import json
import shutil
import time
import urllib.parse
import urllib.request
from pathlib import Path

# Parent of the directory that contains this script.
ROOT = Path(__file__).resolve().parents[1]
# Download target; created at import time if it does not exist yet.
OUT = ROOT.joinpath('seed_assets', 'medias')
OUT.mkdir(parents=True, exist_ok=True)

# User-Agent header sent with every HTTP request this script makes.
UA = 'DockScope/1.0 (educational demo)'

# One row per photo:
#   (local file name, Wikimedia Commons file name or None, fallback direct URL or None)
ITEMS = [
    ('01_concrete_crack_bridge.jpg', 'Darmsheim_Brücke03_2010-06-29.jpg', None),
    ('02_bridge_concrete_cracks.jpg', 'Darmsheim_Brücke04_2010-06-29.jpg', None),
    ('03_steel_bridge_corrosion.jpg', 'Nandu_River_Iron_Bridge_corrosion_-_02.jpg', None),
    ('04_concrete_bending_cracks.jpg', 'PHOTO_B_EMC_CemPozz_Feb_13.jpg', None),
    ('05_bridge_substructure.jpg', 'I-35W_bridge_structure_before_collapse.jpg', None),
    # NOTE(review): the Commons source is a .png but it is stored under a
    # .jpg name -- confirm that consumers of this file tolerate the mismatch.
    ('06_shrinkage_cracks_concrete.jpg', 'Beton-Schwindrisse.png', None),
    ('07_asphalt_crocodile_cracking.jpg', 'Cracked_asphalt.jpg', None),
    (
        '08_concrete_rebar_corrosion.jpg',
        'Concrete_bridge_surface_reinforcement_corrosion_due_to_chlorides.jpg',
        'https://images.pexels.com/photos/2219024/pexels-photo-2219024.jpeg?auto=compress&cs=tinysrgb&w=1600',
    ),
    ('09_steel_beam_site.jpg', 'Steel_beams.jpg', None),
    (
        '10_rust_metal_texture.jpg',
        None,
        'https://images.pexels.com/photos/1157255/pexels-photo-1157255.jpeg?auto=compress&cs=tinysrgb&w=1600',
    ),
]

# Endpoint of the Wikimedia Commons MediaWiki API.
API = 'https://commons.wikimedia.org/w/api.php'


def commons_url(file_name: str) -> str | None:
    """Look up the direct download URL of a Wikimedia Commons file.

    Sends an ``action=query`` / ``prop=imageinfo`` request for
    ``File:<file_name>`` to the Commons API and inspects the returned pages.

    Args:
        file_name: Commons file name without the ``File:`` prefix.

    Returns:
        The ``url`` field of the first ``imageinfo`` entry of the first page
        that has one, or ``None`` when a page is flagged ``missing`` or no
        page carries any ``imageinfo``.
    """
    query = {
        'action': 'query',
        'titles': f'File:{file_name}',
        'prop': 'imageinfo',
        'iiprop': 'url',
        'format': 'json',
    }
    encoded = urllib.parse.urlencode(query, encoding='utf-8')
    request = urllib.request.Request(
        f'{API}?{encoded}',
        headers={'User-Agent': UA},
    )
    with urllib.request.urlopen(request, timeout=60) as response:
        raw = response.read()
    payload = json.loads(raw.decode('utf-8'))

    pages = payload.get('query', {}).get('pages', {})
    for entry in pages.values():
        if 'missing' in entry:
            return None
        image_info = entry.get('imageinfo') or []
        if not image_info:
            continue
        return image_info[0].get('url')
    return None


def download(url: str, dest: Path) -> bool:
    """Fetch *url* into *dest* unless a plausible copy is already there.

    A destination larger than 20 000 bytes is treated as already downloaded
    and left untouched. Otherwise the response body is streamed into a
    sibling ``<name>.part`` file which is renamed over *dest* only after the
    transfer has finished. An interrupted transfer therefore never leaves a
    truncated *dest* that a later run could mistake for a complete download.

    Args:
        url: Address to fetch; sent with the module's ``UA`` User-Agent.
        dest: Final location of the downloaded file.

    Returns:
        ``True`` once *dest* is in place (pre-existing or freshly written).

    Raises:
        Exceptions raised by ``urllib.request.urlopen`` or by the filesystem
        operations propagate to the caller unchanged.
    """
    if dest.exists():
        existing = dest.stat().st_size
        if existing > 20_000:
            print(f' skip {dest.name} ({existing // 1024} KB)')
            return True

    req = urllib.request.Request(url, headers={'User-Agent': UA})
    partial = dest.with_name(dest.name + '.part')
    try:
        with urllib.request.urlopen(req, timeout=180) as resp, partial.open('wb') as fh:
            # Stream in chunks instead of buffering the whole body in memory.
            shutil.copyfileobj(resp, fh)
        # Publish the file only after the full body has been written.
        partial.replace(dest)
    finally:
        # No-op after a successful rename; removes the leftover on failure.
        partial.unlink(missing_ok=True)

    print(f' ok {dest.name} ({dest.stat().st_size // 1024} KB)')
    return True


def main():
    """Download every photo listed in ``ITEMS`` into ``OUT``.

    For each entry the Commons URL (when a Commons file name is given and
    the lookup succeeds) is tried first, followed by the fallback direct
    URL. The fallback is used both when the Commons lookup yields nothing
    and when downloading from the Commons URL fails. Errors are reported on
    stdout and never abort the run, so one bad entry does not block the
    remaining ones.
    """
    for local, wiki, fallback in ITEMS:
        print(local)

        # Ordered list of URLs to attempt for this entry.
        candidates = []
        if wiki:
            try:
                resolved = commons_url(wiki)
            except Exception as e:  # best effort: report and move on to the fallback
                print(f' api error: {e}')
            else:
                if resolved:
                    candidates.append(resolved)
        if fallback and fallback not in candidates:
            candidates.append(fallback)

        if not candidates:
            print(' no url')
            continue

        for url in candidates:
            try:
                download(url, OUT / local)
            except Exception as e:  # best effort: report and try the next candidate
                print(f' fail: {e}')
            else:
                break

        # Pause between entries to stay polite towards the remote hosts.
        time.sleep(2.5)
    print('saved to', OUT)


if __name__ == '__main__':
    main()