# Reconstructed from git patch 8ae283fe418a030114f0d7a9df23916f36a40c31
# (Subject: "v0.4 Phase 3: asset validation in mdcms build"), whose line
# structure had been collapsed. The patch inserts this section into mdcms.py
# between generate_search_json() and the "Core build logic" section.
#
# The patch adds no import lines, so this code relies on `re`, `yaml` and
# `Path` already being in scope in mdcms.py.
#
# The same patch wires the check into run_build(), right after the
# "Wrote search.json" message and before the auto-created-sections notice;
# missing assets are printed in yellow and the build carries on:
#
#     asset_warnings = validate_assets(site_path, cfg)
#     for w in asset_warnings:
#         click.echo(click.style(w, fg="yellow"))

# ─── Asset validation ─────────────────────────────────────────

# Matches "assets/" followed by word characters, dots, dashes and slashes.
_ASSET_RE = re.compile(r'assets/[\w.\-/]+')


def _collect_yaml_assets(val, source: str, out: list):
    """Recursively collect ``assets/`` references from parsed YAML data.

    Every string that starts with ``"assets/"`` is appended to *out* as an
    ``(asset_path, source)`` tuple. Dict values and list items are descended
    into; dict keys and values of any other type are ignored.

    Args:
        val: A parsed YAML node (string, dict, list or any other scalar).
        source: Label stored alongside each hit, e.g. ``"config.yml"``.
        out: List that is mutated in place; hits are appended to it.
    """
    if isinstance(val, str):
        if val.startswith("assets/"):
            out.append((val, source))
    elif isinstance(val, dict):
        for v in val.values():
            _collect_yaml_assets(v, source, out)
    elif isinstance(val, list):
        for item in val:
            _collect_yaml_assets(item, source, out)


def validate_assets(site_path: Path, cfg: dict) -> list:
    """Return warning strings for ``assets/`` references missing on disk.

    References are gathered from three places, in this order:

    1. string values in *cfg* (reported as coming from ``config.yml``);
    2. string values in the YAML file named by ``cfg["theme"]``, if that
       file exists under *site_path*;
    3. regex matches in every ``*.md`` file below ``pages/`` and ``posts/``.

    Each distinct ``(asset_path, source)`` pair is checked once, relative to
    *site_path*. Files that cannot be read or parsed are skipped silently so
    that validation never aborts the build.

    Args:
        site_path: Root directory of the site.
        cfg: Parsed site configuration.

    Returns:
        One warning string per missing ``(asset_path, source)`` pair, in
        discovery order. Empty when nothing is missing.
    """
    refs: list = []

    _collect_yaml_assets(cfg, "config.yml", refs)

    theme_file = cfg.get("theme")
    # Only a non-empty string can name a theme file; any other value (e.g. an
    # inline mapping) would make the path join below raise TypeError.
    if isinstance(theme_file, str) and theme_file:
        theme_path = site_path / theme_file
        if theme_path.exists():
            try:
                theme_data = yaml.safe_load(
                    theme_path.read_text(encoding="utf-8")
                ) or {}
            except (OSError, UnicodeDecodeError, yaml.YAMLError):
                # Best effort: an unreadable, non-UTF-8 or malformed theme
                # file is not this check's concern, so it is skipped.
                pass
            else:
                _collect_yaml_assets(theme_data, theme_file, refs)

    for folder in ("pages", "posts"):
        d = site_path / folder
        if not d.is_dir():
            continue
        for md_file in sorted(d.rglob("*.md")):
            try:
                content = md_file.read_text(encoding="utf-8")
            except (OSError, UnicodeDecodeError):
                # UnicodeDecodeError is a ValueError, not an OSError; without
                # it a single non-UTF-8 file would crash the whole build.
                continue
            rel = md_file.relative_to(site_path).as_posix()
            for m in _ASSET_RE.finditer(content):
                # The pattern allows ".", so a reference that ends a sentence
                # ("see assets/logo.png.") captures the period; drop it to
                # avoid a false "not found" warning.
                refs.append((m.group().rstrip("."), rel))

    messages = []
    # dict.fromkeys() de-duplicates (path, source) pairs while keeping the
    # order in which they were discovered.
    for asset_path, source in dict.fromkeys(refs):
        if not (site_path / asset_path).exists():
            messages.append(
                f"Warning: asset not found: {asset_path}\n Referenced in: {source}"
            )
    return messages