# core/management/commands/normalize_scripture.py
from __future__ import annotations

from django.core.management.base import BaseCommand
from django.db import transaction

from core.models import Entry
from core.scripture_normalizer import normalize_scripture_field

# Maximum number of changed rows echoed to the console after a run.
PREVIEW_LIMIT = 100


class Command(BaseCommand):
    """Normalize ``Entry.scripture_raw`` values, as a dry-run unless ``--apply``.

    Each entry's ``scripture_raw`` is stripped and passed through
    ``normalize_scripture_field``. Rows whose normalized text differs from the
    stripped original are counted, previewed on stdout and, with ``--apply``,
    saved back in batches.
    """

    help = "Normalize scripture_raw for all entries. Use --apply to save; otherwise dry-run."

    def add_arguments(self, parser):
        """Register the ``--apply``, ``--limit`` and ``--batch`` options."""
        parser.add_argument("--apply", action="store_true", help="Persist changes.")
        parser.add_argument("--limit", type=int, default=0, help="Limit rows for testing (0 = all).")
        parser.add_argument("--batch", type=int, default=500, help="Apply in batches to avoid long transactions.")

    def handle(self, *args, **opts):
        """Scan entries in ``id`` order, normalize them and report the outcome.

        Options read from ``opts``:
            apply: when true, changed rows are saved; otherwise nothing is written.
            limit: maximum number of entries to scan; 0 (or unset) scans all.
            batch: number of changed rows saved per transaction; 0 (or unset)
                falls back to 500.
        """
        do_apply = opts["apply"]
        limit = int(opts["limit"] or 0)
        batch = int(opts["batch"] or 500)

        qs = Entry.objects.all().order_by("id")
        if limit:
            qs = qs[:limit]

        changed = 0
        warnings_total = 0
        # Only the first PREVIEW_LIMIT changes are ever printed, so only those
        # are retained; keeping every changed row would grow without bound on
        # large tables and defeat the streaming iterator below.
        preview_rows: list[tuple] = []
        pending: list[Entry] = []

        for e in qs.iterator():
            # NOTE(review): the comparison below is against the *stripped*
            # stored value, so a row that differs only by surrounding
            # whitespace is neither reported nor persisted - confirm intended.
            original = (e.scripture_raw or "").strip()
            normalized, warns = normalize_scripture_field(original)

            if warns:
                warnings_total += len(warns)

            if normalized == original:
                continue

            changed += 1
            if len(preview_rows) < PREVIEW_LIMIT:
                preview_rows.append((e.id, original, normalized))

            if not do_apply:
                continue

            e.scripture_raw = normalized
            pending.append(e)
            if len(pending) >= batch:
                self._flush(pending)

        # Persist whatever is left over from the last, partially filled batch.
        if do_apply and pending:
            self._flush(pending)

        # Summarize to console
        self.stdout.write(self.style.SUCCESS(
            f"{'APPLIED' if do_apply else 'DRY-RUN'}: changed={changed}, warnings={warnings_total}, preview={len(preview_rows)} shown"
        ))
        # Print a small preview to console (first PREVIEW_LIMIT changes)
        for rid, old, new in preview_rows:
            self.stdout.write(f"[{rid}] {old!r} -> {new!r}")

    def _flush(self, pending):
        """Save every entry in ``pending`` inside one transaction, then empty the list."""
        with transaction.atomic():
            for obj in pending:
                # Restrict the UPDATE to the single column this command modifies.
                obj.save(update_fields=["scripture_raw"])
        pending.clear()