Add web/core/management/commands/normalize_scripture.py

This commit is contained in:
Joshua Laymon 2025-08-13 23:50:28 +00:00
parent b1731453ae
commit 4228192bc0

View File

@ -0,0 +1,63 @@
# core/management/commands/normalize_scripture.py
from __future__ import annotations
from django.core.management.base import BaseCommand
from django.db import transaction
from core.models import Entry
from core.scripture_normalizer import normalize_scripture_field
class Command(BaseCommand):
    """Normalize the ``scripture_raw`` field of ``Entry`` rows.

    Every entry (ordered by ``id``) is passed through
    ``normalize_scripture_field``. Without ``--apply`` the command is a
    dry-run: it only reports what would change. With ``--apply`` the changed
    rows are saved in batches, one transaction per batch, so that no single
    transaction covers the whole table.
    """

    help = "Normalize scripture_raw for all entries. Use --apply to save; otherwise dry-run."

    # Maximum number of changed rows echoed to the console after the summary.
    _PREVIEW_LIMIT = 100

    def add_arguments(self, parser):
        """Register the ``--apply``, ``--limit`` and ``--batch`` options."""
        parser.add_argument("--apply", action="store_true", help="Persist changes.")
        parser.add_argument("--limit", type=int, default=0, help="Limit rows for testing (0 = all).")
        parser.add_argument("--batch", type=int, default=500, help="Apply in batches to avoid long transactions.")

    def handle(self, *args, **opts):
        """Scan entries, optionally persist normalized values, print a report.

        Options read from ``opts``:
            apply: when true, save changed rows; otherwise dry-run.
            limit: process only the first N entries by id (0 or unset = all).
            batch: number of changed rows saved per transaction
                (0 or unset falls back to 500).
        """
        do_apply = opts["apply"]
        limit = int(opts["limit"] or 0)
        batch = int(opts["batch"] or 500)

        qs = Entry.objects.all().order_by("id")
        if limit:
            qs = qs[:limit]

        changed = 0
        warnings_total = 0
        # Only the first _PREVIEW_LIMIT changes are ever printed, so collecting
        # more than that would just grow memory with the size of the table.
        preview_rows = []
        pending: list[Entry] = []

        for e in qs.iterator():
            # NOTE(review): the comparison below is against the *stripped*
            # value, so a row whose only difference is surrounding whitespace
            # is not counted as changed and is not rewritten. Confirm that
            # this is intended.
            original = (e.scripture_raw or "").strip()
            normalized, warns = normalize_scripture_field(original)
            if warns:
                warnings_total += len(warns)
            if normalized == original:
                continue

            changed += 1
            if len(preview_rows) < self._PREVIEW_LIMIT:
                preview_rows.append((e.id, original, normalized))
            if do_apply:
                e.scripture_raw = normalized
                pending.append(e)
                if len(pending) >= batch:
                    self._flush(pending)

        # Save whatever is left over from the last, partially filled batch.
        if do_apply and pending:
            self._flush(pending)

        # Summarize to console
        mode = "APPLIED" if do_apply else "DRY-RUN"
        self.stdout.write(self.style.SUCCESS(
            f"{mode}: changed={changed}, warnings={warnings_total}, preview={len(preview_rows)} shown"
        ))
        # Print a small preview to console (first _PREVIEW_LIMIT changes)
        for rid, old, new in preview_rows:
            self.stdout.write(f"[{rid}] {old!r} -> {new!r}")

    @staticmethod
    def _flush(pending: list[Entry]) -> None:
        """Save each queued entry's ``scripture_raw`` in one transaction, then empty the queue."""
        with transaction.atomic():
            for obj in pending:
                obj.save(update_fields=["scripture_raw"])
        pending.clear()