"""Management command that normalizes the delimiters used in ``Entry.subject``."""

from django.core.management.base import BaseCommand

from core.models import Entry

# Maps every character treated as a subject delimiter onto a comma so that a
# single ``str.translate`` pass unifies them before splitting.
_DELIMITERS_TO_COMMA = str.maketrans({delimiter: "," for delimiter in ";|/-"})


class Command(BaseCommand):
    """Rewrite each ``Entry.subject`` as a ``", "``-separated list of subjects."""

    help = "Normalize subjects (split by commas, unify delimiters like ; and -)"

    def add_arguments(self, parser):
        """Register the ``--dry-run`` flag on the command's argument parser."""
        parser.add_argument(
            "--dry-run",
            action="store_true",
            help="Preview changes without saving",
        )

    def handle(self, *args, **options):
        """Normalize the subject of every ``Entry`` and report what changed.

        Each entry whose subject differs from its normalized form is written
        to stdout. Unless ``--dry-run`` was given, the new value is saved,
        updating only the ``subject`` field. A summary count is printed last.
        """
        dry_run = options["dry_run"]
        # The per-row message must say what actually happened: rows are only
        # "would-be" changes during a dry run.
        verb = "Would change" if dry_run else "Changed"
        changes = 0

        for entry in Entry.objects.all():
            # A missing subject is compared as "" so it is never counted as a
            # change (normalizing "" yields "").
            subj = entry.subject or ""
            normalized = self.normalize_subjects(subj)
            if subj == normalized:
                continue

            changes += 1
            self.stdout.write(f"{verb}: {subj} → {normalized}")
            if not dry_run:
                entry.subject = normalized
                entry.save(update_fields=["subject"])

        if dry_run:
            self.stdout.write(
                self.style.WARNING(f"[DRY RUN] {changes} subjects would be changed.")
            )
        else:
            self.stdout.write(self.style.SUCCESS(f"Updated {changes} subjects."))

    def normalize_subjects(self, text: str) -> str:
        """Return *text* with all subject delimiters unified to ``", "``.

        The characters ``;``, ``|``, ``/``, ``-`` and ``,`` are all treated as
        delimiters. The text is split on them, each part is stripped of
        surrounding whitespace, empty parts are dropped, and the remainder is
        joined with ``", "``. For example ``"a; b|c"`` becomes ``"a, b, c"``.

        Args:
            text: Raw subject string; any falsy value yields ``""``.

        Returns:
            The normalized, comma-separated subject string.
        """
        if not text:
            return ""

        # NOTE(review): every hyphen and slash is a delimiter, so a hyphenated
        # term such as "Afro-American" is split into two subjects. This keeps
        # the existing behavior; confirm it is intended.
        unified = text.translate(_DELIMITERS_TO_COMMA)

        # Stripping each part also removes the spaces around " - ", so spaced
        # hyphens need no separate handling.
        stripped = (part.strip() for part in unified.split(","))
        return ", ".join(part for part in stripped if part)