From 514d2f0ef330cb551048a73608c483494488efd2 Mon Sep 17 00:00:00 2001
From: Joshua Laymon
Date: Sat, 16 Aug 2025 15:53:26 +0000
Subject: [PATCH] Update web/core/views.py

---
Review notes (text in this area is ignored by git am):
* Line breaks and indentation were restored from a whitespace-collapsed copy
  of this patch; the indentation of the three context lines and of the
  removed line is assumed.
* Changes relative to that copy: a negative limit is clamped to 0, the
  redundant comma-spacing pass is dropped, docstrings are expanded, and the
  file now ends with a newline. The post-image blob id on the index line
  predates these changes.

 web/core/views.py | 105 ++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 104 insertions(+), 1 deletion(-)

diff --git a/web/core/views.py b/web/core/views.py
index 6c97e34..e535970 100644
--- a/web/core/views.py
+++ b/web/core/views.py
@@ -638,4 +638,107 @@ def stats_page(request):
             "top_refs": top_refs,
             "book_distribution": book_distribution,
         },
-    )
\ No newline at end of file
+    )
+
+
+# -- helper: conservative subject splitter/cleaner
+def _normalize_subject_line(raw: str) -> str:
+    """Return *raw* as a clean, comma-separated, de-duplicated subject list.
+
+    - Semicolons and pipes are treated as commas.
+    - A hyphen, en dash or em dash is a separator only when it has whitespace
+      on both sides, so hyphenated terms like 'self-control' are preserved.
+    - Parts are trimmed, runs of two or more whitespace characters inside a
+      part become one space, and empty parts are dropped.
+    - Duplicates are removed case-insensitively; the first spelling wins and
+      the original order is kept.
+
+    A falsy *raw* (empty string or None) yields "".
+    """
+    if not raw:
+        return ""
+
+    # 1) unify the obvious separators
+    s = raw.replace(";", ",").replace("|", ",")
+
+    # 2) dash as separator ONLY when surrounded by whitespace
+    s = re.sub(r"\s+[–—-]\s+", ", ", s)
+
+    # 3) split on commas, trim, collapse internal whitespace
+    parts = (re.sub(r"\s{2,}", " ", p.strip()) for p in s.split(","))
+
+    # 4) drop empties and de-dup (case-insensitive) while preserving order
+    seen = set()
+    cleaned = []
+    for part in parts:
+        key = part.lower()
+        if part and key not in seen:
+            seen.add(key)
+            cleaned.append(part)
+
+    return ", ".join(cleaned)
+
+
+def _is_staff(user):
+    """Access test for user_passes_test: authenticated staff users only."""
+    return user.is_authenticated and user.is_staff
+
+
+@user_passes_test(_is_staff)
+@require_http_methods(["GET", "POST"])
+def normalize_subject(request):
+    """Preview or apply subject normalization for Entry rows, ordered by id.
+
+    GET  = dry-run preview; nothing is saved (optional ?limit=).
+    POST = apply; each entry whose subject changes is saved (optional
+           'limit' form field).
+
+    A missing, blank, non-numeric, zero or negative limit means "no limit".
+    """
+    dry_run = request.method == "GET"
+    params = request.GET if dry_run else request.POST
+    try:
+        # int() tolerates surrounding whitespace; "" and None fall back to 0.
+        # Clamp at 0 so a negative value also means "no limit".
+        limit = max(int(params.get("limit") or 0), 0)
+    except ValueError:
+        limit = 0
+
+    qs = Entry.objects.all().order_by("id")
+    total_considered = qs.count()
+    if limit:
+        qs = qs[:limit]
+        total_considered = min(total_considered, limit)
+
+    rows = []
+    changed_count = 0
+
+    for entry in qs:
+        before = entry.subject or ""
+        after = _normalize_subject_line(before)
+        changed = after != before
+
+        # Preview lists every considered entry; apply lists only changed rows.
+        if dry_run or changed:
+            rows.append(
+                {
+                    "id": entry.id,
+                    "before": before,
+                    "after": after,
+                    "changed": changed,
+                }
+            )
+
+        if changed and not dry_run:
+            entry.subject = after
+            entry.save(update_fields=["subject"])
+            changed_count += 1
+
+    context = {
+        "dry_run": dry_run,
+        "limit": limit,
+        "total_considered": total_considered,
+        "rows": rows,
+        "changed_count": changed_count,
+    }
+    return render(request, "normalize_subject_result.html", context)