Update web/core/views.py

2025-08-16 16:10:33 +00:00 · 2025-08-16 16:10:33 +00:00 · c072ce486b
commit c072ce486b
parent e494815f87
1 changed file with 327 additions and 285 deletions
--- a/web/core/views.py
+++ b/web/core/views.py
@ -1,19 +1,27 @@
-from django.shortcuts import render, redirect, get_object_or_404
-from django.contrib.auth import authenticate, login
-from django.contrib.auth.decorators import login_required, user_passes_test
-from django.http import HttpResponse
-from django.contrib import messages
-from django.db.models import Q
-from django.views.decorators.http import require_http_methods
-from datetime import date, timedelta
+# core/views.py
+from __future__ import annotations
+
 import csv
 import re
+from datetime import date, timedelta
+from typing import List, Optional, Tuple

+from django.contrib import messages
+from django.contrib.auth import authenticate, login
+from django.contrib.auth.decorators import login_required, user_passes_test
+from django.db.models import Q
+from django.http import HttpResponse
+from django.shortcuts import get_object_or_404, redirect, render
+from django.views.decorators.http import require_http_methods
+
+from .forms import EntryForm, ImportForm
 from .models import Entry
-from .forms import ImportForm, EntryForm
-from .utils import terms, has_wildcards, wildcard_to_regex, import_csv_bytes
-from .scripture_normalizer import normalize_scripture_field  # <-- NEW
-from .source_normalizer import normalize_source_field  # NEW
+from .scripture_normalizer import normalize_scripture_field
+from .utils import has_wildcards, import_csv_bytes, terms, wildcard_to_regex
+
+# --------------------------
+# Helpers / common
+# --------------------------

 # Order + labels used in the Search UI
 FIELD_ORDER = [
@ -71,6 +79,10 @@ def entry_context(entry, result_ids):
    }


+# --------------------------
+# Search / Navigation
+# --------------------------
+
@login_required
 def search_page(request):
    """
@ -204,6 +216,10 @@ def entry_view(request, entry_id):
    return render(request, "entry_view.html", entry_context(entry, ids))


+# --------------------------
+# CRUD
+# --------------------------
+
@login_required
 def entry_add(request):
    """
@ -269,6 +285,10 @@ def entry_delete(request, entry_id):
    return render(request, "entry_delete_confirm.html", {"entry": entry})


+# --------------------------
+# Import / Export
+# --------------------------
+
@login_required
@user_passes_test(is_admin)
 def import_wizard(request):
@ -333,6 +353,10 @@ def export_csv(request):
    return response


+# --------------------------
+# Stats + Scripture Analytics
+# --------------------------
+
@login_required
 def stats_page(request):
    total = Entry.objects.count()
@ -342,6 +366,7 @@ def stats_page(request):

    from collections import Counter

+    # last 12 months bar series
    months = []
    y = today.year
    m = today.month
@ -368,215 +393,14 @@ def stats_page(request):
        for label, value in series
    ]

+    # top subjects
    counts = Counter()
    for subj in Entry.objects.exclude(subject="").values_list("subject", flat=True):
        for tag in [t.strip() for t in subj.split(",") if t.strip()]:
            counts[tag.lower()] += 1
    top_subjects = [{"name": n.title(), "count": c} for n, c in counts.most_common(10)]

-    return render(
-        request,
-        "stats.html",
-        {
-            "total": total,
-            "last30": last30,
-            "last365": last365,
-            "series": series,
-            "heights": heights,
-            "top_subjects": top_subjects,
-        },
-    )
-
-
-# ========= NEW: Scripture Normalizer endpoint =========
-
-@login_required
-@user_passes_test(is_admin)
-@require_http_methods(["GET", "POST"])
-def normalize_scripture(request):
-    """
-    GET  -> dry-run preview (summary + first 100 examples)
-    POST -> apply changes to all entries' scripture_raw (batched)
-    Optional ?limit= for preview subset.
-    """
-    apply = request.method == "POST"
-    limit = int(request.GET.get("limit", "0") or "0")
-
-    qs = Entry.objects.all().order_by("id")
-    if limit:
-        qs = qs[:limit]
-
-    changed = 0
-    warnings_total = 0
-    preview = []
-
-    if apply:
-        # write in batches to keep transactions short
-        from django.db import transaction
-        batch, pending = 500, []
-        for e in qs.iterator():
-            original = (e.scripture_raw or "").strip()
-            normalized, warns = normalize_scripture_field(original)
-            warnings_total += len(warns)
-            if normalized != original:
-                changed += 1
-                preview.append((e.id, original, normalized))
-                e.scripture_raw = normalized
-                pending.append(e)
-                if len(pending) >= batch:
-                    with transaction.atomic():
-                        for obj in pending:
-                            obj.save(update_fields=["scripture_raw"])
-                    pending.clear()
-        if pending:
-            with transaction.atomic():
-                for obj in pending:
-                    obj.save(update_fields=["scripture_raw"])
-    else:
-        # dry-run only
-        for e in qs.iterator():
-            original = (e.scripture_raw or "").strip()
-            normalized, warns = normalize_scripture_field(original)
-            warnings_total += len(warns)
-            if normalized != original:
-                changed += 1
-                preview.append((e.id, original, normalized))
-
-    preview = preview[:100]  # keep the table reasonable
-
-    messages.info(
-        request,
-        f"{'Applied' if apply else 'Dry‑run'}: {changed} entries "
-        f"{'changed' if apply else 'would change'}; {warnings_total} warnings."
-    )
-    return render(
-        request,
-        "normalize_result.html",
-        {
-            "applied": apply,
-            "changed": changed,
-            "warnings_total": warnings_total,
-            "preview": preview,
-            "limit": limit,
-        },
-    )
-
-    from django.views.decorators.http import require_http_methods
-from django.contrib.auth.decorators import login_required, user_passes_test
-
-@login_required
-@user_passes_test(is_admin)
-@require_http_methods(["GET", "POST"])
-def normalize_source(request):
-    """
-    GET  -> dry-run preview (summary + first 100 examples)
-    POST -> apply changes to all entries' source (batched)
-    Optional ?limit= for preview subset.
-    """
-    apply = request.method == "POST"
-    limit = int(request.GET.get("limit", "0") or "0")
-
-    qs = Entry.objects.all().order_by("id")
-    if limit:
-        qs = qs[:limit]
-
-    changed = 0
-    warnings_total = 0
-    preview = []
-
-    if apply:
-        from django.db import transaction
-        batch, pending = 500, []
-        for e in qs.iterator():
-            original = (e.source or "").strip()
-            normalized, warns = normalize_source_field(original)
-            warnings_total += len(warns)
-            if normalized != original:
-                changed += 1
-                preview.append((e.id, original, normalized))
-                e.source = normalized
-                pending.append(e)
-                if len(pending) >= batch:
-                    with transaction.atomic():
-                        for obj in pending:
-                            obj.save(update_fields=["source"])
-                    pending.clear()
-        if pending:
-            with transaction.atomic():
-                for obj in pending:
-                    obj.save(update_fields=["source"])
-    else:
-        # dry-run
-        for e in qs.iterator():
-            original = (e.source or "").strip()
-            normalized, warns = normalize_source_field(original)
-            warnings_total += len(warns)
-            if normalized != original:
-                changed += 1
-                preview.append((e.id, original, normalized))
-
-    preview = preview[:100]
-
-    messages.info(
-        request,
-        f"{'Applied' if apply else 'Dry‑run'}: {changed} entries "
-        f"{'changed' if apply else 'would change'}; {warnings_total} warnings."
-    )
-    return render(
-        request,
-        "normalize_source_result.html",
-        {
-            "applied": apply,
-            "changed": changed,
-            "warnings_total": warnings_total,
-            "preview": preview,
-            "limit": limit,
-        },
-    )
-    
-@login_required
-def stats_page(request):
-    from collections import Counter
-    total = Entry.objects.count()
-    today = date.today()
-    last30 = Entry.objects.filter(date_added__gte=today - timedelta(days=30)).count()
-    last365 = Entry.objects.filter(date_added__gte=today - timedelta(days=365)).count()
-
-    # ---- Adds per month (existing logic) ----
-    months = []
-    y = today.year
-    m = today.month
-    for i in range(12):
-        mm = m - i
-        yy = y
-        while mm <= 0:
-            mm += 12
-            yy -= 1
-        from datetime import date as _d
-        start = _d(yy, mm, 1)
-        end = _d(yy + 1, 1, 1) if mm == 12 else _d(yy, mm + 1, 1)
-        label = f"{yy}-{mm:02d}"
-        months.append((label, start, end))
-    months = list(reversed(months))
-
-    series = [
-        (label, Entry.objects.filter(date_added__gte=start, date_added__lt=end).count())
-        for label, start, end in months
-    ]
-    peak = max((v for _, v in series), default=1)
-    heights = [(label, value, 8 + int((value / peak) * 100) if peak else 8)
-               for label, value in series]
-
-    # ---- Top subjects (existing logic) ----
-    counts = Counter()
-    for subj in Entry.objects.exclude(subject="").values_list("subject", flat=True):
-        for tag in [t.strip() for t in subj.split(",") if t.strip()]:
-            counts[tag.lower()] += 1
-    top_subjects = [{"name": n.title(), "count": c} for n, c in counts.most_common(10)]
-
-    # ---- Scripture analytics (NEW) ----
-    # Expect canonical like: "Matt. 5:14; Ps. 1:1,2; 1 Cor. 13:4-7"
-    # Split on semicolons; capture book and chap/verses if present.
+    # scripture analytics
    BOOK_RE = re.compile(
        r"^\s*(?P<book>(?:[1-3]\s+)?[A-Za-z\.]+(?:\s+[A-Za-z\.]+){0,2})"
        r"(?:\s+(?P<cv>\d+(?::[\d,\-\u2013\u2014]+)?))?\s*$"
@ -618,7 +442,7 @@ def stats_page(request):
        (sum(ref_per_entry_counts) / len(ref_per_entry_counts))
        if ref_per_entry_counts else 0.0, 2
    )
-    book_distribution = books_counter.most_common(30)  # handy for future charts
+    book_distribution = books_counter.most_common(30)

    return render(
        request,
@ -630,8 +454,6 @@ def stats_page(request):
            "series": series,
            "heights": heights,
            "top_subjects": top_subjects,
-
-            # NEW context for the template
            "entries_with_scripture": entries_with_scripture,
            "avg_refs_per_entry": avg_refs_per_entry,
            "top_books": top_books,
@ -639,94 +461,314 @@ def stats_page(request):
            "book_distribution": book_distribution,
        },
    )
-    # -- helper: conservative subject splitter/cleaner
-def _normalize_subject_line(raw: str) -> str:
+
+
+# --------------------------
+# Tools: Scripture Normalizer
+# --------------------------
+
+@login_required
+@user_passes_test(is_admin)
+@require_http_methods(["GET", "POST"])
+def normalize_scripture(request):
    """
-    - Convert common separators (; |) to commas.
-    - Convert spaced dashes ( -, – , — when surrounded by spaces) to commas.
-    - Split on commas, trim, collapse double spaces, de-dup (case-insensitive).
-    - Preserve hyphenated terms like 'self-control' (we only replace dashes when spaced).
+    Preview or apply scripture normalization across entries.
+
+    GET  -> dry-run preview (summary + first 100 examples)
+    POST -> apply changes to all entries' scripture_raw (batched)
+    Optional ?limit= restricts the run to the first N entries by id;
+    a missing, non-numeric or negative value means "no limit".
    """
-    if not raw:
-        return raw or ""
+    apply = request.method == "POST"
+    try:
+        # Clamp at zero: Django querysets reject negative slice bounds.
+        limit = max(int(request.GET.get("limit", "0") or "0"), 0)
+    except ValueError:
+        # A malformed ?limit= falls back to "no limit" instead of a 500.
+        limit = 0

-    s = raw
+    qs = Entry.objects.all().order_by("id")
+    if limit:
+        qs = qs[:limit]

-    # 1) unifying obvious separators
-    s = s.replace(";", ",").replace("|", ",")
+    changed = 0
+    warnings_total = 0
+    preview = []

-    # 2) dash as separator ONLY when surrounded by spaces (won’t touch hyphenated words)
-    s = re.sub(r"\s+[–—-]\s+", ", ", s)
+    if apply:
+        # write in batches to keep transactions short
+        from django.db import transaction
+        batch, pending = 500, []
+        for e in qs.iterator():
+            original = (e.scripture_raw or "").strip()
+            normalized, warns = normalize_scripture_field(original)
+            warnings_total += len(warns)
+            if normalized != original:
+                changed += 1
+                preview.append((e.id, original, normalized))
+                e.scripture_raw = normalized
+                pending.append(e)
+                if len(pending) >= batch:
+                    with transaction.atomic():
+                        for obj in pending:
+                            obj.save(update_fields=["scripture_raw"])
+                    pending.clear()
+        # flush the final, partially filled batch
+        if pending:
+            with transaction.atomic():
+                for obj in pending:
+                    obj.save(update_fields=["scripture_raw"])
+    else:
+        # dry-run only
+        for e in qs.iterator():
+            original = (e.scripture_raw or "").strip()
+            normalized, warns = normalize_scripture_field(original)
+            warnings_total += len(warns)
+            if normalized != original:
+                changed += 1
+                preview.append((e.id, original, normalized))

-    # 3) normalize comma spacing
-    s = re.sub(r"\s*,\s*", ",", s)
+    preview = preview[:100]  # keep the table reasonable

-    # 4) split, trim, collapse internal whitespace
-    parts = [re.sub(r"\s{2,}", " ", p.strip()) for p in s.split(",")]
-    parts = [p for p in parts if p]
+    messages.info(
+        request,
+        f"{'Applied' if apply else 'Dry-run'}: {changed} entries "
+        f"{'changed' if apply else 'would change'}; {warnings_total} warnings."
+    )
+    return render(
+        request,
+        "normalize_result.html",
+        {
+            "title": "Scripture Normalizer",
+            "applied": apply,
+            "changed": changed,
+            "warnings_total": warnings_total,
+            "preview": preview,
+            "limit": limit,
+        },
+    )

-    # 5) de-dup (case-insensitive) while preserving order
+
+# --------------------------
+# Tools: Source Normalizer (WOL short-code)
+# --------------------------
+
+def _safe_wol_normalize(source_line: str) -> str:
+    """
+    Best-effort conversion of a source line to WOL-style notation.
+
+    Looks for an optional sibling module ``wol_citation_converter`` and calls
+    the first candidate function name that is callable on it. The input is
+    returned unchanged when the module cannot be imported, when no candidate
+    exists, when every candidate raises, or when the helper's result is not a
+    string.
+
+    Args:
+        source_line: Raw source text; ``None`` and blank strings are tolerated.
+
+    Returns:
+        The normalized string, or the original text ("" for ``None``).
+    """
+    if not (source_line or "").strip():
+        return source_line or ""
+    try:
+        # Optional dependency: without it this tool degrades to a no-op.
+        from . import wol_citation_converter as _wol
+    except Exception:
+        return source_line
+
+    for fn_name in (
+        "normalize_wol_citation",
+        "convert_wol_citation",
+        "convert_source_line",
+        "normalize_source_line",
+    ):
+        fn = getattr(_wol, fn_name, None)
+        if not callable(fn):
+            continue
+        try:
+            out = fn(source_line)
+        except Exception:
+            # Deliberate best-effort: one failing candidate must not abort a
+            # bulk run, so fall through to the next name.
+            continue
+        # The views assign this result to Entry.source, so only a real string
+        # is accepted; None or any other shape keeps the original text.
+        return out if isinstance(out, str) else source_line
+    return source_line
+
+
+@login_required
+@user_passes_test(is_admin)
+@require_http_methods(["GET", "POST"])
+def normalize_source(request):
+    """
+    Preview or apply source-citation normalization across entries.
+
+    GET  -> dry-run preview (summary + first 100 examples)
+    POST -> apply changes to all entries' source (batched)
+    Optional ?limit= restricts the run to the first N entries by id;
+    a missing, non-numeric or negative value means "no limit".
+    """
+    from django.db import transaction
+
+    apply = request.method == "POST"
+    try:
+        # Clamp at zero: Django querysets reject negative slice bounds.
+        limit = max(int(request.GET.get("limit", "0") or "0"), 0)
+    except ValueError:
+        # A malformed ?limit= falls back to "no limit" instead of a 500.
+        limit = 0
+
+    qs = Entry.objects.all().order_by("id")
+    if limit:
+        qs = qs[:limit]
+
+    batch_size = 500   # rows saved per transaction, to keep transactions short
+    preview_max = 100  # rows shown in the result table
+    changed = 0
+    preview: List[Tuple[int, str, str]] = []
+    pending = []
+
+    def _flush():
+        # Persist the queued entries in one short transaction, then reset.
+        with transaction.atomic():
+            for obj in pending:
+                obj.save(update_fields=["source"])
+        pending.clear()
+
+    for e in qs.iterator():
+        original = (e.source or "").strip()
+        normalized = _safe_wol_normalize(original)
+        if normalized == original:
+            continue
+        changed += 1
+        # Only the first rows are ever rendered; don't retain the rest.
+        if len(preview) < preview_max:
+            preview.append((e.id, original, normalized))
+        if apply:
+            e.source = normalized
+            pending.append(e)
+            if len(pending) >= batch_size:
+                _flush()
+    if pending:
+        _flush()
+
+    messages.info(
+        request,
+        f"{'Applied' if apply else 'Dry-run'}: {changed} entries "
+        f"{'changed' if apply else 'would change'}."
+    )
+    return render(
+        request,
+        "normalize_source_result.html",
+        {
+            "applied": apply,
+            "changed": changed,
+            "preview": preview,
+            "limit": limit,
+        },
+    )
+
+
+# --------------------------
+# Tools: Subjects Normalizer
+# --------------------------
+
+# Delimiters that always separate subjects, together with any whitespace
+# around them. Dashes are deliberately absent: they only count as separators
+# when spaced (see HARD_DASH_SEP), so ranges such as "1914–1918" stay intact.
+SUBJECT_SPLIT_PATTERN = re.compile(
+    r"""
+    \s*                                   # optional space
+    (?:                                   # any of the following delimiters:
+      ;                                   # semicolon
+      | \|                                # vertical bar
+      | /                                 # slash
+      | \\                                # backslash
+      | · | •                             # bullets
+      | ,                                 # comma (we'll re-normalize commas too)
+    )
+    \s*
+    """,
+    re.X,
+)
+
+# Hyphen, en dash or em dash: only a separator when clearly used as one,
+# i.e. with whitespace on both sides (" - ", " – ", " — ").
+HARD_DASH_SEP = re.compile(r"\s[-–—]\s")
+
+
+def _normalize_subjects_field(subject: str) -> str:
+    """
+    Normalize the subject field to a clean, comma-separated list.
+
+    Rules:
+      - Replace every HARD_DASH_SEP match (a spaced dash) with a comma.
+      - Split on SUBJECT_SPLIT_PATTERN (';', '/', '\\', '|', bullets, commas).
+      - Keep hyphens within words (no split on 'word-word').
+      - Collapse inner whitespace; trim; drop empties.
+      - De-duplicate case-insensitively, keeping the first spelling and the
+        original order.
+
+    Args:
+        subject: Raw subject text; ``None`` and blank strings are tolerated.
+
+    Returns:
+        The subjects joined with ", ", or "" when the input is empty/blank.
+    """
+    if not (subject or "").strip():
+        return ""
+
+    # First, convert " - " into a comma so we don't split on inner-word hyphens.
+    s = HARD_DASH_SEP.sub(", ", subject)
+
+    # Now split on the big set (includes commas; we'll rebuild clean commas later)
+    parts = SUBJECT_SPLIT_PATTERN.split(s)
+
+    cleaned: List[str] = []
    seen = set()
-    cleaned = []
    for p in parts:
-        key = p.lower()
-        if key not in seen:
-            cleaned.append(p)
-            seen.add(key)
+        p = " ".join((p or "").split())  # collapse inner whitespace
+        if not p:
+            continue
+        # Safety net: SUBJECT_SPLIT_PATTERN already splits on commas, so this
+        # normally yields a single item; it still strips and drops empties.
+        subparts = [q.strip() for q in p.split(",") if q.strip()]
+        for q in subparts:
+            # Preserve order; avoid duplicates (compared case-insensitively)
+            key = q.lower()
+            if key not in seen:
+                seen.add(key)
+                cleaned.append(q)

    return ", ".join(cleaned)


-def _is_staff(user):
-    return user.is_authenticated and user.is_staff
-
-
-@user_passes_test(_is_staff)
+@login_required
+@user_passes_test(is_admin)
@require_http_methods(["GET", "POST"])
-def normalize_subject(request):
+def normalize_subjects(request):
    """
-    GET  = dry-run preview (optional ?limit=)
-    POST = apply to all (optional hidden 'limit' too, but UI mirrors the other tools)
+    Preview or apply subject-list normalization across entries.
+
+    GET  -> dry-run preview (summary + first 100 examples)
+    POST -> apply changes to all entries' subject (batched)
+    Optional ?limit= restricts the run to the first N entries by id;
+    a missing, non-numeric or negative value means "no limit".
    """
-    dry_run = request.method == "GET"
-    limit_raw = request.GET.get("limit") if dry_run else request.POST.get("limit")
-    try:
-        limit = int(limit_raw) if (limit_raw and limit_raw.strip()) else 0
-    except ValueError:
-        limit = 0
+    apply = request.method == "POST"
+    try:
+        # Clamp at zero: Django querysets reject negative slice bounds.
+        limit = max(int(request.GET.get("limit", "0") or "0"), 0)
+    except ValueError:
+        # A malformed ?limit= falls back to "no limit" instead of a 500.
+        limit = 0

    qs = Entry.objects.all().order_by("id")
-    total_considered = qs.count()
-    if limit > 0:
+    if limit:
        qs = qs[:limit]

-    rows = []
-    changed_count = 0
+    changed = 0
+    preview: List[Tuple[int, str, str]] = []

-    for e in qs:
-        before = e.subject or ""
-        after = _normalize_subject_line(before)
-        changed = (after != before)
+    if apply:
+        # write in batches to keep transactions short
+        from django.db import transaction
+        batch, pending = 500, []
+        for e in qs.iterator():
+            original = (e.subject or "").strip()
+            normalized = _normalize_subjects_field(original)
+            if normalized != original:
+                changed += 1
+                preview.append((e.id, original, normalized))
+                e.subject = normalized
+                pending.append(e)
+                if len(pending) >= batch:
+                    with transaction.atomic():
+                        for obj in pending:
+                            obj.save(update_fields=["subject"])
+                    pending.clear()
+        # flush the final, partially filled batch
+        if pending:
+            with transaction.atomic():
+                for obj in pending:
+                    obj.save(update_fields=["subject"])
+    else:
+        # dry-run only
+        for e in qs.iterator():
+            original = (e.subject or "").strip()
+            normalized = _normalize_subjects_field(original)
+            if normalized != original:
+                changed += 1
+                preview.append((e.id, original, normalized))

-        # For preview, list all considered entries; for apply, show only changed rows
-        if dry_run or changed:
-            rows.append({
-                "id": e.id,
-                "before": before,
-                "after": after,
-                "changed": changed,
-            })
+    preview = preview[:100]

-        if not dry_run and changed:
-            e.subject = after
-            e.save(update_fields=["subject"])
-            changed_count += 1
-
-    context = {
-        "dry_run": dry_run,
-        "limit": limit,
-        "total_considered": total_considered if limit == 0 else min(total_considered, limit),
-        "rows": rows,
-        "changed_count": changed_count,
-    }
-    return render(request, "normalize_subject_result.html", context)
+    messages.info(
+        request,
+        f"{'Applied' if apply else 'Dry-run'}: {changed} entries "
+        f"{'changed' if apply else 'would change'}."
+    )
+    return render(
+        request,
+        "normalize_subjects_result.html",
+        {
+            "applied": apply,
+            "changed": changed,
+            "preview": preview,
+            "limit": limit,
+        },
+    )