From c072ce486b2bf54c8719a99df9a01a40ce2cd0ff Mon Sep 17 00:00:00 2001 From: Joshua Laymon Date: Sat, 16 Aug 2025 16:10:33 +0000 Subject: [PATCH] Update web/core/views.py --- web/core/views.py | 612 +++++++++++++++++++++++++--------------------- 1 file changed, 327 insertions(+), 285 deletions(-) diff --git a/web/core/views.py b/web/core/views.py index e535970..9b7e2bb 100644 --- a/web/core/views.py +++ b/web/core/views.py @@ -1,19 +1,27 @@ -from django.shortcuts import render, redirect, get_object_or_404 -from django.contrib.auth import authenticate, login -from django.contrib.auth.decorators import login_required, user_passes_test -from django.http import HttpResponse -from django.contrib import messages -from django.db.models import Q -from django.views.decorators.http import require_http_methods -from datetime import date, timedelta +# core/views.py +from __future__ import annotations + import csv import re +from datetime import date, timedelta +from typing import List, Optional, Tuple +from django.contrib import messages +from django.contrib.auth import authenticate, login +from django.contrib.auth.decorators import login_required, user_passes_test +from django.db.models import Q +from django.http import HttpResponse +from django.shortcuts import get_object_or_404, redirect, render +from django.views.decorators.http import require_http_methods + +from .forms import EntryForm, ImportForm from .models import Entry -from .forms import ImportForm, EntryForm -from .utils import terms, has_wildcards, wildcard_to_regex, import_csv_bytes -from .scripture_normalizer import normalize_scripture_field # <-- NEW -from .source_normalizer import normalize_source_field # NEW +from .scripture_normalizer import normalize_scripture_field +from .utils import has_wildcards, import_csv_bytes, terms, wildcard_to_regex + +# -------------------------- +# Helpers / common +# -------------------------- # Order + labels used in the Search UI FIELD_ORDER = [ @@ -71,6 +79,10 @@ def entry_context(entry, result_ids): } +# -------------------------- +# Search / Navigation +# -------------------------- + @login_required def search_page(request): """ @@ -204,6 +216,10 @@ def entry_view(request, entry_id): return render(request, "entry_view.html", entry_context(entry, ids)) +# -------------------------- +# CRUD +# -------------------------- + @login_required def entry_add(request): """ @@ -269,6 +285,10 @@ def entry_delete(request, entry_id): return render(request, "entry_delete_confirm.html", {"entry": entry}) +# -------------------------- +# Import / Export +# -------------------------- + @login_required @user_passes_test(is_admin) def import_wizard(request): @@ -333,6 +353,10 @@ def export_csv(request): return response +# -------------------------- +# Stats + Scripture Analytics +# -------------------------- + @login_required def stats_page(request): total = Entry.objects.count() @@ -342,6 +366,7 @@ def stats_page(request): from collections import Counter + # last 12 months bar series months = [] y = today.year m = today.month @@ -368,215 +393,14 @@ def stats_page(request): for label, value in series ] + # top subjects counts = Counter() for subj in Entry.objects.exclude(subject="").values_list("subject", flat=True): for tag in [t.strip() for t in subj.split(",") if t.strip()]: counts[tag.lower()] += 1 top_subjects = [{"name": n.title(), "count": c} for n, c in counts.most_common(10)] - return render( - request, - "stats.html", - { - "total": total, - "last30": last30, - "last365": last365, - "series": series, - "heights": heights, - "top_subjects": top_subjects, - }, - ) - - -# ========= NEW: Scripture Normalizer endpoint ========= - -@login_required -@user_passes_test(is_admin) -@require_http_methods(["GET", "POST"]) -def normalize_scripture(request): - """ - GET -> dry-run preview (summary + first 100 examples) - POST -> apply changes to all entries' scripture_raw (batched) - Optional ?limit= for preview subset. - """ - apply = request.method == "POST" - limit = int(request.GET.get("limit", "0") or "0") - - qs = Entry.objects.all().order_by("id") - if limit: - qs = qs[:limit] - - changed = 0 - warnings_total = 0 - preview = [] - - if apply: - # write in batches to keep transactions short - from django.db import transaction - batch, pending = 500, [] - for e in qs.iterator(): - original = (e.scripture_raw or "").strip() - normalized, warns = normalize_scripture_field(original) - warnings_total += len(warns) - if normalized != original: - changed += 1 - preview.append((e.id, original, normalized)) - e.scripture_raw = normalized - pending.append(e) - if len(pending) >= batch: - with transaction.atomic(): - for obj in pending: - obj.save(update_fields=["scripture_raw"]) - pending.clear() - if pending: - with transaction.atomic(): - for obj in pending: - obj.save(update_fields=["scripture_raw"]) - else: - # dry-run only - for e in qs.iterator(): - original = (e.scripture_raw or "").strip() - normalized, warns = normalize_scripture_field(original) - warnings_total += len(warns) - if normalized != original: - changed += 1 - preview.append((e.id, original, normalized)) - - preview = preview[:100] # keep the table reasonable - - messages.info( - request, - f"{'Applied' if apply else 'Dry‑run'}: {changed} entries " - f"{'changed' if apply else 'would change'}; {warnings_total} warnings." - ) - return render( - request, - "normalize_result.html", - { - "applied": apply, - "changed": changed, - "warnings_total": warnings_total, - "preview": preview, - "limit": limit, - }, - ) - - from django.views.decorators.http import require_http_methods -from django.contrib.auth.decorators import login_required, user_passes_test - -@login_required -@user_passes_test(is_admin) -@require_http_methods(["GET", "POST"]) -def normalize_source(request): - """ - GET -> dry-run preview (summary + first 100 examples) - POST -> apply changes to all entries' source (batched) - Optional ?limit= for preview subset. - """ - apply = request.method == "POST" - limit = int(request.GET.get("limit", "0") or "0") - - qs = Entry.objects.all().order_by("id") - if limit: - qs = qs[:limit] - - changed = 0 - warnings_total = 0 - preview = [] - - if apply: - from django.db import transaction - batch, pending = 500, [] - for e in qs.iterator(): - original = (e.source or "").strip() - normalized, warns = normalize_source_field(original) - warnings_total += len(warns) - if normalized != original: - changed += 1 - preview.append((e.id, original, normalized)) - e.source = normalized - pending.append(e) - if len(pending) >= batch: - with transaction.atomic(): - for obj in pending: - obj.save(update_fields=["source"]) - pending.clear() - if pending: - with transaction.atomic(): - for obj in pending: - obj.save(update_fields=["source"]) - else: - # dry-run - for e in qs.iterator(): - original = (e.source or "").strip() - normalized, warns = normalize_source_field(original) - warnings_total += len(warns) - if normalized != original: - changed += 1 - preview.append((e.id, original, normalized)) - - preview = preview[:100] - - messages.info( - request, - f"{'Applied' if apply else 'Dry‑run'}: {changed} entries " - f"{'changed' if apply else 'would change'}; {warnings_total} warnings." - ) - return render( - request, - "normalize_source_result.html", - { - "applied": apply, - "changed": changed, - "warnings_total": warnings_total, - "preview": preview, - "limit": limit, - }, - ) - -@login_required -def stats_page(request): - from collections import Counter - total = Entry.objects.count() - today = date.today() - last30 = Entry.objects.filter(date_added__gte=today - timedelta(days=30)).count() - last365 = Entry.objects.filter(date_added__gte=today - timedelta(days=365)).count() - - # ---- Adds per month (existing logic) ---- - months = [] - y = today.year - m = today.month - for i in range(12): - mm = m - i - yy = y - while mm <= 0: - mm += 12 - yy -= 1 - from datetime import date as _d - start = _d(yy, mm, 1) - end = _d(yy + 1, 1, 1) if mm == 12 else _d(yy, mm + 1, 1) - label = f"{yy}-{mm:02d}" - months.append((label, start, end)) - months = list(reversed(months)) - - series = [ - (label, Entry.objects.filter(date_added__gte=start, date_added__lt=end).count()) - for label, start, end in months - ] - peak = max((v for _, v in series), default=1) - heights = [(label, value, 8 + int((value / peak) * 100) if peak else 8) - for label, value in series] - - # ---- Top subjects (existing logic) ---- - counts = Counter() - for subj in Entry.objects.exclude(subject="").values_list("subject", flat=True): - for tag in [t.strip() for t in subj.split(",") if t.strip()]: - counts[tag.lower()] += 1 - top_subjects = [{"name": n.title(), "count": c} for n, c in counts.most_common(10)] - - # ---- Scripture analytics (NEW) ---- - # Expect canonical like: "Matt. 5:14; Ps. 1:1,2; 1 Cor. 13:4-7" - # Split on semicolons; capture book and chap/verses if present. + # scripture analytics BOOK_RE = re.compile( r"^\s*(?P(?:[1-3]\s+)?[A-Za-z\.]+(?:\s+[A-Za-z\.]+){0,2})" r"(?:\s+(?P\d+(?::[\d,\-\u2013\u2014]+)?))?\s*$" @@ -618,7 +442,7 @@ def stats_page(request): (sum(ref_per_entry_counts) / len(ref_per_entry_counts)) if ref_per_entry_counts else 0.0, 2 ) - book_distribution = books_counter.most_common(30) # handy for future charts + book_distribution = books_counter.most_common(30) return render( request, @@ -630,8 +454,6 @@ def stats_page(request): "series": series, "heights": heights, "top_subjects": top_subjects, - - # NEW context for the template "entries_with_scripture": entries_with_scripture, "avg_refs_per_entry": avg_refs_per_entry, "top_books": top_books, @@ -639,94 +461,314 @@ def stats_page(request): "book_distribution": book_distribution, }, ) - # -- helper: conservative subject splitter/cleaner -def _normalize_subject_line(raw: str) -> str: + + +# -------------------------- +# Tools: Scripture Normalizer +# -------------------------- + +@login_required +@user_passes_test(is_admin) +@require_http_methods(["GET", "POST"]) +def normalize_scripture(request): """ - - Convert common separators (; |) to commas. - - Convert spaced dashes ( -, – , — when surrounded by spaces) to commas. - - Split on commas, trim, collapse double spaces, de-dup (case-insensitive). - - Preserve hyphenated terms like 'self-control' (we only replace dashes when spaced). + GET -> dry-run preview (summary + first 100 examples) + POST -> apply changes to all entries' scripture_raw (batched) + Optional ?limit= for preview subset. """ - if not raw: - return raw or "" + apply = request.method == "POST" + limit = int(request.GET.get("limit", "0") or "0") - s = raw + qs = Entry.objects.all().order_by("id") + if limit: + qs = qs[:limit] - # 1) unifying obvious separators - s = s.replace(";", ",").replace("|", ",") + changed = 0 + warnings_total = 0 + preview = [] - # 2) dash as separator ONLY when surrounded by spaces (won’t touch hyphenated words) - s = re.sub(r"\s+[–—-]\s+", ", ", s) + if apply: + # write in batches to keep transactions short + from django.db import transaction + batch, pending = 500, [] + for e in qs.iterator(): + original = (e.scripture_raw or "").strip() + normalized, warns = normalize_scripture_field(original) + warnings_total += len(warns) + if normalized != original: + changed += 1 + preview.append((e.id, original, normalized)) + e.scripture_raw = normalized + pending.append(e) + if len(pending) >= batch: + with transaction.atomic(): + for obj in pending: + obj.save(update_fields=["scripture_raw"]) + pending.clear() + if pending: + from django.db import transaction + with transaction.atomic(): + for obj in pending: + obj.save(update_fields=["scripture_raw"]) + else: + # dry-run only + for e in qs.iterator(): + original = (e.scripture_raw or "").strip() + normalized, warns = normalize_scripture_field(original) + warnings_total += len(warns) + if normalized != original: + changed += 1 + preview.append((e.id, original, normalized)) - # 3) normalize comma spacing - s = re.sub(r"\s*,\s*", ",", s) + preview = preview[:100] # keep the table reasonable - # 4) split, trim, collapse internal whitespace - parts = [re.sub(r"\s{2,}", " ", p.strip()) for p in s.split(",")] - parts = [p for p in parts if p] + messages.info( + request, + f"{'Applied' if apply else 'Dry-run'}: {changed} entries " + f"{'changed' if apply else 'would change'}; {warnings_total} warnings." + ) + return render( + request, + "normalize_result.html", + { + "title": "Scripture Normalizer", + "applied": apply, + "changed": changed, + "warnings_total": warnings_total, + "preview": preview, + "limit": limit, + }, + ) - # 5) de-dup (case-insensitive) while preserving order + +# -------------------------- +# Tools: Source Normalizer (WOL short-code) +# -------------------------- + +def _safe_wol_normalize(source_line: str) -> str: + """ + Try to normalize a source line to WOL-style notation using an external helper + if available; otherwise return the original line unchanged. + We attempt several likely function names to maximize compatibility. + """ + if not (source_line or "").strip(): + return source_line or "" + try: + # Attempt to import a helper module you added to your project. + from . import wol_citation_converter as _wol + except Exception: + return source_line + + for fn_name in ( + "normalize_wol_citation", + "convert_wol_citation", + "convert_source_line", + "normalize_source_line", + ): + try: + fn = getattr(_wol, fn_name, None) + if callable(fn): + out = fn(source_line) + return out if (out is not None) else source_line + except Exception: + continue + return source_line + + +@login_required +@user_passes_test(is_admin) +@require_http_methods(["GET", "POST"]) +def normalize_source(request): + """ + GET -> dry-run preview (summary + first 100 examples) + POST -> apply changes to all entries' source (batched) + Optional ?limit= for preview subset. + """ + apply = request.method == "POST" + limit = int(request.GET.get("limit", "0") or "0") + + qs = Entry.objects.all().order_by("id") + if limit: + qs = qs[:limit] + + changed = 0 + preview: List[Tuple[int, str, str]] = [] + + if apply: + from django.db import transaction + batch, pending = 500, [] + for e in qs.iterator(): + original = (e.source or "").strip() + normalized = _safe_wol_normalize(original) + if normalized != original: + changed += 1 + preview.append((e.id, original, normalized)) + e.source = normalized + pending.append(e) + if len(pending) >= batch: + with transaction.atomic(): + for obj in pending: + obj.save(update_fields=["source"]) + pending.clear() + if pending: + from django.db import transaction + with transaction.atomic(): + for obj in pending: + obj.save(update_fields=["source"]) + else: + # dry-run only + for e in qs.iterator(): + original = (e.source or "").strip() + normalized = _safe_wol_normalize(original) + if normalized != original: + changed += 1 + preview.append((e.id, original, normalized)) + + preview = preview[:100] + + messages.info( + request, + f"{'Applied' if apply else 'Dry-run'}: {changed} entries " + f"{'changed' if apply else 'would change'}." + ) + return render( + request, + "normalize_source_result.html", + { + "applied": apply, + "changed": changed, + "preview": preview, + "limit": limit, + }, + ) + + +# -------------------------- +# Tools: Subjects Normalizer (NEW) +# -------------------------- + +SUBJECT_SPLIT_PATTERN = re.compile( + r""" + \s* # optional space + (?: # any of the following delimiters: + ; # semicolon + | \| # vertical bar + | / # slash + | \\ # backslash + | · | • # bullets + | [–—] # en/em dash (only when surrounded by spaces) + | , # comma (we'll re-normalize commas too) + ) + \s* + """, + re.X, +) + +# For hyphen: only split when it's clearly used as a separator " - " (space-hyphen-space) +HARD_DASH_SEP = re.compile(r"\s-\s") + + +def _normalize_subjects_field(subject: str) -> str: + """ + Normalize the subject field to a clean, comma-separated list. + + Rules: + - Split on common delimiters: ';', '/', '\', '|', bullets, en/em dash. + - Convert " - " (space-hyphen-space) to a delimiter as well. + - Keep hyphens within words (no split on 'word-word'). + - Trim whitespace; drop empties. + - De-duplicate while preserving order. + """ + if not (subject or "").strip(): + return "" + + # First, convert " - " into a comma so we don't split on inner-word hyphens. + s = HARD_DASH_SEP.sub(", ", subject) + + # Now split on the big set (includes commas; we'll rebuild clean commas later) + parts = SUBJECT_SPLIT_PATTERN.split(s) + + cleaned: List[str] = [] seen = set() - cleaned = [] for p in parts: - key = p.lower() - if key not in seen: - cleaned.append(p) - seen.add(key) + p = " ".join((p or "").split()) # collapse inner whitespace + if not p: + continue + # Many entries already have comma-separated subjects; split those too + subparts = [q.strip() for q in p.split(",") if q.strip()] + for q in subparts: + # Preserve order; avoid duplicates + key = q.lower() + if key not in seen: + seen.add(key) + cleaned.append(q) return ", ".join(cleaned) -def _is_staff(user): - return user.is_authenticated and user.is_staff - - -@user_passes_test(_is_staff) +@login_required +@user_passes_test(is_admin) @require_http_methods(["GET", "POST"]) -def normalize_subject(request): +def normalize_subjects(request): """ - GET = dry-run preview (optional ?limit=) - POST = apply to all (optional hidden 'limit' too, but UI mirrors the other tools) + GET -> dry-run preview (summary + first 100 examples) + POST -> apply changes to all entries' subject (batched) + Optional ?limit= for preview subset. """ - dry_run = request.method == "GET" - limit_raw = request.GET.get("limit") if dry_run else request.POST.get("limit") - try: - limit = int(limit_raw) if (limit_raw and limit_raw.strip()) else 0 - except ValueError: - limit = 0 + apply = request.method == "POST" + limit = int(request.GET.get("limit", "0") or "0") qs = Entry.objects.all().order_by("id") - total_considered = qs.count() - if limit > 0: + if limit: qs = qs[:limit] - rows = [] - changed_count = 0 + changed = 0 + preview: List[Tuple[int, str, str]] = [] - for e in qs: - before = e.subject or "" - after = _normalize_subject_line(before) - changed = (after != before) + if apply: + from django.db import transaction + batch, pending = 500, [] + for e in qs.iterator(): + original = (e.subject or "").strip() + normalized = _normalize_subjects_field(original) + if normalized != original: + changed += 1 + preview.append((e.id, original, normalized)) + e.subject = normalized + pending.append(e) + if len(pending) >= batch: + with transaction.atomic(): + for obj in pending: + obj.save(update_fields=["subject"]) + pending.clear() + if pending: + from django.db import transaction + with transaction.atomic(): + for obj in pending: + obj.save(update_fields=["subject"]) + else: + # dry-run only + for e in qs.iterator(): + original = (e.subject or "").strip() + normalized = _normalize_subjects_field(original) + if normalized != original: + changed += 1 + preview.append((e.id, original, normalized)) - # For preview, list all considered entries; for apply, show only changed rows - if dry_run or changed: - rows.append({ - "id": e.id, - "before": before, - "after": after, - "changed": changed, - }) + preview = preview[:100] - if not dry_run and changed: - e.subject = after - e.save(update_fields=["subject"]) - changed_count += 1 - - context = { - "dry_run": dry_run, - "limit": limit, - "total_considered": total_considered if limit == 0 else min(total_considered, limit), - "rows": rows, - "changed_count": changed_count, - } - return render(request, "normalize_subject_result.html", context) \ No newline at end of file + messages.info( + request, + f"{'Applied' if apply else 'Dry-run'}: {changed} entries " + f"{'changed' if apply else 'would change'}." + ) + return render( + request, + "normalize_subjects_result.html", + { + "applied": apply, + "changed": changed, + "preview": preview, + "limit": limit, + }, + ) \ No newline at end of file