diff --git a/web/core/views.py b/web/core/views.py index 9b7e2bb..6c97e34 100644 --- a/web/core/views.py +++ b/web/core/views.py @@ -1,27 +1,19 @@ -# core/views.py -from __future__ import annotations - -import csv -import re -from datetime import date, timedelta -from typing import List, Optional, Tuple - -from django.contrib import messages +from django.shortcuts import render, redirect, get_object_or_404 from django.contrib.auth import authenticate, login from django.contrib.auth.decorators import login_required, user_passes_test -from django.db.models import Q from django.http import HttpResponse -from django.shortcuts import get_object_or_404, redirect, render +from django.contrib import messages +from django.db.models import Q from django.views.decorators.http import require_http_methods +from datetime import date, timedelta +import csv +import re -from .forms import EntryForm, ImportForm from .models import Entry -from .scripture_normalizer import normalize_scripture_field -from .utils import has_wildcards, import_csv_bytes, terms, wildcard_to_regex - -# -------------------------- -# Helpers / common -# -------------------------- +from .forms import ImportForm, EntryForm +from .utils import terms, has_wildcards, wildcard_to_regex, import_csv_bytes +from .scripture_normalizer import normalize_scripture_field # <-- NEW +from .source_normalizer import normalize_source_field # NEW # Order + labels used in the Search UI FIELD_ORDER = [ @@ -79,10 +71,6 @@ def entry_context(entry, result_ids): } -# -------------------------- -# Search / Navigation -# -------------------------- - @login_required def search_page(request): """ @@ -216,10 +204,6 @@ def entry_view(request, entry_id): return render(request, "entry_view.html", entry_context(entry, ids)) -# -------------------------- -# CRUD -# -------------------------- - @login_required def entry_add(request): """ @@ -285,10 +269,6 @@ def entry_delete(request, entry_id): return render(request, "entry_delete_confirm.html", {"entry": entry}) -# -------------------------- -# Import / Export -# -------------------------- - @login_required @user_passes_test(is_admin) def import_wizard(request): @@ -353,10 +333,6 @@ def export_csv(request): return response -# -------------------------- -# Stats + Scripture Analytics -# -------------------------- - @login_required def stats_page(request): total = Entry.objects.count() @@ -366,7 +342,6 @@ def stats_page(request): from collections import Counter - # last 12 months bar series months = [] y = today.year m = today.month @@ -393,14 +368,215 @@ def stats_page(request): for label, value in series ] - # top subjects counts = Counter() for subj in Entry.objects.exclude(subject="").values_list("subject", flat=True): for tag in [t.strip() for t in subj.split(",") if t.strip()]: counts[tag.lower()] += 1 top_subjects = [{"name": n.title(), "count": c} for n, c in counts.most_common(10)] - # scripture analytics + return render( + request, + "stats.html", + { + "total": total, + "last30": last30, + "last365": last365, + "series": series, + "heights": heights, + "top_subjects": top_subjects, + }, + ) + + +# ========= NEW: Scripture Normalizer endpoint ========= + +@login_required +@user_passes_test(is_admin) +@require_http_methods(["GET", "POST"]) +def normalize_scripture(request): + """ + GET -> dry-run preview (summary + first 100 examples) + POST -> apply changes to all entries' scripture_raw (batched) + Optional ?limit= for preview subset. + """ + apply = request.method == "POST" + limit = int(request.GET.get("limit", "0") or "0") + + qs = Entry.objects.all().order_by("id") + if limit: + qs = qs[:limit] + + changed = 0 + warnings_total = 0 + preview = [] + + if apply: + # write in batches to keep transactions short + from django.db import transaction + batch, pending = 500, [] + for e in qs.iterator(): + original = (e.scripture_raw or "").strip() + normalized, warns = normalize_scripture_field(original) + warnings_total += len(warns) + if normalized != original: + changed += 1 + preview.append((e.id, original, normalized)) + e.scripture_raw = normalized + pending.append(e) + if len(pending) >= batch: + with transaction.atomic(): + for obj in pending: + obj.save(update_fields=["scripture_raw"]) + pending.clear() + if pending: + with transaction.atomic(): + for obj in pending: + obj.save(update_fields=["scripture_raw"]) + else: + # dry-run only + for e in qs.iterator(): + original = (e.scripture_raw or "").strip() + normalized, warns = normalize_scripture_field(original) + warnings_total += len(warns) + if normalized != original: + changed += 1 + preview.append((e.id, original, normalized)) + + preview = preview[:100] # keep the table reasonable + + messages.info( + request, + f"{'Applied' if apply else 'Dry‑run'}: {changed} entries " + f"{'changed' if apply else 'would change'}; {warnings_total} warnings." + ) + return render( + request, + "normalize_result.html", + { + "applied": apply, + "changed": changed, + "warnings_total": warnings_total, + "preview": preview, + "limit": limit, + }, + ) + + from django.views.decorators.http import require_http_methods +from django.contrib.auth.decorators import login_required, user_passes_test + +@login_required +@user_passes_test(is_admin) +@require_http_methods(["GET", "POST"]) +def normalize_source(request): + """ + GET -> dry-run preview (summary + first 100 examples) + POST -> apply changes to all entries' source (batched) + Optional ?limit= for preview subset. + """ + apply = request.method == "POST" + limit = int(request.GET.get("limit", "0") or "0") + + qs = Entry.objects.all().order_by("id") + if limit: + qs = qs[:limit] + + changed = 0 + warnings_total = 0 + preview = [] + + if apply: + from django.db import transaction + batch, pending = 500, [] + for e in qs.iterator(): + original = (e.source or "").strip() + normalized, warns = normalize_source_field(original) + warnings_total += len(warns) + if normalized != original: + changed += 1 + preview.append((e.id, original, normalized)) + e.source = normalized + pending.append(e) + if len(pending) >= batch: + with transaction.atomic(): + for obj in pending: + obj.save(update_fields=["source"]) + pending.clear() + if pending: + with transaction.atomic(): + for obj in pending: + obj.save(update_fields=["source"]) + else: + # dry-run + for e in qs.iterator(): + original = (e.source or "").strip() + normalized, warns = normalize_source_field(original) + warnings_total += len(warns) + if normalized != original: + changed += 1 + preview.append((e.id, original, normalized)) + + preview = preview[:100] + + messages.info( + request, + f"{'Applied' if apply else 'Dry‑run'}: {changed} entries " + f"{'changed' if apply else 'would change'}; {warnings_total} warnings." + ) + return render( + request, + "normalize_source_result.html", + { + "applied": apply, + "changed": changed, + "warnings_total": warnings_total, + "preview": preview, + "limit": limit, + }, + ) + +@login_required +def stats_page(request): + from collections import Counter + total = Entry.objects.count() + today = date.today() + last30 = Entry.objects.filter(date_added__gte=today - timedelta(days=30)).count() + last365 = Entry.objects.filter(date_added__gte=today - timedelta(days=365)).count() + + # ---- Adds per month (existing logic) ---- + months = [] + y = today.year + m = today.month + for i in range(12): + mm = m - i + yy = y + while mm <= 0: + mm += 12 + yy -= 1 + from datetime import date as _d + start = _d(yy, mm, 1) + end = _d(yy + 1, 1, 1) if mm == 12 else _d(yy, mm + 1, 1) + label = f"{yy}-{mm:02d}" + months.append((label, start, end)) + months = list(reversed(months)) + + series = [ + (label, Entry.objects.filter(date_added__gte=start, date_added__lt=end).count()) + for label, start, end in months + ] + peak = max((v for _, v in series), default=1) + heights = [(label, value, 8 + int((value / peak) * 100) if peak else 8) + for label, value in series] + + # ---- Top subjects (existing logic) ---- + counts = Counter() + for subj in Entry.objects.exclude(subject="").values_list("subject", flat=True): + for tag in [t.strip() for t in subj.split(",") if t.strip()]: + counts[tag.lower()] += 1 + top_subjects = [{"name": n.title(), "count": c} for n, c in counts.most_common(10)] + + # ---- Scripture analytics (NEW) ---- + # Expect canonical like: "Matt. 5:14; Ps. 1:1,2; 1 Cor. 13:4-7" + # Split on semicolons; capture book and chap/verses if present. BOOK_RE = re.compile( r"^\s*(?P(?:[1-3]\s+)?[A-Za-z\.]+(?:\s+[A-Za-z\.]+){0,2})" r"(?:\s+(?P\d+(?::[\d,\-\u2013\u2014]+)?))?\s*$" @@ -442,7 +618,7 @@ def stats_page(request): (sum(ref_per_entry_counts) / len(ref_per_entry_counts)) if ref_per_entry_counts else 0.0, 2 ) - book_distribution = books_counter.most_common(30) + book_distribution = books_counter.most_common(30) # handy for future charts return render( request, @@ -454,321 +630,12 @@ def stats_page(request): "series": series, "heights": heights, "top_subjects": top_subjects, + + # NEW context for the template "entries_with_scripture": entries_with_scripture, "avg_refs_per_entry": avg_refs_per_entry, "top_books": top_books, "top_refs": top_refs, "book_distribution": book_distribution, }, - ) - - -# -------------------------- -# Tools: Scripture Normalizer -# -------------------------- - -@login_required -@user_passes_test(is_admin) -@require_http_methods(["GET", "POST"]) -def normalize_scripture(request): - """ - GET -> dry-run preview (summary + first 100 examples) - POST -> apply changes to all entries' scripture_raw (batched) - Optional ?limit= for preview subset. - """ - apply = request.method == "POST" - limit = int(request.GET.get("limit", "0") or "0") - - qs = Entry.objects.all().order_by("id") - if limit: - qs = qs[:limit] - - changed = 0 - warnings_total = 0 - preview = [] - - if apply: - # write in batches to keep transactions short - from django.db import transaction - batch, pending = 500, [] - for e in qs.iterator(): - original = (e.scripture_raw or "").strip() - normalized, warns = normalize_scripture_field(original) - warnings_total += len(warns) - if normalized != original: - changed += 1 - preview.append((e.id, original, normalized)) - e.scripture_raw = normalized - pending.append(e) - if len(pending) >= batch: - with transaction.atomic(): - for obj in pending: - obj.save(update_fields=["scripture_raw"]) - pending.clear() - if pending: - from django.db import transaction - with transaction.atomic(): - for obj in pending: - obj.save(update_fields=["scripture_raw"]) - else: - # dry-run only - for e in qs.iterator(): - original = (e.scripture_raw or "").strip() - normalized, warns = normalize_scripture_field(original) - warnings_total += len(warns) - if normalized != original: - changed += 1 - preview.append((e.id, original, normalized)) - - preview = preview[:100] # keep the table reasonable - - messages.info( - request, - f"{'Applied' if apply else 'Dry-run'}: {changed} entries " - f"{'changed' if apply else 'would change'}; {warnings_total} warnings." - ) - return render( - request, - "normalize_result.html", - { - "title": "Scripture Normalizer", - "applied": apply, - "changed": changed, - "warnings_total": warnings_total, - "preview": preview, - "limit": limit, - }, - ) - - -# -------------------------- -# Tools: Source Normalizer (WOL short-code) -# -------------------------- - -def _safe_wol_normalize(source_line: str) -> str: - """ - Try to normalize a source line to WOL-style notation using an external helper - if available; otherwise return the original line unchanged. - We attempt several likely function names to maximize compatibility. - """ - if not (source_line or "").strip(): - return source_line or "" - try: - # Attempt to import a helper module you added to your project. - from . import wol_citation_converter as _wol - except Exception: - return source_line - - for fn_name in ( - "normalize_wol_citation", - "convert_wol_citation", - "convert_source_line", - "normalize_source_line", - ): - try: - fn = getattr(_wol, fn_name, None) - if callable(fn): - out = fn(source_line) - return out if (out is not None) else source_line - except Exception: - continue - return source_line - - -@login_required -@user_passes_test(is_admin) -@require_http_methods(["GET", "POST"]) -def normalize_source(request): - """ - GET -> dry-run preview (summary + first 100 examples) - POST -> apply changes to all entries' source (batched) - Optional ?limit= for preview subset. - """ - apply = request.method == "POST" - limit = int(request.GET.get("limit", "0") or "0") - - qs = Entry.objects.all().order_by("id") - if limit: - qs = qs[:limit] - - changed = 0 - preview: List[Tuple[int, str, str]] = [] - - if apply: - from django.db import transaction - batch, pending = 500, [] - for e in qs.iterator(): - original = (e.source or "").strip() - normalized = _safe_wol_normalize(original) - if normalized != original: - changed += 1 - preview.append((e.id, original, normalized)) - e.source = normalized - pending.append(e) - if len(pending) >= batch: - with transaction.atomic(): - for obj in pending: - obj.save(update_fields=["source"]) - pending.clear() - if pending: - from django.db import transaction - with transaction.atomic(): - for obj in pending: - obj.save(update_fields=["source"]) - else: - # dry-run only - for e in qs.iterator(): - original = (e.source or "").strip() - normalized = _safe_wol_normalize(original) - if normalized != original: - changed += 1 - preview.append((e.id, original, normalized)) - - preview = preview[:100] - - messages.info( - request, - f"{'Applied' if apply else 'Dry-run'}: {changed} entries " - f"{'changed' if apply else 'would change'}." - ) - return render( - request, - "normalize_source_result.html", - { - "applied": apply, - "changed": changed, - "preview": preview, - "limit": limit, - }, - ) - - -# -------------------------- -# Tools: Subjects Normalizer (NEW) -# -------------------------- - -SUBJECT_SPLIT_PATTERN = re.compile( - r""" - \s* # optional space - (?: # any of the following delimiters: - ; # semicolon - | \| # vertical bar - | / # slash - | \\ # backslash - | · | • # bullets - | [–—] # en/em dash (only when surrounded by spaces) - | , # comma (we'll re-normalize commas too) - ) - \s* - """, - re.X, -) - -# For hyphen: only split when it's clearly used as a separator " - " (space-hyphen-space) -HARD_DASH_SEP = re.compile(r"\s-\s") - - -def _normalize_subjects_field(subject: str) -> str: - """ - Normalize the subject field to a clean, comma-separated list. - - Rules: - - Split on common delimiters: ';', '/', '\', '|', bullets, en/em dash. - - Convert " - " (space-hyphen-space) to a delimiter as well. - - Keep hyphens within words (no split on 'word-word'). - - Trim whitespace; drop empties. - - De-duplicate while preserving order. - """ - if not (subject or "").strip(): - return "" - - # First, convert " - " into a comma so we don't split on inner-word hyphens. - s = HARD_DASH_SEP.sub(", ", subject) - - # Now split on the big set (includes commas; we'll rebuild clean commas later) - parts = SUBJECT_SPLIT_PATTERN.split(s) - - cleaned: List[str] = [] - seen = set() - for p in parts: - p = " ".join((p or "").split()) # collapse inner whitespace - if not p: - continue - # Many entries already have comma-separated subjects; split those too - subparts = [q.strip() for q in p.split(",") if q.strip()] - for q in subparts: - # Preserve order; avoid duplicates - key = q.lower() - if key not in seen: - seen.add(key) - cleaned.append(q) - - return ", ".join(cleaned) - - -@login_required -@user_passes_test(is_admin) -@require_http_methods(["GET", "POST"]) -def normalize_subjects(request): - """ - GET -> dry-run preview (summary + first 100 examples) - POST -> apply changes to all entries' subject (batched) - Optional ?limit= for preview subset. - """ - apply = request.method == "POST" - limit = int(request.GET.get("limit", "0") or "0") - - qs = Entry.objects.all().order_by("id") - if limit: - qs = qs[:limit] - - changed = 0 - preview: List[Tuple[int, str, str]] = [] - - if apply: - from django.db import transaction - batch, pending = 500, [] - for e in qs.iterator(): - original = (e.subject or "").strip() - normalized = _normalize_subjects_field(original) - if normalized != original: - changed += 1 - preview.append((e.id, original, normalized)) - e.subject = normalized - pending.append(e) - if len(pending) >= batch: - with transaction.atomic(): - for obj in pending: - obj.save(update_fields=["subject"]) - pending.clear() - if pending: - from django.db import transaction - with transaction.atomic(): - for obj in pending: - obj.save(update_fields=["subject"]) - else: - # dry-run only - for e in qs.iterator(): - original = (e.subject or "").strip() - normalized = _normalize_subjects_field(original) - if normalized != original: - changed += 1 - preview.append((e.id, original, normalized)) - - preview = preview[:100] - - messages.info( - request, - f"{'Applied' if apply else 'Dry-run'}: {changed} entries " - f"{'changed' if apply else 'would change'}." - ) - return render( - request, - "normalize_subjects_result.html", - { - "applied": apply, - "changed": changed, - "preview": preview, - "limit": limit, - }, ) \ No newline at end of file