Update web/core/views.py

This commit is contained in:
Joshua Laymon 2025-08-16 16:10:33 +00:00
parent e494815f87
commit c072ce486b

View File

@ -1,19 +1,27 @@
from django.shortcuts import render, redirect, get_object_or_404
from django.contrib.auth import authenticate, login
from django.contrib.auth.decorators import login_required, user_passes_test
from django.http import HttpResponse
from django.contrib import messages
from django.db.models import Q
from django.views.decorators.http import require_http_methods
from datetime import date, timedelta
# core/views.py
from __future__ import annotations
import csv
import re
from datetime import date, timedelta
from typing import List, Optional, Tuple
from django.contrib import messages
from django.contrib.auth import authenticate, login
from django.contrib.auth.decorators import login_required, user_passes_test
from django.db.models import Q
from django.http import HttpResponse
from django.shortcuts import get_object_or_404, redirect, render
from django.views.decorators.http import require_http_methods
from .forms import EntryForm, ImportForm
from .models import Entry
from .forms import ImportForm, EntryForm
from .utils import terms, has_wildcards, wildcard_to_regex, import_csv_bytes
from .scripture_normalizer import normalize_scripture_field # <-- NEW
from .source_normalizer import normalize_source_field # NEW
from .scripture_normalizer import normalize_scripture_field
from .utils import has_wildcards, import_csv_bytes, terms, wildcard_to_regex
# --------------------------
# Helpers / common
# --------------------------
# Order + labels used in the Search UI
FIELD_ORDER = [
@ -71,6 +79,10 @@ def entry_context(entry, result_ids):
}
# --------------------------
# Search / Navigation
# --------------------------
@login_required
def search_page(request):
"""
@ -204,6 +216,10 @@ def entry_view(request, entry_id):
return render(request, "entry_view.html", entry_context(entry, ids))
# --------------------------
# CRUD
# --------------------------
@login_required
def entry_add(request):
"""
@ -269,6 +285,10 @@ def entry_delete(request, entry_id):
return render(request, "entry_delete_confirm.html", {"entry": entry})
# --------------------------
# Import / Export
# --------------------------
@login_required
@user_passes_test(is_admin)
def import_wizard(request):
@ -333,6 +353,10 @@ def export_csv(request):
return response
# --------------------------
# Stats + Scripture Analytics
# --------------------------
@login_required
def stats_page(request):
total = Entry.objects.count()
@ -342,6 +366,7 @@ def stats_page(request):
from collections import Counter
# last 12 months bar series
months = []
y = today.year
m = today.month
@ -368,215 +393,14 @@ def stats_page(request):
for label, value in series
]
# top subjects
counts = Counter()
for subj in Entry.objects.exclude(subject="").values_list("subject", flat=True):
for tag in [t.strip() for t in subj.split(",") if t.strip()]:
counts[tag.lower()] += 1
top_subjects = [{"name": n.title(), "count": c} for n, c in counts.most_common(10)]
return render(
request,
"stats.html",
{
"total": total,
"last30": last30,
"last365": last365,
"series": series,
"heights": heights,
"top_subjects": top_subjects,
},
)
# ========= NEW: Scripture Normalizer endpoint =========
@login_required
@user_passes_test(is_admin)
@require_http_methods(["GET", "POST"])
def normalize_scripture(request):
    """
    Preview or apply normalization of every entry's ``scripture_raw`` field.

    GET  -> dry-run: nothing is saved; renders a summary plus the first
            100 before/after examples.
    POST -> apply: changed entries are saved in batches of 500, each batch
            inside its own transaction.

    The optional ``?limit=N`` query parameter restricts the run to the first
    N entries (ordered by id). A missing, empty, non-numeric or non-positive
    value means "no limit" instead of raising.
    """
    from django.db import transaction

    apply = request.method == "POST"

    # Tolerate garbage in the query string: int() on user input must not
    # turn into a 500, and a negative value must never reach the queryset
    # slice below.
    try:
        limit = int(request.GET.get("limit") or 0)
    except (TypeError, ValueError):
        limit = 0
    if limit < 0:
        limit = 0

    qs = Entry.objects.all().order_by("id")
    if limit:
        qs = qs[:limit]

    batch_size = 500    # rows saved per transaction
    preview_cap = 100   # keep the result table reasonable

    changed = 0
    warnings_total = 0
    preview = []
    pending = []

    def flush_pending():
        # One short transaction per batch rather than one huge one.
        with transaction.atomic():
            for obj in pending:
                obj.save(update_fields=["scripture_raw"])
        pending.clear()

    # A single pass serves both modes; only the write step depends on `apply`.
    for e in qs.iterator():
        original = (e.scripture_raw or "").strip()
        normalized, warns = normalize_scripture_field(original)
        warnings_total += len(warns)
        if normalized == original:
            continue

        changed += 1
        # Only the first examples are ever shown, so stop collecting there.
        if len(preview) < preview_cap:
            preview.append((e.id, original, normalized))

        if apply:
            e.scripture_raw = normalized
            pending.append(e)
            if len(pending) >= batch_size:
                flush_pending()

    if pending:
        flush_pending()

    messages.info(
        request,
        f"{'Applied' if apply else 'Dryrun'}: {changed} entries "
        f"{'changed' if apply else 'would change'}; {warnings_total} warnings."
    )
    return render(
        request,
        "normalize_result.html",
        {
            "applied": apply,
            "changed": changed,
            "warnings_total": warnings_total,
            "preview": preview,
            "limit": limit,
        },
    )
from django.views.decorators.http import require_http_methods
from django.contrib.auth.decorators import login_required, user_passes_test
@login_required
@user_passes_test(is_admin)
@require_http_methods(["GET", "POST"])
def normalize_source(request):
    """
    Preview or apply normalization of every entry's ``source`` field.

    GET  -> dry-run: nothing is saved; renders a summary plus the first
            100 before/after examples.
    POST -> apply: changed entries are saved in batches of 500, each batch
            inside its own transaction.

    The optional ``?limit=N`` query parameter restricts the run to the first
    N entries (ordered by id). A missing, empty, non-numeric or non-positive
    value means "no limit" instead of raising.
    """
    from django.db import transaction

    apply = request.method == "POST"

    # int() on raw query-string input must not become a 500, and a negative
    # value must never reach the queryset slice below.
    try:
        limit = int(request.GET.get("limit") or 0)
    except (TypeError, ValueError):
        limit = 0
    if limit < 0:
        limit = 0

    qs = Entry.objects.all().order_by("id")
    if limit:
        qs = qs[:limit]

    batch_size = 500    # rows saved per transaction
    preview_cap = 100   # keep the result table reasonable

    changed = 0
    warnings_total = 0
    preview = []
    pending = []

    def flush_pending():
        # One short transaction per batch rather than one huge one.
        with transaction.atomic():
            for obj in pending:
                obj.save(update_fields=["source"])
        pending.clear()

    # A single pass serves both modes; only the write step depends on `apply`.
    for e in qs.iterator():
        original = (e.source or "").strip()
        normalized, warns = normalize_source_field(original)
        warnings_total += len(warns)
        if normalized == original:
            continue

        changed += 1
        # Only the first examples are ever shown, so stop collecting there.
        if len(preview) < preview_cap:
            preview.append((e.id, original, normalized))

        if apply:
            e.source = normalized
            pending.append(e)
            if len(pending) >= batch_size:
                flush_pending()

    if pending:
        flush_pending()

    messages.info(
        request,
        f"{'Applied' if apply else 'Dryrun'}: {changed} entries "
        f"{'changed' if apply else 'would change'}; {warnings_total} warnings."
    )
    return render(
        request,
        "normalize_source_result.html",
        {
            "applied": apply,
            "changed": changed,
            "warnings_total": warnings_total,
            "preview": preview,
            "limit": limit,
        },
    )
@login_required
def stats_page(request):
from collections import Counter
total = Entry.objects.count()
today = date.today()
last30 = Entry.objects.filter(date_added__gte=today - timedelta(days=30)).count()
last365 = Entry.objects.filter(date_added__gte=today - timedelta(days=365)).count()
# ---- Adds per month (existing logic) ----
months = []
y = today.year
m = today.month
for i in range(12):
mm = m - i
yy = y
while mm <= 0:
mm += 12
yy -= 1
from datetime import date as _d
start = _d(yy, mm, 1)
end = _d(yy + 1, 1, 1) if mm == 12 else _d(yy, mm + 1, 1)
label = f"{yy}-{mm:02d}"
months.append((label, start, end))
months = list(reversed(months))
series = [
(label, Entry.objects.filter(date_added__gte=start, date_added__lt=end).count())
for label, start, end in months
]
peak = max((v for _, v in series), default=1)
heights = [(label, value, 8 + int((value / peak) * 100) if peak else 8)
for label, value in series]
# ---- Top subjects (existing logic) ----
counts = Counter()
for subj in Entry.objects.exclude(subject="").values_list("subject", flat=True):
for tag in [t.strip() for t in subj.split(",") if t.strip()]:
counts[tag.lower()] += 1
top_subjects = [{"name": n.title(), "count": c} for n, c in counts.most_common(10)]
# ---- Scripture analytics (NEW) ----
# Expect canonical like: "Matt. 5:14; Ps. 1:1,2; 1 Cor. 13:4-7"
# Split on semicolons; capture book and chap/verses if present.
# scripture analytics
BOOK_RE = re.compile(
r"^\s*(?P<book>(?:[1-3]\s+)?[A-Za-z\.]+(?:\s+[A-Za-z\.]+){0,2})"
r"(?:\s+(?P<cv>\d+(?::[\d,\-\u2013\u2014]+)?))?\s*$"
@ -618,7 +442,7 @@ def stats_page(request):
(sum(ref_per_entry_counts) / len(ref_per_entry_counts))
if ref_per_entry_counts else 0.0, 2
)
book_distribution = books_counter.most_common(30) # handy for future charts
book_distribution = books_counter.most_common(30)
return render(
request,
@ -630,8 +454,6 @@ def stats_page(request):
"series": series,
"heights": heights,
"top_subjects": top_subjects,
# NEW context for the template
"entries_with_scripture": entries_with_scripture,
"avg_refs_per_entry": avg_refs_per_entry,
"top_books": top_books,
@ -639,94 +461,314 @@ def stats_page(request):
"book_distribution": book_distribution,
},
)
# -- helper: conservative subject splitter/cleaner
def _normalize_subject_line(raw: str) -> str:
# --------------------------
# Tools: Scripture Normalizer
# --------------------------
@login_required
@user_passes_test(is_admin)
@require_http_methods(["GET", "POST"])
def normalize_scripture(request):
"""
- Convert common separators (; |) to commas.
- Convert spaced dashes (-, –, — when surrounded by spaces) to commas.
- Split on commas, trim, collapse double spaces, de-dup (case-insensitive).
- Preserve hyphenated terms like 'self-control' (we only replace dashes when spaced).
GET -> dry-run preview (summary + first 100 examples)
POST -> apply changes to all entries' scripture_raw (batched)
Optional ?limit= for preview subset.
"""
if not raw:
return raw or ""
apply = request.method == "POST"
limit = int(request.GET.get("limit", "0") or "0")
s = raw
qs = Entry.objects.all().order_by("id")
if limit:
qs = qs[:limit]
# 1) unifying obvious separators
s = s.replace(";", ",").replace("|", ",")
changed = 0
warnings_total = 0
preview = []
# 2) dash as separator ONLY when surrounded by spaces (won't touch hyphenated words)
s = re.sub(r"\s+[–—-]\s+", ", ", s)
if apply:
# write in batches to keep transactions short
from django.db import transaction
batch, pending = 500, []
for e in qs.iterator():
original = (e.scripture_raw or "").strip()
normalized, warns = normalize_scripture_field(original)
warnings_total += len(warns)
if normalized != original:
changed += 1
preview.append((e.id, original, normalized))
e.scripture_raw = normalized
pending.append(e)
if len(pending) >= batch:
with transaction.atomic():
for obj in pending:
obj.save(update_fields=["scripture_raw"])
pending.clear()
if pending:
from django.db import transaction
with transaction.atomic():
for obj in pending:
obj.save(update_fields=["scripture_raw"])
else:
# dry-run only
for e in qs.iterator():
original = (e.scripture_raw or "").strip()
normalized, warns = normalize_scripture_field(original)
warnings_total += len(warns)
if normalized != original:
changed += 1
preview.append((e.id, original, normalized))
# 3) normalize comma spacing
s = re.sub(r"\s*,\s*", ",", s)
preview = preview[:100] # keep the table reasonable
# 4) split, trim, collapse internal whitespace
parts = [re.sub(r"\s{2,}", " ", p.strip()) for p in s.split(",")]
parts = [p for p in parts if p]
messages.info(
request,
f"{'Applied' if apply else 'Dry-run'}: {changed} entries "
f"{'changed' if apply else 'would change'}; {warnings_total} warnings."
)
return render(
request,
"normalize_result.html",
{
"title": "Scripture Normalizer",
"applied": apply,
"changed": changed,
"warnings_total": warnings_total,
"preview": preview,
"limit": limit,
},
)
# 5) de-dup (case-insensitive) while preserving order
# --------------------------
# Tools: Source Normalizer (WOL short-code)
# --------------------------
def _safe_wol_normalize(source_line: str) -> str:
    """
    Best-effort conversion of a source line via the optional
    ``wol_citation_converter`` helper module.

    Returns ``""`` for ``None``/empty input and the input unchanged for
    whitespace-only input. Otherwise the first callable found under one of
    several candidate function names is invoked; its result is returned
    unless it is ``None``. If the helper module is missing, none of the
    candidates exists, or every candidate raises, the original line is
    returned unchanged. Failures are logged rather than silently discarded,
    but never propagate to the caller.
    """
    import logging

    if not (source_line or "").strip():
        return source_line or ""

    log = logging.getLogger(__name__)

    try:
        # Optional helper module; its absence is an expected configuration.
        from . import wol_citation_converter as _wol
    except ImportError:
        return source_line
    except Exception:
        # The helper exists but is broken: keep the fallback, leave a trace.
        log.exception("wol_citation_converter failed to load; source left unchanged")
        return source_line

    # Several likely entry-point names are probed for compatibility.
    for fn_name in (
        "normalize_wol_citation",
        "convert_wol_citation",
        "convert_source_line",
        "normalize_source_line",
    ):
        fn = getattr(_wol, fn_name, None)
        if not callable(fn):
            continue
        try:
            out = fn(source_line)
        except Exception:
            # A failing candidate must not abort the run; try the next one.
            log.exception("%s raised for source line %r", fn_name, source_line)
            continue
        return out if out is not None else source_line

    return source_line
@login_required
@user_passes_test(is_admin)
@require_http_methods(["GET", "POST"])
def normalize_source(request):
    """
    Preview or apply normalization of every entry's ``source`` field,
    using ``_safe_wol_normalize`` for the conversion.

    GET  -> dry-run: nothing is saved; renders a summary plus the first
            100 before/after examples.
    POST -> apply: changed entries are saved in batches of 500, each batch
            inside its own transaction.

    The optional ``?limit=N`` query parameter restricts the run to the first
    N entries (ordered by id). A missing, empty, non-numeric or non-positive
    value means "no limit" instead of raising.
    """
    from django.db import transaction

    apply = request.method == "POST"

    # int() on raw query-string input must not become a 500, and a negative
    # value must never reach the queryset slice below.
    try:
        limit = int(request.GET.get("limit") or 0)
    except (TypeError, ValueError):
        limit = 0
    if limit < 0:
        limit = 0

    qs = Entry.objects.all().order_by("id")
    if limit:
        qs = qs[:limit]

    batch_size = 500    # rows saved per transaction
    preview_cap = 100   # keep the result table reasonable

    changed = 0
    preview: List[Tuple[int, str, str]] = []
    pending = []

    def flush_pending():
        # One short transaction per batch rather than one huge one.
        with transaction.atomic():
            for obj in pending:
                obj.save(update_fields=["source"])
        pending.clear()

    # A single pass serves both modes; only the write step depends on `apply`.
    for e in qs.iterator():
        original = (e.source or "").strip()
        normalized = _safe_wol_normalize(original)
        if normalized == original:
            continue

        changed += 1
        # Only the first examples are ever shown, so stop collecting there.
        if len(preview) < preview_cap:
            preview.append((e.id, original, normalized))

        if apply:
            e.source = normalized
            pending.append(e)
            if len(pending) >= batch_size:
                flush_pending()

    if pending:
        flush_pending()

    messages.info(
        request,
        f"{'Applied' if apply else 'Dry-run'}: {changed} entries "
        f"{'changed' if apply else 'would change'}."
    )
    return render(
        request,
        "normalize_source_result.html",
        {
            "applied": apply,
            "changed": changed,
            "preview": preview,
            "limit": limit,
        },
    )
# --------------------------
# Tools: Subjects Normalizer (NEW)
# --------------------------
# Delimiters that separate individual subjects. Non-ASCII delimiters are
# written as \uXXXX escapes so the pattern survives copy/paste and encoding
# round-trips (a lost glyph would leave an empty alternative or an
# unterminated character class, and the pattern would no longer compile).
SUBJECT_SPLIT_PATTERN = re.compile(
    r"""
    \s*                     # optional whitespace before the delimiter
    (?:                     # any one of the following delimiters:
        ;                   #   semicolon
      | \|                  #   vertical bar
      | /                   #   slash
      | \\                  #   backslash
      | \u00b7 | \u2022     #   bullets: middle dot, bullet
      | [\u2013\u2014]      #   en dash / em dash (with or without spaces)
      | ,                   #   comma (the list is rebuilt with clean commas)
    )
    \s*                     # optional whitespace after the delimiter
    """,
    re.X,
)

# A plain hyphen only counts as a separator when written " - "
# (whitespace-hyphen-whitespace), so hyphenated words stay intact.
HARD_DASH_SEP = re.compile(r"\s-\s")
def _normalize_subjects_field(subject: str) -> str:
    """
    Normalize the subject field to a clean, comma-separated list.

    Rules:
    - " - " (whitespace-hyphen-whitespace) is treated as a delimiter via
      HARD_DASH_SEP; hyphens inside words are left alone.
    - The text is split on every delimiter in SUBJECT_SPLIT_PATTERN.
    - Each piece has its inner whitespace collapsed; empty pieces are dropped.
    - Duplicates are removed case-insensitively, keeping the first spelling
      and the original order.

    Returns "" for None, empty or whitespace-only input; otherwise the
    cleaned subjects joined with ", ".
    """
    if not (subject or "").strip():
        return ""

    # Turn " - " into a comma first so inner-word hyphens are never split.
    s = HARD_DASH_SEP.sub(", ", subject)

    cleaned: List[str] = []
    seen = set()
    for part in SUBJECT_SPLIT_PATTERN.split(s):
        # Clean BEFORE de-duplicating, so empties are dropped and variants
        # that differ only in spacing collapse to a single entry.
        part = " ".join((part or "").split())
        if not part:
            continue
        # Defensive: split any comma that is still present in the piece.
        for q in part.split(","):
            q = q.strip()
            if not q:
                continue
            key = q.lower()
            if key not in seen:
                seen.add(key)
                cleaned.append(q)

    return ", ".join(cleaned)
def _is_staff(user):
    """Return a truthy value only for an authenticated user flagged as staff."""
    authenticated = user.is_authenticated
    if not authenticated:
        # Same short-circuit result as `is_authenticated and is_staff`.
        return authenticated
    return user.is_staff
@user_passes_test(_is_staff)
@login_required
@user_passes_test(is_admin)
@require_http_methods(["GET", "POST"])
def normalize_subject(request):
def normalize_subjects(request):
"""
GET = dry-run preview (optional ?limit=)
POST = apply to all (optional hidden 'limit' too, but UI mirrors the other tools)
GET -> dry-run preview (summary + first 100 examples)
POST -> apply changes to all entries' subject (batched)
Optional ?limit= for preview subset.
"""
dry_run = request.method == "GET"
limit_raw = request.GET.get("limit") if dry_run else request.POST.get("limit")
try:
limit = int(limit_raw) if (limit_raw and limit_raw.strip()) else 0
except ValueError:
limit = 0
apply = request.method == "POST"
limit = int(request.GET.get("limit", "0") or "0")
qs = Entry.objects.all().order_by("id")
total_considered = qs.count()
if limit > 0:
if limit:
qs = qs[:limit]
rows = []
changed_count = 0
changed = 0
preview: List[Tuple[int, str, str]] = []
for e in qs:
before = e.subject or ""
after = _normalize_subject_line(before)
changed = (after != before)
if apply:
from django.db import transaction
batch, pending = 500, []
for e in qs.iterator():
original = (e.subject or "").strip()
normalized = _normalize_subjects_field(original)
if normalized != original:
changed += 1
preview.append((e.id, original, normalized))
e.subject = normalized
pending.append(e)
if len(pending) >= batch:
with transaction.atomic():
for obj in pending:
obj.save(update_fields=["subject"])
pending.clear()
if pending:
from django.db import transaction
with transaction.atomic():
for obj in pending:
obj.save(update_fields=["subject"])
else:
# dry-run only
for e in qs.iterator():
original = (e.subject or "").strip()
normalized = _normalize_subjects_field(original)
if normalized != original:
changed += 1
preview.append((e.id, original, normalized))
# For preview, list all considered entries; for apply, show only changed rows
if dry_run or changed:
rows.append({
"id": e.id,
"before": before,
"after": after,
"changed": changed,
})
preview = preview[:100]
if not dry_run and changed:
e.subject = after
e.save(update_fields=["subject"])
changed_count += 1
context = {
"dry_run": dry_run,
"limit": limit,
"total_considered": total_considered if limit == 0 else min(total_considered, limit),
"rows": rows,
"changed_count": changed_count,
}
return render(request, "normalize_subject_result.html", context)
messages.info(
request,
f"{'Applied' if apply else 'Dry-run'}: {changed} entries "
f"{'changed' if apply else 'would change'}."
)
return render(
request,
"normalize_subjects_result.html",
{
"applied": apply,
"changed": changed,
"preview": preview,
"limit": limit,
},
)