Update web/core/views.py
This commit is contained in:
+216
-349
@@ -1,27 +1,19 @@
|
|||||||
# core/views.py
|
from django.shortcuts import render, redirect, get_object_or_404
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import csv
|
|
||||||
import re
|
|
||||||
from datetime import date, timedelta
|
|
||||||
from typing import List, Optional, Tuple
|
|
||||||
|
|
||||||
from django.contrib import messages
|
|
||||||
from django.contrib.auth import authenticate, login
|
from django.contrib.auth import authenticate, login
|
||||||
from django.contrib.auth.decorators import login_required, user_passes_test
|
from django.contrib.auth.decorators import login_required, user_passes_test
|
||||||
from django.db.models import Q
|
|
||||||
from django.http import HttpResponse
|
from django.http import HttpResponse
|
||||||
from django.shortcuts import get_object_or_404, redirect, render
|
from django.contrib import messages
|
||||||
|
from django.db.models import Q
|
||||||
from django.views.decorators.http import require_http_methods
|
from django.views.decorators.http import require_http_methods
|
||||||
|
from datetime import date, timedelta
|
||||||
|
import csv
|
||||||
|
import re
|
||||||
|
|
||||||
from .forms import EntryForm, ImportForm
|
|
||||||
from .models import Entry
|
from .models import Entry
|
||||||
from .scripture_normalizer import normalize_scripture_field
|
from .forms import ImportForm, EntryForm
|
||||||
from .utils import has_wildcards, import_csv_bytes, terms, wildcard_to_regex
|
from .utils import terms, has_wildcards, wildcard_to_regex, import_csv_bytes
|
||||||
|
from .scripture_normalizer import normalize_scripture_field # <-- NEW
|
||||||
# --------------------------
|
from .source_normalizer import normalize_source_field # NEW
|
||||||
# Helpers / common
|
|
||||||
# --------------------------
|
|
||||||
|
|
||||||
# Order + labels used in the Search UI
|
# Order + labels used in the Search UI
|
||||||
FIELD_ORDER = [
|
FIELD_ORDER = [
|
||||||
@@ -79,10 +71,6 @@ def entry_context(entry, result_ids):
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
# --------------------------
|
|
||||||
# Search / Navigation
|
|
||||||
# --------------------------
|
|
||||||
|
|
||||||
@login_required
|
@login_required
|
||||||
def search_page(request):
|
def search_page(request):
|
||||||
"""
|
"""
|
||||||
@@ -216,10 +204,6 @@ def entry_view(request, entry_id):
|
|||||||
return render(request, "entry_view.html", entry_context(entry, ids))
|
return render(request, "entry_view.html", entry_context(entry, ids))
|
||||||
|
|
||||||
|
|
||||||
# --------------------------
|
|
||||||
# CRUD
|
|
||||||
# --------------------------
|
|
||||||
|
|
||||||
@login_required
|
@login_required
|
||||||
def entry_add(request):
|
def entry_add(request):
|
||||||
"""
|
"""
|
||||||
@@ -285,10 +269,6 @@ def entry_delete(request, entry_id):
|
|||||||
return render(request, "entry_delete_confirm.html", {"entry": entry})
|
return render(request, "entry_delete_confirm.html", {"entry": entry})
|
||||||
|
|
||||||
|
|
||||||
# --------------------------
|
|
||||||
# Import / Export
|
|
||||||
# --------------------------
|
|
||||||
|
|
||||||
@login_required
|
@login_required
|
||||||
@user_passes_test(is_admin)
|
@user_passes_test(is_admin)
|
||||||
def import_wizard(request):
|
def import_wizard(request):
|
||||||
@@ -353,10 +333,6 @@ def export_csv(request):
|
|||||||
return response
|
return response
|
||||||
|
|
||||||
|
|
||||||
# --------------------------
|
|
||||||
# Stats + Scripture Analytics
|
|
||||||
# --------------------------
|
|
||||||
|
|
||||||
@login_required
|
@login_required
|
||||||
def stats_page(request):
|
def stats_page(request):
|
||||||
total = Entry.objects.count()
|
total = Entry.objects.count()
|
||||||
@@ -366,7 +342,6 @@ def stats_page(request):
|
|||||||
|
|
||||||
from collections import Counter
|
from collections import Counter
|
||||||
|
|
||||||
# last 12 months bar series
|
|
||||||
months = []
|
months = []
|
||||||
y = today.year
|
y = today.year
|
||||||
m = today.month
|
m = today.month
|
||||||
@@ -393,14 +368,215 @@ def stats_page(request):
|
|||||||
for label, value in series
|
for label, value in series
|
||||||
]
|
]
|
||||||
|
|
||||||
# top subjects
|
|
||||||
counts = Counter()
|
counts = Counter()
|
||||||
for subj in Entry.objects.exclude(subject="").values_list("subject", flat=True):
|
for subj in Entry.objects.exclude(subject="").values_list("subject", flat=True):
|
||||||
for tag in [t.strip() for t in subj.split(",") if t.strip()]:
|
for tag in [t.strip() for t in subj.split(",") if t.strip()]:
|
||||||
counts[tag.lower()] += 1
|
counts[tag.lower()] += 1
|
||||||
top_subjects = [{"name": n.title(), "count": c} for n, c in counts.most_common(10)]
|
top_subjects = [{"name": n.title(), "count": c} for n, c in counts.most_common(10)]
|
||||||
|
|
||||||
# scripture analytics
|
return render(
|
||||||
|
request,
|
||||||
|
"stats.html",
|
||||||
|
{
|
||||||
|
"total": total,
|
||||||
|
"last30": last30,
|
||||||
|
"last365": last365,
|
||||||
|
"series": series,
|
||||||
|
"heights": heights,
|
||||||
|
"top_subjects": top_subjects,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# ========= NEW: Scripture Normalizer endpoint =========
|
||||||
|
|
||||||
|
@login_required
@user_passes_test(is_admin)
@require_http_methods(["GET", "POST"])
def normalize_scripture(request):
    """
    Preview or apply normalization of every entry's ``scripture_raw``.

    GET  -> dry-run preview (summary + first 100 examples)
    POST -> apply changes to entries' scripture_raw (saved in batches)

    Optional ``?limit=N`` (always read from the query string, also on POST)
    restricts processing to the first N entries ordered by id; 0 or absent
    means "no limit". A non-integer or negative value is answered with
    HTTP 400 instead of raising inside the view.
    """
    from django.db import transaction

    apply = request.method == "POST"

    # Validate the user-supplied limit up front: int() raises on junk and a
    # negative value cannot be used to slice a queryset.
    try:
        limit = int(request.GET.get("limit", "0") or "0")
    except ValueError:
        limit = -1
    if limit < 0:
        return HttpResponse(
            "Invalid 'limit' parameter: expected a non-negative integer.",
            status=400,
            content_type="text/plain",
        )

    qs = Entry.objects.all().order_by("id")
    if limit:
        qs = qs[:limit]

    batch_size = 500    # rows written per transaction
    preview_max = 100   # keep the table reasonable

    changed = 0
    warnings_total = 0
    preview = []
    pending = []

    def _flush_pending():
        # write in batches to keep transactions short
        with transaction.atomic():
            for obj in pending:
                obj.save(update_fields=["scripture_raw"])
        pending.clear()

    for e in qs.iterator():
        original = (e.scripture_raw or "").strip()
        normalized, warns = normalize_scripture_field(original)
        warnings_total += len(warns)
        if normalized == original:
            continue

        changed += 1
        # Only the first rows are ever displayed; do not hold the rest in memory.
        if len(preview) < preview_max:
            preview.append((e.id, original, normalized))

        if apply:
            e.scripture_raw = normalized
            pending.append(e)
            if len(pending) >= batch_size:
                _flush_pending()

    if pending:
        _flush_pending()

    messages.info(
        request,
        f"{'Applied' if apply else 'Dry‑run'}: {changed} entries "
        f"{'changed' if apply else 'would change'}; {warnings_total} warnings."
    )
    return render(
        request,
        "normalize_result.html",
        {
            "applied": apply,
            "changed": changed,
            "warnings_total": warnings_total,
            "preview": preview,
            "limit": limit,
        },
    )
|
||||||
|
|
||||||
|
from django.views.decorators.http import require_http_methods
|
||||||
|
from django.contrib.auth.decorators import login_required, user_passes_test
|
||||||
|
|
||||||
|
@login_required
@user_passes_test(is_admin)
@require_http_methods(["GET", "POST"])
def normalize_source(request):
    """
    Preview or apply normalization of every entry's ``source`` field.

    GET  -> dry-run preview (summary + first 100 examples)
    POST -> apply changes to entries' source (saved in batches)

    Optional ``?limit=N`` (always read from the query string, also on POST)
    restricts processing to the first N entries ordered by id; 0 or absent
    means "no limit". A non-integer or negative value is answered with
    HTTP 400 instead of raising inside the view.
    """
    from django.db import transaction

    apply = request.method == "POST"

    # Validate the user-supplied limit up front: int() raises on junk and a
    # negative value cannot be used to slice a queryset.
    try:
        limit = int(request.GET.get("limit", "0") or "0")
    except ValueError:
        limit = -1
    if limit < 0:
        return HttpResponse(
            "Invalid 'limit' parameter: expected a non-negative integer.",
            status=400,
            content_type="text/plain",
        )

    qs = Entry.objects.all().order_by("id")
    if limit:
        qs = qs[:limit]

    batch_size = 500    # rows written per transaction
    preview_max = 100   # rows shown in the result table

    changed = 0
    warnings_total = 0
    preview = []
    pending = []

    def _flush_pending():
        # One short transaction per batch rather than one huge one.
        with transaction.atomic():
            for obj in pending:
                obj.save(update_fields=["source"])
        pending.clear()

    for e in qs.iterator():
        original = (e.source or "").strip()
        normalized, warns = normalize_source_field(original)
        warnings_total += len(warns)
        if normalized == original:
            continue

        changed += 1
        # Only the first rows are ever displayed; do not hold the rest in memory.
        if len(preview) < preview_max:
            preview.append((e.id, original, normalized))

        if apply:
            e.source = normalized
            pending.append(e)
            if len(pending) >= batch_size:
                _flush_pending()

    if pending:
        _flush_pending()

    messages.info(
        request,
        f"{'Applied' if apply else 'Dry‑run'}: {changed} entries "
        f"{'changed' if apply else 'would change'}; {warnings_total} warnings."
    )
    return render(
        request,
        "normalize_source_result.html",
        {
            "applied": apply,
            "changed": changed,
            "warnings_total": warnings_total,
            "preview": preview,
            "limit": limit,
        },
    )
|
||||||
|
|
||||||
|
@login_required
|
||||||
|
def stats_page(request):
|
||||||
|
from collections import Counter
|
||||||
|
total = Entry.objects.count()
|
||||||
|
today = date.today()
|
||||||
|
last30 = Entry.objects.filter(date_added__gte=today - timedelta(days=30)).count()
|
||||||
|
last365 = Entry.objects.filter(date_added__gte=today - timedelta(days=365)).count()
|
||||||
|
|
||||||
|
# ---- Adds per month (existing logic) ----
|
||||||
|
months = []
|
||||||
|
y = today.year
|
||||||
|
m = today.month
|
||||||
|
for i in range(12):
|
||||||
|
mm = m - i
|
||||||
|
yy = y
|
||||||
|
while mm <= 0:
|
||||||
|
mm += 12
|
||||||
|
yy -= 1
|
||||||
|
from datetime import date as _d
|
||||||
|
start = _d(yy, mm, 1)
|
||||||
|
end = _d(yy + 1, 1, 1) if mm == 12 else _d(yy, mm + 1, 1)
|
||||||
|
label = f"{yy}-{mm:02d}"
|
||||||
|
months.append((label, start, end))
|
||||||
|
months = list(reversed(months))
|
||||||
|
|
||||||
|
series = [
|
||||||
|
(label, Entry.objects.filter(date_added__gte=start, date_added__lt=end).count())
|
||||||
|
for label, start, end in months
|
||||||
|
]
|
||||||
|
peak = max((v for _, v in series), default=1)
|
||||||
|
heights = [(label, value, 8 + int((value / peak) * 100) if peak else 8)
|
||||||
|
for label, value in series]
|
||||||
|
|
||||||
|
# ---- Top subjects (existing logic) ----
|
||||||
|
counts = Counter()
|
||||||
|
for subj in Entry.objects.exclude(subject="").values_list("subject", flat=True):
|
||||||
|
for tag in [t.strip() for t in subj.split(",") if t.strip()]:
|
||||||
|
counts[tag.lower()] += 1
|
||||||
|
top_subjects = [{"name": n.title(), "count": c} for n, c in counts.most_common(10)]
|
||||||
|
|
||||||
|
# ---- Scripture analytics (NEW) ----
|
||||||
|
# Expect canonical like: "Matt. 5:14; Ps. 1:1,2; 1 Cor. 13:4-7"
|
||||||
|
# Split on semicolons; capture book and chap/verses if present.
|
||||||
BOOK_RE = re.compile(
|
BOOK_RE = re.compile(
|
||||||
r"^\s*(?P<book>(?:[1-3]\s+)?[A-Za-z\.]+(?:\s+[A-Za-z\.]+){0,2})"
|
r"^\s*(?P<book>(?:[1-3]\s+)?[A-Za-z\.]+(?:\s+[A-Za-z\.]+){0,2})"
|
||||||
r"(?:\s+(?P<cv>\d+(?::[\d,\-\u2013\u2014]+)?))?\s*$"
|
r"(?:\s+(?P<cv>\d+(?::[\d,\-\u2013\u2014]+)?))?\s*$"
|
||||||
@@ -442,7 +618,7 @@ def stats_page(request):
|
|||||||
(sum(ref_per_entry_counts) / len(ref_per_entry_counts))
|
(sum(ref_per_entry_counts) / len(ref_per_entry_counts))
|
||||||
if ref_per_entry_counts else 0.0, 2
|
if ref_per_entry_counts else 0.0, 2
|
||||||
)
|
)
|
||||||
book_distribution = books_counter.most_common(30)
|
book_distribution = books_counter.most_common(30) # handy for future charts
|
||||||
|
|
||||||
return render(
|
return render(
|
||||||
request,
|
request,
|
||||||
@@ -454,321 +630,12 @@ def stats_page(request):
|
|||||||
"series": series,
|
"series": series,
|
||||||
"heights": heights,
|
"heights": heights,
|
||||||
"top_subjects": top_subjects,
|
"top_subjects": top_subjects,
|
||||||
|
|
||||||
|
# NEW context for the template
|
||||||
"entries_with_scripture": entries_with_scripture,
|
"entries_with_scripture": entries_with_scripture,
|
||||||
"avg_refs_per_entry": avg_refs_per_entry,
|
"avg_refs_per_entry": avg_refs_per_entry,
|
||||||
"top_books": top_books,
|
"top_books": top_books,
|
||||||
"top_refs": top_refs,
|
"top_refs": top_refs,
|
||||||
"book_distribution": book_distribution,
|
"book_distribution": book_distribution,
|
||||||
},
|
},
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
# --------------------------
|
|
||||||
# Tools: Scripture Normalizer
|
|
||||||
# --------------------------
|
|
||||||
|
|
||||||
@login_required
@user_passes_test(is_admin)
@require_http_methods(["GET", "POST"])
def normalize_scripture(request):
    """
    Preview or apply normalization of every entry's ``scripture_raw``.

    GET  -> dry-run preview (summary + first 100 examples)
    POST -> apply changes to entries' scripture_raw (saved in batches)

    Optional ``?limit=N`` (always read from the query string, also on POST)
    restricts processing to the first N entries ordered by id; 0 or absent
    means "no limit". A non-integer or negative value is answered with
    HTTP 400 instead of raising inside the view.
    """
    from django.db import transaction

    apply = request.method == "POST"

    # Validate the user-supplied limit up front: int() raises on junk and a
    # negative value cannot be used to slice a queryset.
    try:
        limit = int(request.GET.get("limit", "0") or "0")
    except ValueError:
        limit = -1
    if limit < 0:
        return HttpResponse(
            "Invalid 'limit' parameter: expected a non-negative integer.",
            status=400,
            content_type="text/plain",
        )

    qs = Entry.objects.all().order_by("id")
    if limit:
        qs = qs[:limit]

    batch_size = 500    # rows written per transaction
    preview_max = 100   # keep the table reasonable

    changed = 0
    warnings_total = 0
    preview = []
    pending = []

    def _flush_pending():
        # write in batches to keep transactions short
        with transaction.atomic():
            for obj in pending:
                obj.save(update_fields=["scripture_raw"])
        pending.clear()

    for e in qs.iterator():
        original = (e.scripture_raw or "").strip()
        normalized, warns = normalize_scripture_field(original)
        warnings_total += len(warns)
        if normalized == original:
            continue

        changed += 1
        # Only the first rows are ever displayed; do not hold the rest in memory.
        if len(preview) < preview_max:
            preview.append((e.id, original, normalized))

        if apply:
            e.scripture_raw = normalized
            pending.append(e)
            if len(pending) >= batch_size:
                _flush_pending()

    if pending:
        _flush_pending()

    messages.info(
        request,
        f"{'Applied' if apply else 'Dry-run'}: {changed} entries "
        f"{'changed' if apply else 'would change'}; {warnings_total} warnings."
    )
    return render(
        request,
        "normalize_result.html",
        {
            "title": "Scripture Normalizer",
            "applied": apply,
            "changed": changed,
            "warnings_total": warnings_total,
            "preview": preview,
            "limit": limit,
        },
    )
|
|
||||||
|
|
||||||
|
|
||||||
# --------------------------
|
|
||||||
# Tools: Source Normalizer (WOL short-code)
|
|
||||||
# --------------------------
|
|
||||||
|
|
||||||
def _safe_wol_normalize(source_line: str) -> str:
|
|
||||||
"""
|
|
||||||
Try to normalize a source line to WOL-style notation using an external helper
|
|
||||||
if available; otherwise return the original line unchanged.
|
|
||||||
We attempt several likely function names to maximize compatibility.
|
|
||||||
"""
|
|
||||||
if not (source_line or "").strip():
|
|
||||||
return source_line or ""
|
|
||||||
try:
|
|
||||||
# Attempt to import a helper module you added to your project.
|
|
||||||
from . import wol_citation_converter as _wol
|
|
||||||
except Exception:
|
|
||||||
return source_line
|
|
||||||
|
|
||||||
for fn_name in (
|
|
||||||
"normalize_wol_citation",
|
|
||||||
"convert_wol_citation",
|
|
||||||
"convert_source_line",
|
|
||||||
"normalize_source_line",
|
|
||||||
):
|
|
||||||
try:
|
|
||||||
fn = getattr(_wol, fn_name, None)
|
|
||||||
if callable(fn):
|
|
||||||
out = fn(source_line)
|
|
||||||
return out if (out is not None) else source_line
|
|
||||||
except Exception:
|
|
||||||
continue
|
|
||||||
return source_line
|
|
||||||
|
|
||||||
|
|
||||||
@login_required
@user_passes_test(is_admin)
@require_http_methods(["GET", "POST"])
def normalize_source(request):
    """
    Preview or apply normalization of every entry's ``source`` field.

    GET  -> dry-run preview (summary + first 100 examples)
    POST -> apply changes to entries' source (saved in batches)

    Optional ``?limit=N`` (always read from the query string, also on POST)
    restricts processing to the first N entries ordered by id; 0 or absent
    means "no limit". A non-integer or negative value is answered with
    HTTP 400 instead of raising inside the view.
    """
    from django.db import transaction

    apply = request.method == "POST"

    # Validate the user-supplied limit up front: int() raises on junk and a
    # negative value cannot be used to slice a queryset.
    try:
        limit = int(request.GET.get("limit", "0") or "0")
    except ValueError:
        limit = -1
    if limit < 0:
        return HttpResponse(
            "Invalid 'limit' parameter: expected a non-negative integer.",
            status=400,
            content_type="text/plain",
        )

    qs = Entry.objects.all().order_by("id")
    if limit:
        qs = qs[:limit]

    batch_size = 500    # rows written per transaction
    preview_max = 100   # rows shown in the result table

    changed = 0
    preview: List[Tuple[int, str, str]] = []
    pending = []

    def _flush_pending():
        # One short transaction per batch rather than one huge one.
        with transaction.atomic():
            for obj in pending:
                obj.save(update_fields=["source"])
        pending.clear()

    for e in qs.iterator():
        original = (e.source or "").strip()
        normalized = _safe_wol_normalize(original)
        if normalized == original:
            continue

        changed += 1
        # Only the first rows are ever displayed; do not hold the rest in memory.
        if len(preview) < preview_max:
            preview.append((e.id, original, normalized))

        if apply:
            e.source = normalized
            pending.append(e)
            if len(pending) >= batch_size:
                _flush_pending()

    if pending:
        _flush_pending()

    messages.info(
        request,
        f"{'Applied' if apply else 'Dry-run'}: {changed} entries "
        f"{'changed' if apply else 'would change'}."
    )
    return render(
        request,
        "normalize_source_result.html",
        {
            "applied": apply,
            "changed": changed,
            "preview": preview,
            "limit": limit,
        },
    )
|
|
||||||
|
|
||||||
|
|
||||||
# --------------------------
|
|
||||||
# Tools: Subjects Normalizer (NEW)
|
|
||||||
# --------------------------
|
|
||||||
|
|
||||||
SUBJECT_SPLIT_PATTERN = re.compile(
|
|
||||||
r"""
|
|
||||||
\s* # optional space
|
|
||||||
(?: # any of the following delimiters:
|
|
||||||
; # semicolon
|
|
||||||
| \| # vertical bar
|
|
||||||
| / # slash
|
|
||||||
| \\ # backslash
|
|
||||||
| · | • # bullets
|
|
||||||
| [–—] # en/em dash (only when surrounded by spaces)
|
|
||||||
| , # comma (we'll re-normalize commas too)
|
|
||||||
)
|
|
||||||
\s*
|
|
||||||
""",
|
|
||||||
re.X,
|
|
||||||
)
|
|
||||||
|
|
||||||
# For hyphen: only split when it's clearly used as a separator " - " (space-hyphen-space)
|
|
||||||
HARD_DASH_SEP = re.compile(r"\s-\s")
|
|
||||||
|
|
||||||
|
|
||||||
def _normalize_subjects_field(subject: str) -> str:
|
|
||||||
"""
|
|
||||||
Normalize the subject field to a clean, comma-separated list.
|
|
||||||
|
|
||||||
Rules:
|
|
||||||
- Split on common delimiters: ';', '/', '\', '|', bullets, en/em dash.
|
|
||||||
- Convert " - " (space-hyphen-space) to a delimiter as well.
|
|
||||||
- Keep hyphens within words (no split on 'word-word').
|
|
||||||
- Trim whitespace; drop empties.
|
|
||||||
- De-duplicate while preserving order.
|
|
||||||
"""
|
|
||||||
if not (subject or "").strip():
|
|
||||||
return ""
|
|
||||||
|
|
||||||
# First, convert " - " into a comma so we don't split on inner-word hyphens.
|
|
||||||
s = HARD_DASH_SEP.sub(", ", subject)
|
|
||||||
|
|
||||||
# Now split on the big set (includes commas; we'll rebuild clean commas later)
|
|
||||||
parts = SUBJECT_SPLIT_PATTERN.split(s)
|
|
||||||
|
|
||||||
cleaned: List[str] = []
|
|
||||||
seen = set()
|
|
||||||
for p in parts:
|
|
||||||
p = " ".join((p or "").split()) # collapse inner whitespace
|
|
||||||
if not p:
|
|
||||||
continue
|
|
||||||
# Many entries already have comma-separated subjects; split those too
|
|
||||||
subparts = [q.strip() for q in p.split(",") if q.strip()]
|
|
||||||
for q in subparts:
|
|
||||||
# Preserve order; avoid duplicates
|
|
||||||
key = q.lower()
|
|
||||||
if key not in seen:
|
|
||||||
seen.add(key)
|
|
||||||
cleaned.append(q)
|
|
||||||
|
|
||||||
return ", ".join(cleaned)
|
|
||||||
|
|
||||||
|
|
||||||
@login_required
@user_passes_test(is_admin)
@require_http_methods(["GET", "POST"])
def normalize_subjects(request):
    """
    Preview or apply normalization of every entry's ``subject`` field.

    GET  -> dry-run preview (summary + first 100 examples)
    POST -> apply changes to entries' subject (saved in batches)

    Optional ``?limit=N`` (always read from the query string, also on POST)
    restricts processing to the first N entries ordered by id; 0 or absent
    means "no limit". A non-integer or negative value is answered with
    HTTP 400 instead of raising inside the view.
    """
    from django.db import transaction

    apply = request.method == "POST"

    # Validate the user-supplied limit up front: int() raises on junk and a
    # negative value cannot be used to slice a queryset.
    try:
        limit = int(request.GET.get("limit", "0") or "0")
    except ValueError:
        limit = -1
    if limit < 0:
        return HttpResponse(
            "Invalid 'limit' parameter: expected a non-negative integer.",
            status=400,
            content_type="text/plain",
        )

    qs = Entry.objects.all().order_by("id")
    if limit:
        qs = qs[:limit]

    batch_size = 500    # rows written per transaction
    preview_max = 100   # rows shown in the result table

    changed = 0
    preview: List[Tuple[int, str, str]] = []
    pending = []

    def _flush_pending():
        # One short transaction per batch rather than one huge one.
        with transaction.atomic():
            for obj in pending:
                obj.save(update_fields=["subject"])
        pending.clear()

    for e in qs.iterator():
        original = (e.subject or "").strip()
        normalized = _normalize_subjects_field(original)
        if normalized == original:
            continue

        changed += 1
        # Only the first rows are ever displayed; do not hold the rest in memory.
        if len(preview) < preview_max:
            preview.append((e.id, original, normalized))

        if apply:
            e.subject = normalized
            pending.append(e)
            if len(pending) >= batch_size:
                _flush_pending()

    if pending:
        _flush_pending()

    messages.info(
        request,
        f"{'Applied' if apply else 'Dry-run'}: {changed} entries "
        f"{'changed' if apply else 'would change'}."
    )
    return render(
        request,
        "normalize_subjects_result.html",
        {
            "applied": apply,
            "changed": changed,
            "preview": preview,
            "limit": limit,
        },
    )
|
)
|
||||||
Reference in New Issue
Block a user