From f64d41313e346719dacaff5ef2495f14c05b6884 Mon Sep 17 00:00:00 2001 From: Joshua Laymon Date: Fri, 22 Aug 2025 03:23:50 +0000 Subject: [PATCH] Update web/core/views.py --- web/core/views.py | 81 ++++++++++++++++++++++------------------------- 1 file changed, 38 insertions(+), 43 deletions(-) diff --git a/web/core/views.py b/web/core/views.py index 27ebac5..ffd8081 100644 --- a/web/core/views.py +++ b/web/core/views.py @@ -329,95 +329,90 @@ def entry_delete(request, entry_id): @login_required @user_passes_test(is_admin) def import_wizard(request): - # Safety: expected header list (matches DB/order the importer expects) - _EXPECTED_HEADERS = [ + EXPECTED = [ "Subject", "Illustration", "Application", "Scripture", "Source", "Talk Title", "Talk Number", "Code", "Date", "Date Edited", ] + EXPECTED_NORM = [h.lower() for h in EXPECTED] if request.method == "POST": form = ImportForm(request.POST, request.FILES) if form.is_valid(): try: + import io, re, csv as _csv + raw = form.cleaned_data["file"].read() + text = raw.decode("utf-8-sig", errors="replace") # BOM-safe - import io - import csv as _csv - - # Decode once (BOM‑safe) - text = raw.decode("utf-8-sig", errors="replace") - - # Try to sniff a dialect; fall back to Excel-style CSV + # Try to sniff; fall back to excel dialect try: first_line = text.splitlines()[0] if text else "" dialect = _csv.Sniffer().sniff(first_line) if first_line else _csv.excel except Exception: dialect = _csv.excel - rdr = _csv.reader(io.StringIO(text), dialect) - rows = list(rdr) + rows = list(_csv.reader(io.StringIO(text), dialect)) if not rows: raise ValueError("The CSV file appears to be empty.") - expected = _EXPECTED_HEADERS - expected_norm = [h.lower() for h in expected] + # --- header cleaning --- + # Handles: r."Talk Title", r:'Talk Title', r=Talk Title, r: Talk Title, etc. + _r_prefix = re.compile(r'^[rR]\s*[\.\:\=\-]\s*') - # Header cleaner: fixes r:"Talk Title", stray quotes, spaces, case - def _clean_header(s): + def clean_header_cell(s: str) -> str: s = "" if s is None else str(s) s = s.strip() - if s.lower().startswith("r:") or s.lower().startswith("r="): - s = s[2:].lstrip() - if (len(s) >= 2) and (s[0] == s[-1]) and s[0] in ('"', "'"): - s = s[1:-1] + # strip balanced quotes + if len(s) >= 2 and s[0] == s[-1] and s[0] in ('"', "'"): + s = s[1:-1].strip() + # strip weird r. prefix + s = _r_prefix.sub("", s) + # final trim + lower for comparison return s.strip().lower() first = rows[0] - norm_first = [_clean_header(c) for c in first] + norm_first = [clean_header_cell(c) for c in first] - # If first row isn’t our header but length matches, inject one - header_ok = (norm_first == expected_norm) - if not header_ok and len(first) == len(expected): - rows.insert(0, expected) - elif not header_ok and len(first) != len(expected): - # Try common alternate delimiters if column count is off + header_ok = (norm_first == EXPECTED_NORM) + + # If first row isn’t the header but length matches, inject our clean header + if not header_ok and len(first) == len(EXPECTED): + rows.insert(0, EXPECTED) + elif not header_ok and len(first) != len(EXPECTED): + # Try common alternate delimiters if the column count is off for delim in (";", "\t"): - rdr2 = _csv.reader(io.StringIO(text), delimiter=delim) - test_rows = list(rdr2) - if test_rows and len(test_rows[0]) == len(expected): - rows = test_rows + test = list(_csv.reader(io.StringIO(text), delimiter=delim)) + if test and len(test[0]) == len(EXPECTED): + rows = test first = rows[0] - norm_first = [_clean_header(c) for c in first] - header_ok = (norm_first == expected_norm) + norm_first = [clean_header_cell(c) for c in first] + header_ok = (norm_first == EXPECTED_NORM) if not header_ok: - rows.insert(0, expected) + rows.insert(0, EXPECTED) break - # Re-encode a sanitized CSV for the existing importer + # Re‑encode a sanitized CSV to feed the existing importer out = io.StringIO() w = _csv.writer(out) for r in rows: w.writerow(r) fixed_raw = out.getvalue().encode("utf-8") - # Keep utils in sync for importer variants that read EXPECTED_HEADERS + # Keep utils in sync for any helpers that read EXPECTED_HEADERS from . import utils as core_utils - core_utils.EXPECTED_HEADERS = expected + core_utils.EXPECTED_HEADERS = EXPECTED - # Hand off to the robust importer you already have report = import_csv_bytes(fixed_raw, dry_run=form.cleaned_data["dry_run"]) or {} report["header_ok"] = header_ok if not header_ok: messages.warning( request, - "The first row didn’t match the expected header; a clean header was injected automatically." + "The first row didn’t match the expected header; it was cleaned/normalized automatically." ) - return render( - request, - "import_result.html", - {"report": report, "dry_run": form.cleaned_data["dry_run"]}, - ) + return render(request, "import_result.html", + {"report": report, "dry_run": form.cleaned_data["dry_run"]}) + except Exception as e: messages.error(request, f"Import failed: {e}") else: