From 666f6dd82ad6225840f1470dfd660d70594a9795 Mon Sep 17 00:00:00 2001 From: Joshua Laymon Date: Fri, 22 Aug 2025 03:10:50 +0000 Subject: [PATCH] Update web/core/views.py --- web/core/views.py | 130 +++++++++++++++++++++++++++------------------- 1 file changed, 78 insertions(+), 52 deletions(-) diff --git a/web/core/views.py b/web/core/views.py index 130dcfe..95497f6 100644 --- a/web/core/views.py +++ b/web/core/views.py @@ -336,70 +336,96 @@ def import_wizard(request): ] if request.method == "POST": - form = ImportForm(request.POST, request.FILES) - if form.is_valid(): + form = ImportForm(request.POST, request.FILES) + if form.is_valid(): + try: + raw = form.cleaned_data["file"].read() + + import io, csv as _csv + + # Decode once (BOM-safe) + text = raw.decode("utf-8-sig", errors="replace") + + # Try to sniff a dialect; fall back to Excel-style CSV try: - raw = form.cleaned_data["file"].read() + first_line = text.splitlines()[0] if text else "" + dialect = _csv.Sniffer().sniff(first_line) if first_line else _csv.excel + except Exception: + dialect = _csv.excel - # --- super-defensive header peek (never raises) --- - import io, csv as _csv - sio = io.StringIO(raw.decode("utf-8-sig", errors="replace")) + rdr = _csv.reader(io.StringIO(text), dialect) + rows = list(rdr) + if not rows: + raise ValueError("The CSV file appears to be empty.") - header_ok = False - try: - rdr = _csv.reader(sio) - first_row = next(rdr, []) + # Expected header (DB field order) + expected = [ + "Subject", "Illustration", "Application", "Scripture", "Source", + "Talk Title", "Talk Number", "Code", "Date", "Date Edited", + ] + expected_norm = [h.lower() for h in expected] - def _clean(s): - s = "" if s is None else str(s) - # strip quotes and odd prefixes like r:"Talk Title" - s = s.strip().strip("'").strip('"') - if s.lower().startswith("r:"): - s = s[2:].lstrip() - return s.strip().lower() + # Header cleaner: fixes r:"Talk Title", stray quotes, spaces, case + def _clean_header(s): + s = "" if s is None else str(s) + s = s.strip() + if s.lower().startswith("r:") or s.lower().startswith("r="): + s = s[2:].lstrip() + if (len(s) >= 2) and (s[0] == s[-1]) and s[0] in ('"', "'"): + s = s[1:-1] + return s.strip().lower() - norm = [_clean(c) for c in first_row] + first = rows[0] + norm_first = [_clean_header(c) for c in first] - expected = [h.strip().lower() for h in _EXPECTED_HEADERS] - header_ok = (norm == expected) - except Exception: - header_ok = False - finally: - # Rewind so the real importer reads the full file - sio.seek(0) + # If first row isn’t our header but length matches, inject one + header_ok = (norm_first == expected_norm) + if not header_ok and len(first) == len(expected): + rows.insert(0, expected) + elif not header_ok and len(first) != len(expected): + # Try common alternate delimiters if column count is off + for delim in (";", "\t"): + rdr2 = _csv.reader(io.StringIO(text), delimiter=delim) + test_rows = list(rdr2) + if test_rows and len(test_rows[0]) == len(expected): + rows = test_rows + first = rows[0] + norm_first = [_clean_header(c) for c in first] + header_ok = (norm_first == expected_norm) + if not header_ok: + rows.insert(0, expected) + break - # Make sure utils knows the expected headers - from . import utils as core_utils - if not hasattr(core_utils, "EXPECTED_HEADERS"): - core_utils.EXPECTED_HEADERS = _EXPECTED_HEADERS + # Re-encode a sanitized CSV for the existing importer + out = io.StringIO() + w = _csv.writer(out) + for r in rows: + w.writerow(r) + fixed_raw = out.getvalue().encode("utf-8") - # Hand off to your robust importer - report = import_csv_bytes( - raw, - dry_run=form.cleaned_data["dry_run"], - ) + # Keep utils in sync for importer variants that read EXPECTED_HEADERS + from . import utils as core_utils + core_utils.EXPECTED_HEADERS = expected - # Attach header check info - report = report or {} - report["header_ok"] = header_ok - if not header_ok: - messages.warning( - request, - "The first row does not match the expected header; assuming the file has no header." - ) - - return render( + # Hand off to the robust importer you already have + report = import_csv_bytes(fixed_raw, dry_run=form.cleaned_data["dry_run"]) or {} + report["header_ok"] = header_ok + if not header_ok: + messages.warning( request, - "import_result.html", - {"report": report, "dry_run": form.cleaned_data["dry_run"]}, + "The first row didn’t match the expected header; a clean header was injected automatically." ) - except Exception as e: - messages.error(request, f"Import failed: {e}") - else: - form = ImportForm() - # 👇 stays inside the function - return render(request, "import_wizard.html", {"form": form}) + return render( + request, + "import_result.html", + {"report": report, "dry_run": form.cleaned_data["dry_run"]}, + ) + except Exception as e: + messages.error(request, f"Import failed: {e}") +else: + form = ImportForm() +return render(request, "import_wizard.html", {"form": form}) @login_required