Update web/core/views.py

2025-08-22 03:10:50 +00:00
parent ecb8e6657c
commit 666f6dd82a
1 changed file with 78 additions and 52 deletions
@@ -341,51 +341,79 @@ def import_wizard(request):
        try:
            raw = form.cleaned_data["file"].read()
                # --- super-defensive header peek (never raises) ---
            import io, csv as _csv
                sio = io.StringIO(raw.decode("utf-8-sig", errors="replace"))
-                header_ok = False
+            # Decode once (BOM-safe)
            text = raw.decode("utf-8-sig", errors="replace")
            # Try to sniff a dialect; fall back to Excel-style CSV
            try:
-                    rdr = _csv.reader(sio)
+                first_line = text.splitlines()[0] if text else ""
-                    first_row = next(rdr, [])
+                dialect = _csv.Sniffer().sniff(first_line) if first_line else _csv.excel
            except Exception:
                dialect = _csv.excel
-                    def _clean(s):
+            rdr = _csv.reader(io.StringIO(text), dialect)
            rows = list(rdr)
            if not rows:
                raise ValueError("The CSV file appears to be empty.")
            # Expected header (DB field order)
            expected = [
                "Subject", "Illustration", "Application", "Scripture", "Source",
                "Talk Title", "Talk Number", "Code", "Date", "Date Edited",
            ]
            expected_norm = [h.lower() for h in expected]
            # Header cleaner: fixes r:"Talk Title", stray quotes, spaces, case
            def _clean_header(s):
                """Normalize one header cell for comparison with ``expected_norm``.

                The cell is converted to ``str`` (``None`` becomes ``""``) and
                surrounding whitespace is removed. A leading ``r:`` or ``r=``
                prefix (any case) is dropped, as seen in headers such as
                ``r:"Talk Title"``. One pair of matching single or double
                quotes wrapping the value is then removed, and the result is
                returned stripped and lower-cased.
                """
                s = "" if s is None else str(s)
                s = s.strip()
                # One tuple-based check covers both prefix spellings.
                if s.lower().startswith(("r:", "r=")):
                    s = s[2:].lstrip()
                # Only unwrap when the same quote character opens and closes.
                if len(s) >= 2 and s[0] == s[-1] and s[0] in ('"', "'"):
                    s = s[1:-1]
                return s.strip().lower()
-                    norm = [_clean(c) for c in first_row]
+            first = rows[0]
            norm_first = [_clean_header(c) for c in first]
-                    expected = [h.strip().lower() for h in _EXPECTED_HEADERS]
+            # If first row isn’t our header but length matches, inject one
-                    header_ok = (norm == expected)
+            header_ok = (norm_first == expected_norm)
-                except Exception:
+            if not header_ok and len(first) == len(expected):
-                    header_ok = False
+                rows.insert(0, expected)
-                finally:
+            elif not header_ok and len(first) != len(expected):
-                    # Rewind so the real importer reads the full file
+                # Try common alternate delimiters if column count is off
-                    sio.seek(0)
+                for delim in (";", "\t"):
                    rdr2 = _csv.reader(io.StringIO(text), delimiter=delim)
                    test_rows = list(rdr2)
                    if test_rows and len(test_rows[0]) == len(expected):
                        rows = test_rows
                        first = rows[0]
                        norm_first = [_clean_header(c) for c in first]
                        header_ok = (norm_first == expected_norm)
                        if not header_ok:
                            rows.insert(0, expected)
                        break
-                # Make sure utils knows the expected headers
+            # Re-encode a sanitized CSV for the existing importer
            out = io.StringIO()
            w = _csv.writer(out)
            for r in rows:
                w.writerow(r)
            fixed_raw = out.getvalue().encode("utf-8")
            # Keep utils in sync for importer variants that read EXPECTED_HEADERS
            from . import utils as core_utils
-                if not hasattr(core_utils, "EXPECTED_HEADERS"):
+            core_utils.EXPECTED_HEADERS = expected
                    core_utils.EXPECTED_HEADERS = _EXPECTED_HEADERS
-                # Hand off to your robust importer
+            # Hand off to the robust importer you already have
-                report = import_csv_bytes(
+            report = import_csv_bytes(fixed_raw, dry_run=form.cleaned_data["dry_run"]) or {}
                    raw,
                    dry_run=form.cleaned_data["dry_run"],
                )
                # Attach header check info
                report = report or {}
            report["header_ok"] = header_ok
            if not header_ok:
                messages.warning(
                    request,
-                        "The first row does not match the expected header; assuming the file has no header."
+                    "The first row didn’t match the expected header; a clean header was injected automatically."
                )
            return render(
@@ -397,8 +425,6 @@ def import_wizard(request):
            messages.error(request, f"Import failed: {e}")
 else:
    form = ImportForm()
    # 👇 stays inside the function
 return render(request, "import_wizard.html", {"form": form})