Update web/core/utils.py
parent cc6461ced2
commit 17c9a89848
@@ -336,158 +336,181 @@ def _to_int_or_none(v: Any) -> Optional[int]:
         return None
 
 
-def import_csv_bytes(data: bytes, dry_run: bool = True) -> Dict[str, Any]:
-    """
-    Robust CSV importer for Entry.
-
-    - Accepts your human-readable header (Subject, Illustration, ...)
-      and/or direct model field names.
-    - Normalizes odd headers like r."Talk Title".
-    - Handles BOM & dialect sniffing.
-    - Returns a report dict: {ok, created, updated, skipped, errors, preview, total_rows, header}
-    """
-    report: Dict[str, Any] = {
-        "ok": False,
-        "created": 0,
-        "updated": 0,
-        "skipped": 0,
-        "errors": [],   # list[str]
-        "preview": [],  # first ~10 rows that would be imported
-        "total_rows": 0,
-        "header": [],
-    }
-
-    # --- decode safely (remove BOM, keep unknowns) ---
-    text = data.decode("utf-8-sig", errors="replace")
-
-    # --- sniff dialect; fall back to excel ---
-    try:
-        sample = "\n".join(text.splitlines()[:10])
-        dialect = csv.Sniffer().sniff(sample) if sample.strip() else csv.excel
-    except Exception:
-        dialect = csv.excel
-
-    rdr = csv.reader(io.StringIO(text), dialect)
-
-    try:
-        raw_header = next(rdr, [])
-    except Exception as e:
-        report["errors"].append(f"Failed reading header: {e}")
-        return report
-
-    # Clean & map header
-    cleaned = [_clean_header_token(h) for h in raw_header]
-    mapped: List[str] = []
-    unknowns: List[str] = []
-    for token in cleaned:
-        target = ACCEPTABLE_HEADERS.get(token)
-        if target:
-            mapped.append(target)
-        else:
-            unknowns.append(token or "(empty)")
-
-    # If header doesn't match expected width but row count does, assume *no* header;
-    # inject expected header so downstream works.
-    has_header = True
-    if unknowns:
-        # Heuristic: if the number of columns equals EXPECTED_HEADERS and *none*
-        # of the cleaned tokens map, it's probably a data row (no header)
-        matches = sum(1 for t in cleaned if t in ACCEPTABLE_HEADERS)
-        if matches == 0 and len(cleaned) == len(EXPECTED_HEADERS):
-            # inject expected header and re-run
-            has_header = False
-            mapped = [HEADER_MAP[h] for h in EXPECTED_HEADERS]
-            # rebuild a reader with the expected header injected
-            sio = io.StringIO(text)
-            rdr_tmp = csv.reader(sio, dialect)
-            rows = list(rdr_tmp)
-            rows.insert(0, EXPECTED_HEADERS)  # inject pretty header for report
-            rdr = iter(rows)  # consume from this list iterator
-            next(rdr, None)   # skip our injected header
-        else:
-            # keep going but warn in the report
-            report["errors"].append(
-                "Some header columns were not recognized: "
-                + ", ".join(unknowns)
-                + " (continuing with best-effort mapping)"
-            )
-
-    report["header"] = mapped
-
-    # Read rows
-    rows = list(rdr)
-    report["total_rows"] = len(rows)
-
-    # Build row dicts
-    def row_to_obj(row_idx: int, row: List[str]) -> Tuple[Optional[Entry], Optional[Dict[str, Any]], Optional[str]]:
-        """
-        Returns (entry_instance_or_None, values_dict_or_None, error_message_or_None)
-        but does not save to DB.
-        """
-        if len(row) < len(mapped):
-            return None, None, f"Row {row_idx}: expected {len(mapped)} columns, found {len(row)}."
-        values: Dict[str, Any] = {}
-        for i, field in enumerate(mapped):
-            raw_val = row[i] if i < len(row) else ""
-            # Coerce types for specific fields
-            if field in ("date_added", "date_edited"):
-                values[field] = _parse_date(raw_val)
-            elif field == "talk_number":
-                values[field] = _to_int_or_none(raw_val)
-            else:
-                values[field] = (raw_val or "").strip()
-
-        # Create (unsaved) Entry instance for preview/validation
-        e = Entry(**{k: v for k, v in values.items() if v not in (None, "")})
-        return e, values, None
-
-    # Preview first few
-    for i, row in enumerate(rows[:10], start=1):
-        e, values, err = row_to_obj(i, row)
-        report["preview"].append({
-            "row": i,
-            "values": values if values else {},
-            "error": err,
-        })
-
-    if dry_run:
-        # Dry run: don’t write, just validate basic structure
-        bad = [p for p in report["preview"] if p["error"]]
-        if bad:
-            report["errors"].extend(p["error"] for p in bad if p["error"])
-        report["ok"] = len(report["errors"]) == 0
-        return report
-
-    # Real import (create new rows).
-    # If you want update/merge behavior, add a key strategy here.
-    created = 0
-    updated = 0
-    skipped = 0
-    errors: List[str] = []
-
-    with transaction.atomic():
-        for idx, row in enumerate(rows, start=1):
-            e, values, err = row_to_obj(idx, row)
-            if err:
-                errors.append(err)
-                skipped += 1
-                continue
-            try:
-                # Simple create-only behavior:
-                Entry.objects.create(**values)
-                created += 1
-            except Exception as ex:
-                errors.append(f"Row {idx}: failed to save ({ex})")
-                skipped += 1
-
-    report.update({
-        "ok": len(errors) == 0,
-        "created": created,
-        "updated": updated,
-        "skipped": skipped,
-        "errors": errors,
-    })
-    return report
-
-# small context manager used above
+import csv
+import io
+from datetime import datetime
+from typing import Optional, List, Dict, Any
+
+from django.db import transaction
+
+from .models import Entry
+
+
+# Canonical header order expected from the CSV (and shown in the UI)
+EXPECTED_HEADERS = [
+    "Subject", "Illustration", "Application", "Scripture", "Source",
+    "Talk Title", "Talk Number", "Code", "Date", "Date Edited",
+]
+
+
+def _clean_header_cell(s: str) -> str:
+    if s is None:
+        return ""
+    s = str(s).strip()
+    # Handle odd prefixes like r:"Talk Title"
+    low = s.lower()
+    if low.startswith("r:") or low.startswith("r="):
+        s = s[2:].lstrip()
+    # Strip wrapping quotes
+    if len(s) >= 2 and s[0] == s[-1] and s[0] in ('"', "'"):
+        s = s[1:-1]
+    return s.strip()
+
+
+def _parse_int(x: str) -> Optional[int]:
+    x = (x or "").strip()
+    if not x:
+        return None
+    try:
+        return int(x)
+    except Exception:
+        return None
+
+
+def _parse_date(x: str):
+    """
+    Returns a date object or None.
+    Tries several common formats, then ISO.
+    """
+    x = (x or "").strip()
+    if not x:
+        return None
+    for fmt in ("%Y-%m-%d", "%m/%d/%Y", "%d/%m/%Y"):
+        try:
+            return datetime.strptime(x, fmt).date()
+        except Exception:
+            pass
+    try:
+        return datetime.fromisoformat(x).date()
+    except Exception:
+        return None
+
+
+def import_csv_bytes(content: bytes, dry_run: bool = True, batch_size: int = 1000) -> Dict[str, Any]:
+    """
+    Parse the uploaded CSV (bytes), optionally write to DB.
+    Returns a report dict the templates expect:
+
+        {
+            "total": <int>,
+            "created": <int>,
+            "updated": 0,
+            "skipped": <int>,
+            "errors": [ ... ],
+            "preview": [ [cell, ...], ... up to 10 rows ],
+            "columns": EXPECTED_HEADERS,
+        }
+
+    Notes:
+    - This implementation always CREATES new rows (no dedupe).
+      If you want upserts later, we can key on entry_code or (talk_number, entry_code).
+    """
+    report = {
+        "total": 0,
+        "created": 0,
+        "updated": 0,
+        "skipped": 0,
+        "errors": [],
+        "preview": [],
+        "columns": EXPECTED_HEADERS[:],
+    }
+
+    # Decode once (BOM-safe), sniff dialect, fall back to excel
+    text = content.decode("utf-8-sig", errors="replace")
+    try:
+        first_line = text.splitlines()[0] if text else ""
+        dialect = csv.Sniffer().sniff(first_line) if first_line else csv.excel
+    except Exception:
+        dialect = csv.excel
+
+    rows = list(csv.reader(io.StringIO(text), dialect))
+    if not rows:
+        return report  # empty file
+
+    # Header handling (tolerant)
+    first = rows[0]
+    norm_first = [_clean_header_cell(c).lower() for c in first]
+    expected_norm = [h.lower() for h in EXPECTED_HEADERS]
+    header_ok = (norm_first == expected_norm)
+
+    if header_ok:
+        data_rows = rows[1:]
+    else:
+        # If first row isn't a match but the column count matches, treat it as data
+        if len(first) == len(EXPECTED_HEADERS):
+            data_rows = rows  # treat all rows as data; we'll use EXPECTED order
+        else:
+            # Try common alternate delimiters to recover
+            for delim in (";", "\t"):
+                rows2 = list(csv.reader(io.StringIO(text), delimiter=delim))
+                if rows2 and len(rows2[0]) == len(EXPECTED_HEADERS):
+                    rows = rows2
+                    first = rows[0]
+                    norm_first = [_clean_header_cell(c).lower() for c in first]
+                    header_ok = (norm_first == expected_norm)
+                    data_rows = rows[1:] if header_ok else rows
+                    break
+            else:
+                # Could not reconcile columns
+                report["errors"].append(
+                    f"Column mismatch: saw {len(first)} but expected {len(EXPECTED_HEADERS)}."
+                )
+                data_rows = []
+
+    # Normalize row lengths (pad/trim) and build preview (first 10)
+    normalized_rows: List[List[str]] = []
+    for r in data_rows:
+        if not r or all((c or "").strip() == "" for c in r):
+            continue
+        if len(r) < len(EXPECTED_HEADERS):
+            r = r + [""] * (len(EXPECTED_HEADERS) - len(r))
+        elif len(r) > len(EXPECTED_HEADERS):
+            r = r[:len(EXPECTED_HEADERS)]
+        normalized_rows.append(r)
+
+    report["total"] = len(normalized_rows)
+    report["preview"] = normalized_rows[:10]  # show first 10 rows exactly as seen
+    if dry_run or report["total"] == 0:
+        return report  # preview only
+
+    # Create entries in batches (transactional)
+    to_create: List[Entry] = []
+    for r in normalized_rows:
+        try:
+            obj = Entry(
+                subject=(r[0] or "").strip(),
+                illustration=(r[1] or "").strip(),
+                application=(r[2] or "").strip(),
+                scripture_raw=(r[3] or "").strip(),
+                source=(r[4] or "").strip(),
+                talk_title=(r[5] or "").strip(),
+                talk_number=_parse_int(r[6]),
+                entry_code=(r[7] or "").strip(),
+                date_added=_parse_date(r[8]),
+                date_edited=_parse_date(r[9]),
+            )
+            to_create.append(obj)
+        except Exception as e:
+            report["skipped"] += 1
+            report["errors"].append(f"Row skipped due to error: {e}")
+
+        if len(to_create) >= batch_size:
+            with transaction.atomic():
+                Entry.objects.bulk_create(to_create, batch_size=batch_size)
+            report["created"] += len(to_create)
+            to_create.clear()
+
+    if to_create:
+        with transaction.atomic():
+            Entry.objects.bulk_create(to_create, batch_size=batch_size)
+        report["created"] += len(to_create)
+        to_create.clear()
+
+    return report
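As a quick illustration of the tolerant parsing the new helpers implement, the snippet below uses made-up sample values (not taken from the commit); the import path is an assumption about the app layout behind web/core/utils.py:

# Made-up sample values illustrating the helpers added above.
from core.utils import _clean_header_cell, _parse_int, _parse_date  # import path is an assumption

assert _clean_header_cell(' r:"Talk Title" ') == "Talk Title"   # odd r:"..." prefix is stripped
assert _clean_header_cell("'Subject'") == "Subject"             # wrapping quotes removed
assert _parse_int(" 42 ") == 42 and _parse_int("") is None      # blank -> None instead of raising
assert _parse_date("03/05/2024") == _parse_date("2024-03-05")   # %m/%d/%Y is tried before %d/%m/%Y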
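A minimal sketch of how a view could drive the dry-run/confirm flow the docstring describes; the view name, form field names, and template paths below are hypothetical and not part of this commit:

# Hypothetical caller sketch (not part of this commit): preview first, then import.
from django.shortcuts import render

from .utils import import_csv_bytes


def import_entries(request):
    # Assumes a form with a file input named "csv_file" and a "confirm" checkbox.
    if request.method == "POST" and request.FILES.get("csv_file"):
        data = request.FILES["csv_file"].read()
        confirmed = request.POST.get("confirm") == "1"
        # dry_run=True returns preview/columns/errors without touching the DB;
        # dry_run=False performs the batched bulk_create path above.
        report = import_csv_bytes(data, dry_run=not confirmed)
        return render(request, "core/import_report.html", {"report": report})
    return render(request, "core/import_form.html")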
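The docstring note leaves upserts as follow-up work. One possible shape for that follow-up, keyed on entry_code via update_or_create, is sketched below as an assumption rather than anything this commit implements:

# Possible future upsert path (assumption, not implemented by this commit):
# key rows on entry_code and update existing entries instead of always creating.
from django.db import transaction

from .models import Entry


def upsert_rows(parsed_rows):
    """parsed_rows: iterable of dicts keyed by Entry field names."""
    created = updated = 0
    with transaction.atomic():
        for values in parsed_rows:
            code = (values.get("entry_code") or "").strip()
            if not code:
                # No stable key; fall back to plain create.
                Entry.objects.create(**values)
                created += 1
                continue
            _, was_created = Entry.objects.update_or_create(
                entry_code=code,
                defaults={k: v for k, v in values.items() if k != "entry_code"},
            )
            created += was_created
            updated += not was_created
    return created, updated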