import csv
import io
import re
from datetime import date

from dateutil import parser as dateparser

from core.models import Entry, ScriptureRef

# Canonical (lower-cased) column names, in the order used when a file has no
# header row.
EXPECTED_HEADERS = [
    h.lower()
    for h in [
        "Subject", "Illustration", "Application", "Scripture", "Source",
        "Talk Title", "Talk Number", "Code", "Date", "Date Edited",
    ]
]

# A row needs at least this many recognised column names to be treated as a
# header row.
_MIN_HEADER_MATCHES = 5


class _FallbackDialect(csv.Dialect):
    """Plain comma-separated dialect used when delimiter sniffing fails."""

    delimiter = ","
    quotechar = '"'
    escapechar = None
    doublequote = True
    skipinitialspace = True
    lineterminator = "\n"
    quoting = csv.QUOTE_MINIMAL


def _sniff(text: str):
    """Return a csv dialect for *text*.

    The first 8192 characters are handed to ``csv.Sniffer`` with comma,
    semicolon, tab and pipe as candidate delimiters. If sniffing fails for any
    reason, a simple comma-separated dialect is returned instead.
    """
    sample = text[:8192]
    try:
        return csv.Sniffer().sniff(sample, delimiters=",;\t|")
    except Exception:
        # Deliberately best-effort: an undetectable dialect must not abort
        # the import, so fall back to plain comma-separated parsing.
        return _FallbackDialect


def _as_dictreader(text: str, dialect, fieldnames=None):
    """Yield dict rows from *text*, dropping a leading row that repeats the headers.

    Args:
        text: The full CSV content.
        dialect: csv dialect used to parse *text*.
        fieldnames: Column names to impose. When ``None``, ``csv.DictReader``
            takes them from the first line of *text*.

    Yields:
        One dict per CSV row. The first row is omitted when at least
        ``_MIN_HEADER_MATCHES`` of its values equal their own column name
        (case-insensitive, surrounding whitespace ignored).
    """
    reader = csv.DictReader(
        io.StringIO(text), dialect=dialect, fieldnames=fieldnames
    )
    first = next(reader, None)
    if first is None:
        return

    # DictReader stores surplus cells under the key ``None`` as a list and
    # fills missing cells with ``None``; only real string pairs can match.
    matches = sum(
        1
        for key, value in first.items()
        if isinstance(key, str)
        and isinstance(value, str)
        and value.strip().lower() == key.strip().lower()
    )
    if matches < _MIN_HEADER_MATCHES:
        yield first
    yield from reader


def parse_scripture(scripture_str):
    """Placeholder scripture parser -- adjust as needed.

    Returns an empty list for empty input, otherwise a single-item list
    holding the raw string under the key ``"raw"``.
    """
    if not scripture_str:
        return []
    # Very basic parsing, could be replaced with real logic.
    return [{"raw": scripture_str}]


def _parse_date_safe(value):
    """Return *value* parsed as a ``date``, or ``None`` if blank or unparseable."""
    if not value or not str(value).strip():
        return None
    try:
        return dateparser.parse(str(value)).date()
    except Exception:
        # Best-effort on purpose: a malformed date must not cost the whole row.
        return None


def _row_to_entry_data(row):
    """Convert one DictReader row into keyword arguments for ``Entry``.

    Column names are matched case-insensitively with surrounding whitespace
    ignored; missing or empty cells become ``""`` (text fields) or ``None``
    (talk number and dates).
    """
    row_lc = {(k or "").strip().lower(): (v or "") for k, v in row.items()}

    def text_of(column):
        return (row_lc.get(column) or "").strip()

    raw_talk_number = text_of("talk number")
    try:
        talk_number = int(raw_talk_number) if raw_talk_number else None
    except ValueError:
        # Non-numeric talk numbers are dropped rather than rejecting the row.
        talk_number = None

    return dict(
        subject=text_of("subject"),
        illustration=text_of("illustration"),
        application=text_of("application"),
        scripture_raw=text_of("scripture"),
        source=text_of("source"),
        talk_number=talk_number,
        talk_title=text_of("talk title"),
        entry_code=text_of("code"),
        date_added=_parse_date_safe(row_lc.get("date")),
        date_edited=_parse_date_safe(row_lc.get("date edited")),
    )


def import_csv_bytes(b: bytes, dry_run=True):
    """Import entries from raw CSV bytes and return a report dict.

    Robust import:
      - Auto-detect delimiter (comma/semicolon/tab/pipe).
      - If required headers are missing, re-parse treating file as
        *headerless* using the canonical column order.
      - Upsert by Code; skip rows that are entirely empty.

    Args:
        b: CSV content; decoded as UTF-8 (a BOM is stripped, undecodable
            bytes are replaced).
        dry_run: When true, rows are parsed and counted but nothing is
            written to the database.

    Returns:
        A dict with the keys ``rows``, ``inserted``, ``updated``, ``skipped``,
        ``errors`` (list of exception messages), ``scripture_parsed``,
        ``scripture_failed``, ``dialect_delimiter``, ``used_headerless_mode``
        and ``seen_headers``.
    """
    text = b.decode("utf-8-sig", errors="replace")
    dialect = _sniff(text)

    header_reader = csv.DictReader(io.StringIO(text), dialect=dialect)
    seen_headers = [
        (h or "").strip().lower() for h in (header_reader.fieldnames or [])
    ]
    recognised = sum(h in EXPECTED_HEADERS for h in seen_headers)
    used_headerless = not seen_headers or recognised < _MIN_HEADER_MATCHES

    if used_headerless:
        rows_iter = _as_dictreader(text, dialect, fieldnames=EXPECTED_HEADERS)
    else:
        rows_iter = header_reader

    report = {
        "rows": 0,
        "inserted": 0,
        "updated": 0,
        "skipped": 0,
        "errors": [],
        "scripture_parsed": 0,
        "scripture_failed": 0,
        "dialect_delimiter": getattr(dialect, "delimiter", "?"),
        "used_headerless_mode": used_headerless,
        "seen_headers": seen_headers,
    }

    for row in rows_iter:
        report["rows"] += 1
        try:
            data = _row_to_entry_data(row)
            if not any(data.values()):
                report["skipped"] += 1
                continue

            parsed = parse_scripture(data["scripture_raw"])
            for ref in parsed:
                if ref:
                    report["scripture_parsed"] += 1
                else:
                    report["scripture_failed"] += 1

            if dry_run:
                continue

            existing = None
            code = data["entry_code"]
            if code:
                try:
                    existing = Entry.objects.get(entry_code=code)
                except Entry.DoesNotExist:
                    existing = None

            is_update = existing is not None
            if is_update:
                entry = existing
                for field, value in data.items():
                    setattr(entry, field, value)
                entry.save()
                # References are rebuilt from the freshly parsed scripture.
                entry.scripture_refs.all().delete()
            else:
                entry = Entry.objects.create(**data)

            for ref in parsed:
                if ref:
                    ScriptureRef.objects.create(entry=entry, **ref)

            # Count only after the row is fully written, so a failure above
            # is reported as skipped and not also as inserted/updated.
            report["updated" if is_update else "inserted"] += 1
        except Exception as e:
            # Per-row boundary: one bad row must not abort the whole import.
            report["skipped"] += 1
            report["errors"].append(str(e))

    return report