Update web/core/scripture_normalizer.py
This commit is contained in:
parent
fb74fe1ebf
commit
cbb60cc3cd
@ -144,26 +144,42 @@ def _canon_key(raw: str) -> str:
|
||||
return key
|
||||
|
||||
def _canon_book(book_raw: str, num: Optional[str] = None) -> Optional[str]:
    """
    Map a raw book token to our canonical abbreviation.

    Strategy:
      1) Direct lookup of the normalized key in BOOK_MAP, then the same
         lookup with ALL spaces removed (so 'i sa' -> 'isa').
      2) Explicit "number + short key" combos ('co'/'cor' and 'ti'/'tim'),
         used only when the caller supplies ``num``.
      3) Heuristic: split a leading 1/2/3 or i/ii/iii off the space-free key
         and look the remainder up in BOOK_MAP (e.g. 'i john' or '2ki').
      4) Last resort: look the key up in BOOK_CANON.

    Args:
        book_raw: The book token as it appeared in the input text.
        num: Optional leading book number captured separately by the caller;
            only consulted for the explicit combos in step 2.

    Returns:
        The value found in BOOK_MAP / BOOK_CANON (prefixed with the book
        number where one was split off), or whatever falsy/missing result
        the final BOOK_CANON lookup yields when nothing matches
        (presumably None -- BOOK_CANON is defined elsewhere in this module).
    """
    # Per the original inline note, _canon_key yields a lower-cased, trimmed
    # key with dots removed and runs of whitespace collapsed.
    key = _canon_key(book_raw)
    key_nospace = key.replace(" ", "")  # used to catch 'i sa' -> 'isa'

    # 1) direct lookup (e.g., 'isa', 'isaiah', 'ps', '1 corinthians', etc.)
    if key in BOOK_MAP:
        return BOOK_MAP[key]
    if key_nospace in BOOK_MAP:
        return BOOK_MAP[key_nospace]

    # 2) special explicit "number + short key" combos
    if num and key in ("co", "cor"):
        return f"{num} Cor."
    if num and key in ("ti", "tim"):
        return f"{num} Tim."

    # 3) heuristic: split a leading number/roman from the rest
    #    (e.g., 'i john' or '2ki'). Matching against the space-free key lets
    #    '1 john', '1john' and 'i john' all take the same path.
    m = re.match(r"(?P<n>[1-3]|i{1,3})(?P<rest>[A-Za-z].*)$", key_nospace)
    if m:
        prefix = m.group("n")
        n = {"i": "1", "ii": "2", "iii": "3"}.get(prefix, prefix)
        rest = m.group("rest")
        if rest in BOOK_MAP:
            base = BOOK_MAP[rest]
            # If base already begins with a number (rare in our map), replace
            # that number with n. partition() avoids the IndexError that
            # base[0] / split(' ', 1)[1] would raise on an empty value or on
            # a digit-led value with no space; such values simply fall
            # through to the plain "n base" form below.
            _, _, tail = base.partition(" ")
            if base[:1] in ("1", "2", "3") and tail:
                return f"{n} {tail}"
            return f"{n} {base}"

    # 4) last-resort: try the raw key against canonical map
    return BOOK_CANON.get(key_nospace) or BOOK_CANON.get(key)
|
||||
|
||||
def _parse_segment(seg: str, last_book: Optional[str]) -> Tuple[Optional[str], Optional[str], bool]:
|
||||
original = seg.strip()
|
||||
|
||||
Loading…
Reference in New Issue
Block a user