Update web/core/scripture_normalizer.py
This commit is contained in:
@@ -144,26 +144,42 @@ def _canon_key(raw: str) -> str:
|
|||||||
return key
|
return key
|
||||||
|
|
||||||
def _canon_book(book_raw: str, num: Optional[str] = None) -> Optional[str]:
    """
    Map a raw book token to our canonical abbreviation.

    Strategy (the first step that produces a result wins):
      1) Direct lookup of the normalized key in BOOK_MAP, then the same
         lookup with ALL spaces removed (so 'i sa' -> 'isa').
      2) Explicit "number + short key" combos ('co'/'cor', 'ti'/'tim') when
         a separate book number was supplied.
      3) Heuristic: split a leading 1/2/3 or i/ii/iii from the rest of the
         space-free key (e.g. 'i john', '2ki') and look the rest up.
      4) Last resort: look the key up in BOOK_CANON.

    Args:
        book_raw: The book token as written by the user.
        num: Optional book number captured separately from the book token;
            it is used verbatim as the prefix for the step-2 combos.

    Returns:
        The canonical book string, or None when nothing matched.
    """
    # Per the original author's note, _canon_key yields a lower-cased, trimmed
    # key with dots removed and runs of spaces collapsed.
    key = _canon_key(book_raw)
    key_nospace = key.replace(" ", "")  # used to catch 'i sa' -> 'isa'

    # 1) direct lookup (e.g. 'isa', 'isaiah', 'ps', '1 corinthians', etc.)
    if key in BOOK_MAP:
        return BOOK_MAP[key]
    if key_nospace in BOOK_MAP:
        return BOOK_MAP[key_nospace]

    # 2) special explicit "number + short key" combos
    # NOTE(review): this runs after the direct lookup, so if any of these
    # short keys is itself a BOOK_MAP entry, `num` is ignored -- confirm that
    # is the intended precedence.
    if num and key in ("co", "cor"):
        return f"{num} Cor."
    if num and key in ("ti", "tim"):
        return f"{num} Tim."

    # 3) heuristic: split a leading number/roman from the rest
    #    (e.g. 'i john' or '2ki'); matched on the space-free key so that
    #    'i john' and 'ijohn' behave the same.
    m = re.match(r"(?P<n>[1-3]|i{1,3})(?P<rest>[A-Za-z].*)$", key_nospace)
    if m:
        n = {"i": "1", "ii": "2", "iii": "3"}.get(m.group("n"), m.group("n"))
        base = BOOK_MAP.get(m.group("rest"))
        if base:
            # If base already begins with a number (rare in our map), replace
            # it with n.  partition() avoids the IndexError that
            # split(' ', 1)[1] raised for a value such as '1Cor.' that has a
            # leading digit but no space.
            if base[:1] in ("1", "2", "3"):
                _, sep, tail = base.partition(" ")
                base = tail if sep else base[1:]
            return f"{n} {base}"

    # 4) last-resort: try the raw key against the canonical map
    return BOOK_CANON.get(key_nospace) or BOOK_CANON.get(key)
|
||||||
|
|
||||||
def _parse_segment(seg: str, last_book: Optional[str]) -> Tuple[Optional[str], Optional[str], bool]:
|
def _parse_segment(seg: str, last_book: Optional[str]) -> Tuple[Optional[str], Optional[str], bool]:
|
||||||
original = seg.strip()
|
original = seg.strip()
|
||||||
|
|||||||
Reference in New Issue
Block a user