Illustrations/web/static/js/source-validator.v1.js

110 lines
3.9 KiB
JavaScript
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/* source-validator.v1.js
Centralizes logic for deciding if a "Source" string should link to WOL.
Loads publication codes from /static/data/wol-pub-codes.v1.json.
Exposes:
- SourceValidator.isWOLSource(text) -> boolean
- SourceValidator.buildWOLSearchURL(text) -> string
*/
window.SourceValidator = (function () {
// ---- Publication codes: fetched synchronously so the API is usable right away ----
/**
 * Fetch the publication-code list with a blocking XMLHttpRequest.
 *
 * Reads /static/data/wol-pub-codes.v1.json and expects an object whose
 * `pub_codes` property is an array. Every entry is stringified, trimmed and
 * lower-cased; blank entries are dropped and duplicates are removed while
 * keeping first-seen order.
 *
 * @returns {string[]} The cleaned codes from the JSON file, or a very small
 *   built-in list when the request throws, answers with a non-2xx status, or
 *   the payload has no `pub_codes` array.
 */
function loadPubCodesSync() {
  // Minimal set used only when the JSON file cannot be loaded.
  var fallbackCodes = ["w", "wp", "ws", "g", "rs"];

  try {
    var request = new XMLHttpRequest();
    // Passing `false` as the third argument makes the request synchronous.
    request.open("GET", "/static/data/wol-pub-codes.v1.json", false);
    request.send(null);

    var succeeded = request.status >= 200 && request.status < 300;
    if (!succeeded) return fallbackCodes;

    var payload = JSON.parse(request.responseText || "{}");
    if (!payload || !Array.isArray(payload.pub_codes)) return fallbackCodes;

    // Prototype-less lookup table so any code string is a safe key.
    var seen = Object.create(null);
    var codes = [];
    payload.pub_codes.forEach(function (raw) {
      var code = String(raw || "").trim().toLowerCase();
      if (code === "" || seen[code]) return;
      seen[code] = 1;
      codes.push(code);
    });
    return codes;
  } catch (err) {
    // Best effort: any failure (network, parsing, ...) yields the fallback.
    return fallbackCodes;
  }
}
// Publication codes, as returned by the loader (JSON file or built-in fallback).
var PUB_CODES = loadPubCodesSync();

// A copy ordered longest-first, so prefix matching tries "ws" before "w".
// PUB_CODES itself is left in its original order.
var PUB_CODES_SORTED = PUB_CODES.concat().sort(function (left, right) {
  return right.length - left.length;
});

// Earliest accepted year per publication; only checked when a year can be
// parsed from the source text.
//   Watchtower (w / wp / ws): 1950 onward
//   Awake (g):                1970 onward
var YEAR_RULES = [
  {
    codes: ["w", "wp", "ws"],
    minYear: 1950
  },
  {
    codes: ["g"],
    minYear: 1970
  }
];
/**
 * Normalize helper: trim surrounding whitespace and lower-case the input.
 *
 * Falsy input (null, undefined, "", 0, false) yields "". Any other
 * non-string value is converted with String() first, so a numeric or
 * array-valued source no longer raises a TypeError from calling .trim()
 * on a non-string.
 *
 * @param {*} s Raw source value.
 * @returns {string} Trimmed, lower-cased text ("" for falsy input).
 */
function normalize(s) {
  if (!s) return "";
  return String(s).trim().toLowerCase();
}
/**
 * Find the publication code that the source text starts with.
 *
 * Codes are tried in PUB_CODES_SORTED order (longest first), so "ws" is
 * preferred over "w". A code only counts when it stands alone at the start
 * of the text: the character right after it must not be a letter. This keeps
 * ordinary words and URLs such as "www.example.com", "wikipedia" or "google"
 * from being mistaken for the codes "w" / "g", while "w55", "w 95", "it-1"
 * and a bare "w" still match.
 *
 * @param {string} textLower Source text, already trimmed and lower-cased.
 * @returns {?string} The matching code, or null when none matches.
 */
function leadingCode(textLower) {
  for (var i = 0; i < PUB_CODES_SORTED.length; i++) {
    var code = PUB_CODES_SORTED[i];
    if (textLower.slice(0, code.length) !== code) continue;

    // charAt() returns "" past the end of the string, which is not a letter.
    var next = textLower.charAt(code.length);
    // Cased letters differ between their lower- and upper-case forms;
    // digits, whitespace, punctuation and "" do not.
    var nextIsLetter = next.toLowerCase() !== next.toUpperCase();
    if (nextIsLetter) continue;

    return code;
  }
  return null;
}
/**
 * Try to extract a year from the text that follows the leading code.
 *
 * 1. A four-digit year in the range 1800–2099 anywhere in the remainder wins
 *    (e.g. "w 1955", "w 1/1 2001").
 * 2. Otherwise a two-digit year at the very start of the remainder is
 *    accepted (e.g. "w55 1/1", "w 95", "g70 1/22"). It is expanded to the
 *    latest year ending in those two digits that is not later than next
 *    year, so "w49" means 1949 rather than 2049. The previous fixed pivots
 *    (50 for w/wp/ws, 70 for everything else) were equal to the minimum
 *    years of the year rules, which meant a two-digit year could never be
 *    rejected by those rules.
 *
 * @param {string} textLower Source text, trimmed and lower-cased, starting
 *   with `code`.
 * @param {string} code The leading publication code; only its length is
 *   used, to skip past it.
 * @param {number} [currentYear] Reference year for expanding two-digit
 *   years; defaults to the current calendar year. Mainly useful for tests.
 * @returns {?number} The parsed year, or null when no year is recognizable
 *   (callers then skip year limits).
 */
function extractYearAfterCode(textLower, code, currentYear) {
  var s = textLower.slice(code.length).trim();

  // 1) Look for a 4-digit year first (1800–2099).
  var m = s.match(/\b(1[89]\d{2}|20\d{2})\b/);
  if (m) return parseInt(m[1], 10);

  // 2) If not found, accept a 2-digit year at the *start* of the remainder.
  m = s.match(/^\s*(\d{2})\b/);
  if (m) {
    var yy = parseInt(m[1], 10);
    var thisYear = (typeof currentYear === "number" && isFinite(currentYear))
      ? currentYear
      : new Date().getFullYear();
    // Allow one year ahead — assumes issues may be dated slightly ahead of
    // their release (TODO confirm against real citations).
    var latestAllowed = thisYear + 1;
    // Largest year <= latestAllowed whose last two digits are `yy`.
    return latestAllowed - ((latestAllowed - yy) % 100);
  }

  // No recognizable year → don't enforce year limits.
  return null;
}
/**
 * Check the source against the minimum-year rule for its publication code.
 *
 * The first YEAR_RULES entry listing `code` applies. The check passes when
 * no entry lists the code, or when no year can be extracted from the text;
 * otherwise the extracted year must be at least the rule's `minYear`.
 *
 * @param {string} textLower Source text, trimmed and lower-cased.
 * @param {string} code Leading publication code found in the text.
 * @returns {boolean} false only when a year was found and it is too early.
 */
function passesYearRuleIfPresent(textLower, code) {
  var applicable = YEAR_RULES.filter(function (candidate) {
    return candidate.codes.indexOf(code) !== -1;
  });

  // No year rule for this publication.
  if (applicable.length === 0) return true;

  var year = extractYearAfterCode(textLower, code);
  // A missing year is allowed; a present year must reach the minimum.
  return year == null || year >= applicable[0].minYear;
}
/**
 * Decide whether a "Source" string should link to WOL.
 *
 * @param {string} text Raw source text.
 * @returns {boolean} true when the normalized text begins with a known
 *   publication code and passes that publication's year rule (if one
 *   applies); false for empty text or text without a known leading code.
 */
function isWOLSource(text) {
  var cleaned = normalize(text);
  var code = cleaned ? leadingCode(cleaned) : null;

  // Starting with a known pub code means it's WOL-capable, subject to the
  // year rules where applicable.
  return code ? passesYearRuleIfPresent(cleaned, code) : false;
}
/**
 * Build a WOL search URL for the given source text.
 *
 * The text is used as-is (no trimming or case changes) and percent-encoded
 * with encodeURIComponent; a falsy value produces an empty query.
 *
 * @param {string} text Source text to search for.
 * @returns {string} The search URL with the encoded text as the `q` value.
 */
function buildWOLSearchURL(text) {
  var searchPrefix = "https://wol.jw.org/en/wol/l/r1/lp-e?q=";
  var term = text ? text : "";
  return searchPrefix + encodeURIComponent(term);
}
return { isWOLSource: isWOLSource, buildWOLSearchURL: buildWOLSearchURL };
})();