Source code for core.utils.bible_parser

# -*- coding: utf-8 -*-
"""
:File: EuljiroWorship/core/utils/bible_parser.py
:Author: Benjamin Jaedon Choi - https://github.com/saintbenjamin
:Affiliated Church: The Eulji-ro Presbyterian Church [대한예수교장로회(통합) 을지로교회]
:Address: The Eulji-ro Presbyterian Church, 24-10, Eulji-ro 20-gil, Jung-gu, Seoul 04549, South Korea
:Telephone: +82-2-2266-3070
:E-mail: euljirochurch [at] G.M.A.I.L. (replace [at] with @ and G.M.A.I.L as you understood.)
:License: MIT License with Attribution Requirement (see LICENSE file for details); Copyright (c) 2025 The Eulji-ro Presbyterian Church.

Parses Bible reference strings and resolves book name aliases from user input.

This module provides lightweight parsing utilities for converting
user-entered Bible reference strings into structured components
(book ID, chapter, verse range).

Key responsibilities:

- Resolve book name aliases into canonical internal IDs
- Parse flexible reference formats such as:

    - "요 3"
    - "요한복음 3:16"
    - "John 3:14-16"

- Support chapter-only references and full verse ranges

The parser is intentionally permissive and designed for use in
both CLI and GUI contexts.
"""

import re
import json

from core.config import paths

# ─────────────────────────────────────────────
# Load book name aliases from JSON at module load time
try:
    with open(paths.ALIASES_BOOK_FILE, encoding="utf-8") as f:
        BOOK_ALIASES = json.load(f)
except Exception as e:
    print(f"[!] Failed to load aliases_book.json: {e}")
    BOOK_ALIASES = {}
# ─────────────────────────────────────────────


[docs]
def resolve_book_name(name: str, lang_map: dict = None, lang_code: str = "ko") -> str | None:
    """
    Resolve a user-provided book name to a canonical internal book ID.

    This function attempts resolution using multiple strategies:

    1. Direct alias matching from ``BOOK_ALIASES``
    2. Reverse matching against canonical IDs
    3. Optional fallback using localized names from :py:data:`core.config.paths.STANDARD_BOOK_FILE`

    All comparisons are performed using normalized strings
    (lowercased, whitespace and dot characters removed).

    Args:
        name (str):
            Raw book name from user input
            (e.g., "요삼", "1Jn", "Genesis").
        lang_map (dict, optional):
            Mapping loaded from `standard_book.json`,
            structured as { book_id: { "ko": ..., "en": ... } }.
        lang_code (str, optional):
            Language key used when matching localized names.
            Defaults to "ko".

    Returns:
        str | None:
            Canonical internal book ID (e.g., "3John"),
            or None if no match is found.
    """
    if not name:
        return None

    raw = name.strip()
    normalized = raw.lower().replace(" ", "").replace(".", "")

    # 1. Try direct alias match (with normalization)
    for alias, canonical in BOOK_ALIASES.items():
        alias_norm = alias.strip().lower().replace(" ", "").replace(".", "")
        if normalized == alias_norm:
            return canonical

    # 2. Reverse match if name is already canonical
    for canonical in BOOK_ALIASES.values():
        if normalized == canonical.lower().replace(" ", "").replace(".", ""):
            return canonical

    # 3. Fallback: optional standard book name matching
    if lang_map:
        for key, names in lang_map.items():
            local = names.get(lang_code, "").lower().replace(" ", "").replace(".", "")
            en = names.get("en", "").lower().replace(" ", "").replace(".", "")
            if normalized == local or normalized == en:
                return key

    return None



[docs]
def parse_reference(text: str):
    """
    Parse a Bible reference string into structured components.

    Supported input formats include:
        - "<book> <chapter>"
        - "<book> <chapter>:<verse>"
        - "<book> <chapter>:<start>-<end>"

    Examples:

        - "요 3"
        - "요한복음 3:16"
        - "John 3:14-16"

    If only a chapter is provided, the verse range is interpreted
    as the full chapter.

    Args:
        text (str):
            Raw reference string entered by the user.

    Returns:
        tuple[str, int, tuple[int, int]] | None:
            A tuple of ``(book_id, chapter_number, verse_range)``,
            where ``verse_range`` is ``(start, end)`` and ``end == -1``
            indicates the full chapter.

            Returns None if parsing or resolution fails.
    """
    text = text.strip()

    # Modified regex to also match "<book> <chapter>" (verse omitted)
    m = re.match(r"(.+?)\s*(\d+)(?::(\d+)(?:-(\d+))?)?", text)
    if not m:
        return None

    book_str, chapter_str, verse_start_str, verse_end_str = m.groups()

    # Resolve book name using alias map
    book_id = resolve_book_name(book_str)
    if not book_id:
        return None

    # Convert string components to integers
    chapter = int(chapter_str)

    # Support chapter-only input (e.g., "John 3")
    if verse_start_str is None:
        return book_id, chapter, (1, -1)

    verse_start = int(verse_start_str)
    verse_end = int(verse_end_str) if verse_end_str else verse_start

    # Sanity check: verse range must be ascending
    if verse_end < verse_start:
        return None

    # Return all verses in range as tuple
    return book_id, chapter, (verse_start, verse_end)