from openai import OpenAI
import re
import difflib
from datetime import datetime, timedelta

# =========================================================
# OPENAI CLIENT
# =========================================================
# NOTE: the API key must never be hard-coded in source. When no api_key
# argument is passed, the OpenAI client reads the key from the OPENAI_API_KEY
# environment variable. Any key that was previously committed in this file
# must be treated as leaked and revoked.

client = OpenAI()


# =========================================================
# SCHEMA LOADER
# =========================================================
def load_schema() -> str:
    """Return the text of ``schema.txt`` with surrounding whitespace removed.

    The file is looked up relative to the current working directory and read
    as UTF-8. If it cannot be opened or read, a short comma-separated list of
    table names is returned instead so callers always get a usable string.
    """
    fallback = "tbl_order, tbl_store, tbl_payment, tbl_exchange_rate, tbl_order_parent_gateway"
    try:
        with open("schema.txt", "r", encoding="utf-8") as schema_file:
            contents = schema_file.read()
    except Exception:
        # Fallback minimal schema
        return fallback
    return contents.strip()


# =========================================================
# MONTH + DATE UTILITIES
# =========================================================
# Lower-case English month names and their three-letter abbreviations, mapped
# to the 1-based month number. "may" appears once because its full name and
# abbreviation are the same word.
MONTHS = {
    "january": 1,
    "jan": 1,
    "february": 2,
    "feb": 2,
    "march": 3,
    "mar": 3,
    "april": 4,
    "apr": 4,
    "may": 5,
    "june": 6,
    "jun": 6,
    "july": 7,
    "jul": 7,
    "august": 8,
    "aug": 8,
    "september": 9,
    "sep": 9,
    "october": 10,
    "oct": 10,
    "november": 11,
    "nov": 11,
    "december": 12,
    "dec": 12,
}


def month_range(year: int, month: int):
    """Return ``(start, end)`` ISO date strings bounding one calendar month.

    ``start`` is the first day of ``month`` in ``year``; ``end`` is the first
    day of the following month, rolling over to January of ``year + 1`` when
    ``month`` is 12.
    """
    next_year, next_month = (year + 1, 1) if month == 12 else (year, month + 1)
    first_day = f"{year}-{month:02d}-01"
    first_day_after = f"{next_year}-{next_month:02d}-01"
    return first_day, first_day_after


# Common abbreviations that are not keys of MONTHS but should still be recognised.
_EXTRA_MONTH_ALIASES = {"sept": 9}


def fuzzy_months(q: str):
    """Detect every month mentioned in ``q``, tolerating typos like 'Novemebr'.

    Each alphabetic word is resolved in this order:
      1. exact match against a MONTHS key (full name or 3-letter abbreviation);
      2. exact match against a small alias table (e.g. "sept");
      3. for words of at least 4 letters, a fuzzy match against the *full*
         month names only.

    Fuzzy matching deliberately ignores the 3-letter abbreviations: they are
    so short that ordinary words score above the cutoff against them
    ("many" vs "may", "mark" vs "mar"), which made unrelated questions look
    like they named a month.

    Returns:
        Sorted list of distinct month numbers (1-12); empty if none found.
    """
    full_names = [name for name in MONTHS if len(name) > 3]
    found = set()
    for word in re.findall(r"[a-z]+", q.lower()):
        if word in MONTHS:
            found.add(MONTHS[word])
        elif word in _EXTRA_MONTH_ALIASES:
            found.add(_EXTRA_MONTH_ALIASES[word])
        elif len(word) >= 4:
            close = difflib.get_close_matches(word, full_names, n=1, cutoff=0.7)
            if close:
                found.add(MONTHS[close[0]])
    return sorted(found)


def extract_date_range(q: str):
    """
    Extract a half-open date range ``[start, end)`` from a free-text question.

    Rules are tried in this order and the first match wins:
      1. Relative phrases: "last year", year-to-date ("year to date", "ytd",
         "so far this year"), "this/current year", month-to-date,
         "last month", "this/current month".
      2. "last N day(s)" and "last N month(s)" (a month is approximated as
         30 days); these end at today's date, exclusive.
      3. Two or more month names plus a 20xx year: first day of the earliest
         month through the end of the latest month of that year.
      4. "from|between <month> to|and|- <month> 20xx".
      5. A single month name plus a 20xx year.
      6. A bare 20xx year.

    Returns:
        ``(start_date, end_date)`` as ``YYYY-MM-DD`` strings where
        ``end_date`` is exclusive, or ``(None, None)`` if nothing matched.
    """
    ql = q.lower()
    now = datetime.now()
    today = now.strftime("%Y-%m-%d")
    # The end bound is exclusive, so "to date" ranges must end tomorrow in
    # order to include today (and to be non-empty on the 1st of a month/year).
    tomorrow = (now + timedelta(days=1)).strftime("%Y-%m-%d")

    # Natural phrases
    if "last year" in ql:
        return f"{now.year - 1}-01-01", f"{now.year}-01-01"

    # Must be tested before "this year": "so far this year" contains it.
    if "year to date" in ql or "ytd" in ql or "so far this year" in ql:
        return f"{now.year}-01-01", tomorrow

    if "this year" in ql or "current year" in ql:
        return f"{now.year}-01-01", f"{now.year + 1}-01-01"

    if "month to date" in ql or "mtd" in ql:
        return f"{now.year}-{now.month:02d}-01", tomorrow

    if "last month" in ql:
        if now.month > 1:
            return month_range(now.year, now.month - 1)
        return month_range(now.year - 1, 12)

    if "this month" in ql or "current month" in ql:
        return month_range(now.year, now.month)

    # last X days
    m = re.search(r"last\s+(\d+)\s+days?\b", ql)
    if m:
        start = now - timedelta(days=int(m.group(1)))
        return start.strftime("%Y-%m-%d"), today

    # last X months (approx: 30 days per month)
    m = re.search(r"last\s+(\d+)\s+months?\b", ql)
    if m:
        start = now - timedelta(days=int(m.group(1)) * 30)
        return start.strftime("%Y-%m-%d"), today

    # A 4-digit 20xx year that is not part of a longer digit run
    # (e.g. an order number such as 120254 must not be read as 2025).
    year_match = re.search(r"(?<!\d)(20\d{2})(?!\d)", ql)
    year = int(year_match.group(1)) if year_match else None
    months_found = fuzzy_months(q)

    # Multiple months + year
    if year is not None and len(months_found) >= 2:
        start, _ = month_range(year, months_found[0])
        _, end = month_range(year, months_found[-1])
        return start, end

    # Range "from X to Y 2025"
    mrange = re.search(
        r"(?:from|between)\s+([a-zA-Z]+)\s+(?:to|and|-)\s+([a-zA-Z]+)\s+(20\d{2})",
        ql,
    )
    if mrange:
        m1 = MONTHS.get(mrange.group(1))
        m2 = MONTHS.get(mrange.group(2))
        if m1 and m2:
            range_year = int(mrange.group(3))
            start, _ = month_range(range_year, min(m1, m2))
            _, end = month_range(range_year, max(m1, m2))
            return start, end

    # Single month + year: "october 2025", "oct 2025"
    if year is not None and len(months_found) == 1:
        return month_range(year, months_found[0])

    # Year only
    if year is not None:
        return f"{year}-01-01", f"{year + 1}-01-01"

    return None, None


# =========================================================
# CURRENCY HELPERS
# =========================================================
# Lower-case currency codes and everyday currency words mapped to a numeric
# currency id. The SQL builders in this file compare these ids against
# o.processing_currency_id. Several words may share one id
# (e.g. "usd", "dollar", "dollars" -> 2).
CURRENCY_MAP = {
    "usd": 2,
    "dollar": 2,
    "dollars": 2,
    "lkr": 5,
    "rs": 5,
    "rupee": 5,
    "rupees": 5,
    "gbp": 1,
    "pound": 1,
    "pounds": 1,
    "eur": 3,
    "euro": 3,
    "euros": 3,
    "aud": 6,
    "australian": 6,
}

# Short lower-case code per currency id, used to build SQL column aliases
# such as total_usd_value / total_lkr_volume. Ids missing from this table
# fall back to "cur_<id>" at the call sites.
CURRENCY_LABEL = {
    1: "gbp",
    2: "usd",
    3: "eur",
    5: "lkr",
    6: "aud",
}


def detect_currency_ids(question: str):
    """Return the list of *all* distinct currency ids mentioned in the question.

    Currency tokens are matched as whole alphabetic words, not substrings, so
    short tokens no longer fire inside unrelated words ("rs" in "first",
    "orders" or "dollars"; "aud" in "fraud"; "pound" in "compound"). Digits
    and punctuation split words, so "rs.500" and "usd100" are still detected.

    The ids are returned in CURRENCY_MAP order (not order of appearance in
    the question), each at most once.
    """
    words = set(re.findall(r"[a-z]+", question.lower()))
    ids = []
    for token, cid in CURRENCY_MAP.items():
        if token in words and cid not in ids:
            ids.append(cid)
    return ids


def primary_currency_from_question(question: str):
    """Return ``(currency_id, token)`` for the first currency found, else ``(None, None)``.

    "First" means first in CURRENCY_MAP order, not first in the question
    (kept for backwards compatibility). Tokens are matched as whole
    alphabetic words rather than substrings, so e.g. "rs" does not match
    inside "first" or "dollars".
    """
    words = set(re.findall(r"[a-z]+", question.lower()))
    for token, cid in CURRENCY_MAP.items():
        if token in words:
            return cid, token
    return None, None


# =========================================================
# INTENT ANALYSIS
# =========================================================
def analyze_intent(question: str):
    """Classify a question and extract its date range and primary currency.

    Classification is keyword based and evaluated in priority order:
    volume, then mdr (unless "bank" is mentioned), revenue, gmv, count.
    A schema-inspection phrase ("show tables", "list tables", "columns in",
    "schema") overrides the type afterwards while leaving "metric" as set.

    Returns:
        dict with keys "type", "metric", "date_start", "date_end",
        "currency_id" and "currency_name". Unset values are None and the
        default type is "generic".
    """
    ql = question.lower()
    intent = {
        "type": "generic",  # 'revenue', 'gmv', 'mdr', 'volume', 'count', 'schema', 'generic'
        "metric": None,
        "date_start": None,
        "date_end": None,
        "currency_id": None,
        "currency_name": None,
    }

    # Detect any currency word. Whole-word matching: a plain substring test
    # lets "rs" fire inside words such as "orders" or "first".
    words = set(re.findall(r"[a-z]+", ql))
    has_currency_word = any(token in words for token in CURRENCY_MAP)

    mentions_volume = "volume" in ql

    # Volume questions: treat "volume" and "total transactions" the same
    # (approved count). Phrases like "volume of transactions" or
    # "transaction volume" are covered by the volume+transaction test.
    if (
        (mentions_volume and "transaction" in ql)
        or "total transactions" in ql
        # also "usd volume", "lkr volume" etc. (but not "sales volume")
        or (mentions_volume and has_currency_word and "sales volume" not in ql)
    ):
        intent["type"] = "volume"
        intent["metric"] = "count"
    # Explicit MDR (overall MDR, not "bank mdr")
    elif "mdr" in ql and "bank" not in ql:
        intent["type"] = "mdr"
        intent["metric"] = "mdr"
    elif any(key in ql for key in ("revenue", "profit", "margin")):
        intent["type"] = "revenue"
        intent["metric"] = "revenue"
    # "amount" also covers "total amount", "gross amount", "amount collected";
    # "value" covers e.g. "total lkr value".
    elif any(key in ql for key in ("gmv", "sales volume", "turnover", "amount", "value")):
        intent["type"] = "gmv"
        intent["metric"] = "gmv"
    # "count" also covers "transaction count"; "total transaction" covers
    # e.g. "total transaction for october 2025".
    elif any(key in ql for key in ("how many", "count", "number of", "total transaction")):
        intent["type"] = "count"
        intent["metric"] = "count"

    if any(key in ql for key in ("show tables", "list tables", "columns in", "schema")):
        intent["type"] = "schema"

    # Date range
    intent["date_start"], intent["date_end"] = extract_date_range(question)

    # Primary currency (for revenue / legacy)
    intent["currency_id"], intent["currency_name"] = primary_currency_from_question(question)

    return intent


# =========================================================
# REVENUE SQL
# =========================================================
def build_revenue_sql(intent: dict) -> str:
    """
    Build the revenue query for an analysed question.

    Revenue per transaction is the amount multiplied by the net fee rate:
    ``payment_gateway_rate`` minus the bank rate (plus the parent gateway
    rate when ``order_type_id = 3``), divided by 100.

    Two query shapes are produced:
      * ``currency_id == 2`` (explicit USD revenue): transaction count,
        GMV and revenue in USD, plus revenue converted to LKR. Tables are
        qualified with the ``webxpay_master`` schema.
      * anything else: GMV and revenue in LKR, optionally restricted to one
        processing currency.

    IMPORTANT:
    - GMV in the non-USD path is always expressed in LKR:
        * LKR GMV + USD GMV converted to LKR + other currencies converted to LKR
      using the same base_lkr logic used everywhere else.
    - The stored ``o.exchange_rate`` is used only when it is a plain decimal
      number; otherwise the latest ``tbl_exchange_rate.buying_rate`` on or
      before the transaction date is used. The numeric test uses ``[.]`` for
      the decimal point: a backslash-escaped dot is reduced to a bare ``.``
      by MySQL's string-literal parser and would match any character.

    Args:
        intent: dict providing "date_start", "date_end" (``YYYY-MM-DD``
            strings or None; end is exclusive) and "currency_id"
            (int or None).

    Returns:
        One SQL SELECT statement, stripped of surrounding whitespace.
    """
    ds = intent["date_start"]
    de = intent["date_end"]
    cid = intent["currency_id"]

    date_filter = ""
    if ds and de:
        date_filter = (
            f"AND p.date_time_transaction >= '{ds}' "
            f"AND p.date_time_transaction < '{de}'"
        )

    # Special USD path (explicit USD revenue)
    if cid == 2:
        sql = f"""
SELECT
    COUNT(*) AS transaction_count,

    SUM(o.total_amount) AS total_gmv_usd,

    SUM(
        ROUND(
            o.total_amount * (
                o.payment_gateway_rate -
                CASE
                    WHEN o.order_type_id = 3
                        THEN (o.bank_payment_gateway_rate + COALESCE(opg.parent_gateway_rate, 0))
                    ELSE o.bank_payment_gateway_rate
                END
            ) / 100.0
        , 6)
    ) AS total_revenue_usd,

    SUM(
        (
            CASE
                WHEN o.processing_currency_id = 5 THEN o.total_amount

                WHEN o.exchange_rate IS NOT NULL AND o.exchange_rate <> ''
                     AND o.exchange_rate REGEXP '^[0-9]+([.][0-9]+)?$'
                THEN o.total_amount * CAST(o.exchange_rate AS DECIMAL(18,6))

                ELSE o.total_amount * (
                    SELECT er.buying_rate
                    FROM webxpay_master.tbl_exchange_rate er
                    WHERE er.currency_id = o.processing_currency_id
                      AND er.date <= DATE(p.date_time_transaction)
                    ORDER BY er.date DESC
                    LIMIT 1
                )
            END
        ) * (
            (
                o.payment_gateway_rate -
                CASE
                    WHEN o.order_type_id = 3
                        THEN (o.bank_payment_gateway_rate + COALESCE(opg.parent_gateway_rate,0))
                    ELSE o.bank_payment_gateway_rate
                END
            ) / 100.0
        )
    ) AS total_revenue_lkr

FROM webxpay_master.tbl_order o
JOIN webxpay_master.tbl_payment p
      ON p.payment_id = o.payment_id
LEFT JOIN webxpay_master.tbl_order_parent_gateway opg
      ON opg.order_id = o.order_id
WHERE
    o.payment_status_id = 2
    AND o.processing_currency_id = 2
    {date_filter};
"""
        return sql.strip()

    # LKR / other currencies path (overall revenue; GMV must be in LKR)
    currency_filter = ""
    if cid:
        currency_filter = f"AND o.processing_currency_id = {cid}"

    # Latest buying rate on or before the transaction date.
    fx = """
        SELECT er.buying_rate FROM tbl_exchange_rate er
        WHERE er.currency_id = o.processing_currency_id
          AND er.date <= DATE(p.date_time_transaction)
        ORDER BY er.date DESC LIMIT 1
    """

    # Transaction amount expressed in LKR.
    base_lkr = f"""
        CASE
            WHEN o.processing_currency_id = 5 THEN o.total_amount
            ELSE
                CASE
                    WHEN o.exchange_rate IS NOT NULL
                         AND o.exchange_rate NOT LIKE ''
                         AND o.exchange_rate REGEXP '^[0-9]+([.][0-9]+)?$'
                    THEN o.total_amount * o.exchange_rate
                    ELSE o.total_amount * ({fx})
                END
        END
    """

    # Net fee rate kept by the gateway, as a fraction.
    fee = """
        (
            o.payment_gateway_rate -
            CASE
                WHEN o.order_type_id = 3
                    THEN (o.bank_payment_gateway_rate + COALESCE(opg.parent_gateway_rate,0))
                ELSE o.bank_payment_gateway_rate
            END
        ) / 100.0
    """

    # IMPORTANT: Revenue is base_lkr * fee, summed, rounded at the end
    rev_lkr_raw = f"(({base_lkr}) * ({fee}))"

    sql = f"""
SELECT
    -- GMV in LKR (LKR + USD→LKR + other FX→LKR)
    SUM({base_lkr}) AS total_gmv_lkr,
    -- Revenue in LKR based on the same LKR GMV base
    ROUND(SUM({rev_lkr_raw})) AS total_revenue_lkr
FROM tbl_order o
JOIN tbl_payment p ON p.payment_id = o.payment_id
LEFT JOIN tbl_order_parent_gateway opg ON opg.order_id = o.order_id
WHERE o.payment_status_id = 2
    {currency_filter}
    {date_filter};
"""
    return sql.strip()


# =========================================================
# MDR SQL (TOTAL MDR = BASE_LKR * PG_RATE, ROUNDED PER TXN)
# =========================================================
def build_mdr_sql(intent: dict) -> str:
    """
    Handles "total mdr ..." style questions (overall MDR, not bank-only).

    MDR in LKR = ROUND(base amount in LKR * (payment_gateway_rate / 100), 2)
    per transaction, then summed.

    The base amount is converted to LKR with the stored ``o.exchange_rate``
    when that value is a plain decimal number, otherwise with the latest
    ``tbl_exchange_rate.buying_rate`` on or before the transaction date.
    The numeric test uses ``[.]`` for the decimal point: a backslash-escaped
    dot is reduced to a bare ``.`` by MySQL's string-literal parser and would
    match any character.

    Args:
        intent: dict providing "date_start", "date_end" (``YYYY-MM-DD``
            strings or None; end is exclusive) and "currency_id"
            (int or None).

    Returns:
        One SQL SELECT statement, stripped of surrounding whitespace.
    """
    ds = intent["date_start"]
    de = intent["date_end"]
    cid = intent["currency_id"]

    date_filter = ""
    if ds and de:
        date_filter = (
            f"AND p.date_time_transaction >= '{ds}' "
            f"AND p.date_time_transaction < '{de}'"
        )

    currency_filter = ""
    if cid:
        currency_filter = f"AND o.processing_currency_id = {cid}"

    # Latest buying rate on or before the transaction date.
    fx = """
        SELECT er.buying_rate FROM tbl_exchange_rate er
        WHERE er.currency_id = o.processing_currency_id
          AND er.date <= DATE(p.date_time_transaction)
        ORDER BY er.date DESC LIMIT 1
    """

    # Transaction amount expressed in LKR.
    base_lkr = f"""
        CASE
            WHEN o.processing_currency_id = 5 THEN o.total_amount
            ELSE
                CASE
                    WHEN o.exchange_rate IS NOT NULL
                         AND o.exchange_rate NOT LIKE ''
                         AND o.exchange_rate REGEXP '^[0-9]+([.][0-9]+)?$'
                    THEN o.total_amount * o.exchange_rate
                    ELSE o.total_amount * ({fx})
                END
        END
    """

    # round MDR per transaction to 2 decimals, then sum
    mdr_expr = f"ROUND(({base_lkr}) * (o.payment_gateway_rate / 100.0), 2)"

    # NOTE(review): the parent-gateway join is not referenced by mdr_expr;
    # it is kept so the row set matches the revenue query - confirm whether
    # it is still needed here.
    sql = f"""
SELECT
    SUM({mdr_expr}) AS total_mdr_lkr
FROM tbl_order o
JOIN tbl_payment p ON p.payment_id = o.payment_id
LEFT JOIN tbl_order_parent_gateway opg ON opg.order_id = o.order_id
WHERE o.payment_status_id = 2
  {currency_filter}
  {date_filter};
"""
    return sql.strip()


# =========================================================
# GMV / VALUE SQL (AMOUNT)
# =========================================================
def _has_merchant_grouping(q: str) -> bool:
    ql = q.lower()
    return any(
        phrase in ql
        for phrase in [
            "each merchant",
            "per merchant",
            "by merchant",
            "merchant wise",
            "merchant-wise",
            "merchantwise",
        ]
    )


def build_gmv_sql(question: str, intent: dict) -> str:
    """
    Handles GMV / value style questions:
      - total lkr value october 2025
      - total usd value october 2025
      - total usd and lkr value october 2025
      - total amount from january to october 2025

    Query shape, chosen in this order:
      1. Per-merchant phrasing: GMV in LKR grouped by store, optionally
         restricted to the mentioned currencies.
      2. No currency mentioned: one total in LKR across all currencies.
      3. Exactly one currency: raw SUM(o.total_amount) in that currency.
      4. Several currencies: one native-currency column per currency plus a
         combined LKR total.

    IMPORTANT BUSINESS RULE:
    - When combining currencies into a single "total amount" / "total GMV",
      we MUST convert everything to LKR and sum:
        total_lkr_gmv + (usd_gmv converted to LKR) [+ other FX]
      i.e. use base_lkr, NEVER raw SUM(o.total_amount) across currencies.

    - When the user EXPLICITLY asks for "LKR GMV only" or "USD GMV only",
      we can sum o.total_amount for that processing_currency_id.

    The stored ``o.exchange_rate`` is trusted only when it is a plain decimal
    number. The numeric test uses ``[.]`` for the decimal point: a
    backslash-escaped dot is reduced to a bare ``.`` by MySQL's
    string-literal parser and would match any character.

    Args:
        question: the user's question; scanned for currency words and
            per-merchant phrasing.
        intent: dict providing "date_start" and "date_end" (``YYYY-MM-DD``
            strings or None; end is exclusive).

    Returns:
        One SQL SELECT statement, stripped of surrounding whitespace.
    """
    ds = intent["date_start"]
    de = intent["date_end"]

    date_filter = ""
    if ds and de:
        date_filter = (
            f"AND p.date_time_transaction >= '{ds}' "
            f"AND p.date_time_transaction < '{de}'"
        )

    currency_ids = detect_currency_ids(question)
    ids_str = ",".join(str(c) for c in currency_ids)

    # FX + base_lkr expression (same logic as revenue/MDR)
    fx = """
        SELECT er.buying_rate FROM tbl_exchange_rate er
        WHERE er.currency_id = o.processing_currency_id
          AND er.date <= DATE(p.date_time_transaction)
        ORDER BY er.date DESC LIMIT 1
    """

    base_lkr = f"""
        CASE
            WHEN o.processing_currency_id = 5 THEN o.total_amount
            ELSE
                CASE
                    WHEN o.exchange_rate IS NOT NULL
                         AND o.exchange_rate NOT LIKE ''
                         AND o.exchange_rate REGEXP '^[0-9]+([.][0-9]+)?$'
                    THEN o.total_amount * o.exchange_rate
                    ELSE o.total_amount * ({fx})
                END
        END
    """

    # Merchant-wise GMV: "for each merchant", "by merchant", etc.
    if _has_merchant_grouping(question):
        currency_filter = ""
        if currency_ids:
            currency_filter = f"AND o.processing_currency_id IN ({ids_str})"

        # Merchant GMV is always in LKR-equivalent
        return f"""
SELECT
    s.store_id,
    s.doing_business_name,
    SUM({base_lkr}) AS total_gmv_lkr
FROM tbl_order o
JOIN tbl_payment p ON p.payment_id = o.payment_id
JOIN tbl_store s ON s.store_id = o.store_id
WHERE o.payment_status_id = 2
  {currency_filter}
  {date_filter}
GROUP BY s.store_id, s.doing_business_name
ORDER BY total_gmv_lkr DESC;
""".strip()

    # No currency mentioned → total GMV across all, in LKR (LKR + USD converted + others)
    if not currency_ids:
        return f"""
SELECT
    SUM({base_lkr}) AS total_gmv_lkr
FROM tbl_order o
JOIN tbl_payment p ON p.payment_id = o.payment_id
WHERE o.payment_status_id = 2
  {date_filter};
""".strip()

    # One currency → simple sum in that currency (explicit request: LKR-only / USD-only GMV)
    if len(currency_ids) == 1:
        cid = currency_ids[0]
        code = CURRENCY_LABEL.get(cid, f"cur_{cid}")
        label = f"total_{code}_value"

        return f"""
SELECT
    SUM(o.total_amount) AS {label}
FROM tbl_order o
JOIN tbl_payment p ON p.payment_id = o.payment_id
WHERE o.payment_status_id = 2
  AND o.processing_currency_id = {cid}
  {date_filter};
""".strip()

    # Multiple currencies (e.g. usd and lkr) → per-currency native + combined LKR
    select_parts = []
    for cid in currency_ids:
        code = CURRENCY_LABEL.get(cid, f"cur_{cid}")
        col_name = f"total_{code}_value"
        select_parts.append(
            f"SUM(CASE WHEN o.processing_currency_id = {cid} THEN o.total_amount ELSE 0 END) AS {col_name}"
        )

    # Combined LKR GMV across all mentioned currencies (convert then sum)
    select_parts.append(f"SUM({base_lkr}) AS total_value_all_currencies_lkr")

    select_clause = ",\n    ".join(select_parts)

    return f"""
SELECT
    {select_clause}
FROM tbl_order o
JOIN tbl_payment p ON p.payment_id = o.payment_id
WHERE o.payment_status_id = 2
  AND o.processing_currency_id IN ({ids_str})
  {date_filter};
""".strip()


# =========================================================
# VOLUME SQL (TRANSACTION COUNT)
# =========================================================
def build_volume_sql(question: str, intent: dict) -> str:
    """
    Build a transaction-count ("volume") query, for questions such as:
      - total usd volume october 2025
      - total usd and lkr volume october 2025
      - total transaction count october 2025
      - total volume of transaction for october 2025

    Only approved transactions (payment_status_id = 2) are counted. Currency
    restrictions use o.processing_currency_id and the date window uses
    p.date_time_transaction with an exclusive end.

    Query shape, chosen in this order:
      1. Per-merchant phrasing: count grouped by store.
      2. No currency mentioned: one overall count.
      3. Exactly one currency: one count for that currency.
      4. Several currencies: one column per currency plus a combined count.
    """
    start, end = intent["date_start"], intent["date_end"]
    date_clause = (
        f"AND p.date_time_transaction >= '{start}' AND p.date_time_transaction < '{end}'"
        if start and end
        else ""
    )

    mentioned = detect_currency_ids(question)
    id_list = ",".join(map(str, mentioned))

    # Merchant-wise volume
    if _has_merchant_grouping(question):
        currency_clause = (
            f"AND o.processing_currency_id IN ({id_list})" if mentioned else ""
        )
        return f"""
SELECT
    s.store_id,
    s.doing_business_name,
    COUNT(*) AS txn_volume
FROM tbl_order o
JOIN tbl_payment p ON p.payment_id = o.payment_id
JOIN tbl_store s ON s.store_id = o.store_id
WHERE o.payment_status_id = 2
  {currency_clause}
  {date_clause}
GROUP BY s.store_id, s.doing_business_name
ORDER BY txn_volume DESC;
""".strip()

    # No currency mentioned → all currencies volume
    if len(mentioned) == 0:
        return f"""
SELECT
    COUNT(*) AS total_volume
FROM tbl_order o
JOIN tbl_payment p ON p.payment_id = o.payment_id
WHERE o.payment_status_id = 2
  {date_clause};
""".strip()

    def alias_for(currency_id):
        # Column alias such as total_usd_volume, or total_cur_<id>_volume.
        currency_code = CURRENCY_LABEL.get(currency_id, f"cur_{currency_id}")
        return f"total_{currency_code}_volume"

    # One currency → simple volume
    if len(mentioned) == 1:
        only_id = mentioned[0]
        alias = alias_for(only_id)
        return f"""
SELECT
    COUNT(*) AS {alias}
FROM tbl_order o
JOIN tbl_payment p ON p.payment_id = o.payment_id
WHERE o.payment_status_id = 2
  AND o.processing_currency_id = {only_id}
  {date_clause};
""".strip()

    # Multiple currencies (e.g. "usd and lkr volume") → per-currency and combined
    columns = [
        f"SUM(CASE WHEN o.processing_currency_id = {currency_id} THEN 1 ELSE 0 END) AS {alias_for(currency_id)}"
        for currency_id in mentioned
    ]
    # Combined volume across all mentioned currencies
    columns.append("COUNT(*) AS total_volume")
    column_sql = ",\n    ".join(columns)

    return f"""
SELECT
    {column_sql}
FROM tbl_order o
JOIN tbl_payment p ON p.payment_id = o.payment_id
WHERE o.payment_status_id = 2
  AND o.processing_currency_id IN ({id_list})
  {date_clause};
""".strip()


# =========================================================
# BUSINESS LOGIC LAYER (NON-REVENUE SHORTCUTS)
# =========================================================
# Words that mark the start of a date phrase after a captured merchant name.
_MERCHANT_DATE_WORDS = {
    "last", "this", "current", "year", "month", "months", "day", "days",
    "ytd", "mtd", "today", "yesterday", "sept",
}
# Connector words that may be left dangling once the date phrase is removed.
_MERCHANT_TRAILING_WORDS = {
    "in", "on", "during", "for", "from", "between", "since", "of", "and", "to", "the",
}


def _merchant_name_from_question(ql: str) -> str | None:
    """Return the merchant name after "transaction(s) of/for", or None.

    The capture stops at the first month word, currency word, 20xx year or
    relative-date word, and dangling connectors are dropped, so
    "transactions of abc stores in october 2025" yields "abc stores" and
    "transactions for october 2025" yields None.
    """
    m = re.search(r"transactions?\s+(?:of|for)\s+([a-zA-Z0-9 &]+)", ql)
    if not m:
        return None
    kept = []
    for tok in m.group(1).split():
        if (
            tok in MONTHS
            or tok in CURRENCY_MAP
            or tok in _MERCHANT_DATE_WORDS
            or re.fullmatch(r"20\d{2}", tok)
        ):
            break
        kept.append(tok)
    while kept and kept[-1] in _MERCHANT_TRAILING_WORDS:
        kept.pop()
    return " ".join(kept) or None


def build_business_sql(question: str, intent: dict) -> str | None:
    """
    Hard-coded domain rules for WEBXPAY, for common patterns where
    we do NOT want GPT to guess.

    Rules are tried in this order; the first that matches returns its SQL:
      1. Bank MDR / "bank rate": total bank fee in LKR.
      2. USD-to-LKR value (unless the question mentions revenue).
      3. Count of active merchants.
      4. Count of all merchants.
      5. Transaction count per merchant (grouped).
      6. Transaction count for one named merchant.

    The stored ``o.exchange_rate`` is trusted only when it is a plain decimal
    number. The numeric test uses ``[.]`` for the decimal point: a
    backslash-escaped dot is reduced to a bare ``.`` by MySQL's
    string-literal parser and would match any character.

    Args:
        question: the user's question.
        intent: dict providing "date_start" and "date_end" (``YYYY-MM-DD``
            strings or None; end is exclusive).

    Returns:
        One SQL SELECT statement, or None when no rule applies and the
        generic/GPT path should handle the question.
    """
    ql = question.lower()
    ds = intent["date_start"]
    de = intent["date_end"]

    date_filter = ""
    if ds and de:
        date_filter = (
            f"AND p.date_time_transaction >= '{ds}' "
            f"AND p.date_time_transaction < '{de}'"
        )

    # Latest buying rate on or before the transaction date (shared by rules 1 and 2).
    fx = """
            SELECT er.buying_rate
            FROM tbl_exchange_rate er
            WHERE er.currency_id = o.processing_currency_id
              AND er.date <= DATE(p.date_time_transaction)
            ORDER BY er.date DESC
            LIMIT 1
        """

    # ---------- 1) TOTAL BANK MDR ("bank rate") IN LKR ----------
    if (
        "bank rate" in ql
        or "bank mdr" in ql
        or ("bank" in ql and "mdr" in ql)
    ):
        currency_ids = detect_currency_ids(question)
        currency_filter = ""
        if currency_ids:
            ids_str = ",".join(str(c) for c in currency_ids)
            currency_filter = f"AND o.processing_currency_id IN ({ids_str})"

        base_lkr = f"""
            CASE
                WHEN o.processing_currency_id = 5 THEN o.total_amount
                ELSE
                    CASE
                        WHEN o.exchange_rate IS NOT NULL
                             AND o.exchange_rate NOT LIKE ''
                             AND o.exchange_rate REGEXP '^[0-9]+([.][0-9]+)?$'
                        THEN o.total_amount * o.exchange_rate
                        ELSE o.total_amount * ({fx})
                    END
            END
        """

        # Bank-side rate; order_type_id = 3 also carries the parent gateway rate.
        bank_component = """
            CASE
                WHEN o.order_type_id = 3
                    THEN (o.bank_payment_gateway_rate + COALESCE(opg.parent_gateway_rate,0))
                ELSE o.bank_payment_gateway_rate
            END
        """

        bank_mdr_expr = f"({base_lkr}) * (({bank_component}) / 100.0)"

        sql = f"""
SELECT
    SUM({bank_mdr_expr}) AS total_bank_mdr_lkr
FROM tbl_order o
JOIN tbl_payment p ON p.payment_id = o.payment_id
LEFT JOIN tbl_order_parent_gateway opg ON opg.order_id = o.order_id
WHERE o.payment_status_id = 2
  {currency_filter}
  {date_filter};
""".strip()
        return sql

    # ---- USD to LKR value (GMV + LKR) ----
    usd_to_lkr_pattern = (
        re.search(r"usd\s+(?:to|in)\s+lkr", ql)
        or "usd value in lkr" in ql
        or "value of usd in lkr" in ql
    )

    if usd_to_lkr_pattern and "revenue" not in ql:
        # USD transactions converted to LKR
        usd_to_lkr_expr = f"""
            CASE
                WHEN o.processing_currency_id = 2 THEN
                    CASE
                        WHEN o.exchange_rate IS NOT NULL
                             AND o.exchange_rate NOT LIKE ''
                             AND o.exchange_rate REGEXP '^[0-9]+([.][0-9]+)?$'
                        THEN o.total_amount * o.exchange_rate
                        ELSE o.total_amount * ({fx})
                    END
                ELSE 0
            END
        """

        # Native LKR GMV
        lkr_expr = """
            CASE
                WHEN o.processing_currency_id = 5 THEN o.total_amount
                ELSE 0
            END
        """

        sql = f"""
SELECT
    SUM({usd_to_lkr_expr}) AS total_usd_to_lkr,
    SUM({lkr_expr}) AS total_lkr_value,
    SUM({usd_to_lkr_expr}) + SUM({lkr_expr}) AS total_all_value_lkr
FROM tbl_order o
JOIN tbl_payment p ON p.payment_id = o.payment_id
WHERE o.payment_status_id = 2
  AND o.processing_currency_id IN (2,5)
  {date_filter};
"""
        return sql.strip()

    # ---- Active merchants count ----
    if "merchant" in ql and "active" in ql and (
        "count" in ql or "number" in ql
    ):
        return """
SELECT COUNT(*) AS active_merchants
FROM tbl_store
WHERE is_active = 1
  AND free_trail = 0;
""".strip()

    # ---- Total merchants (not explicitly active) ----
    if "merchant" in ql and (
        "total number" in ql or "number of" in ql or "count" in ql
    ) and "active" not in ql:
        return """
SELECT COUNT(*) AS total_merchants
FROM tbl_store;
""".strip()

    # ---- Total number of transactions for EACH merchant (grouped) ----
    if "transaction" in ql and "merchant" in ql and _has_merchant_grouping(question):
        return f"""
SELECT
    s.store_id,
    s.doing_business_name,
    COUNT(*) AS total_transactions
FROM tbl_order o
JOIN tbl_payment p ON p.payment_id = o.payment_id
JOIN tbl_store s ON s.store_id = o.store_id
WHERE o.payment_status_id = 2
  {date_filter}
GROUP BY s.store_id, s.doing_business_name
ORDER BY total_transactions DESC;
""".strip()

    # ---- Total number of transactions of <merchant> [in date range] ----
    if "transaction" in ql and "merchant" not in ql:
        merchant_name = _merchant_name_from_question(ql)

        if merchant_name:
            # Safe to inline: the capture regex only admits letters, digits,
            # spaces and '&', so the name cannot contain quotes or LIKE
            # wildcards.
            return f"""
SELECT COUNT(*) AS total_transactions
FROM tbl_order o
JOIN tbl_payment p ON p.payment_id = o.payment_id
JOIN tbl_store s ON s.store_id = o.store_id
WHERE o.payment_status_id = 2
  AND LOWER(s.doing_business_name) LIKE '%{merchant_name}%'
  {date_filter};
""".strip()

    # No known business rule -> let generic/GPT handle
    return None


# =========================================================
# GENERIC SQL VIA GPT
# =========================================================
def extract_sql_from_text(text: str) -> str:
    """Return the SQL contained in an LLM reply.

    If the reply contains a Markdown code fence, the content of the first
    fence is returned. The fence may be tagged ``sql`` or ``mysql`` (any
    case) or carry no tag at all. Without a fence the whole reply is
    returned. The result is stripped of surrounding whitespace; an empty or
    missing reply yields "".
    """
    if not text:
        return ""
    # The optional tag must end at a word boundary so that it is not peeled
    # off the front of a longer word.
    fenced = re.search(
        r"```(?:(?:my)?sql\b)?\s*(.*?)```", text, re.DOTALL | re.IGNORECASE
    )
    if fenced:
        return fenced.group(1).strip()
    return text.strip()


def build_generic_sql(question: str, schema: str) -> str:
    """Ask the chat model to write a single MySQL SELECT for ``question``.

    The system prompt carries the business rules (convert to LKR before
    summing across currencies, approved transactions only, preferred
    columns) together with ``schema``; the user prompt carries the question.

    Args:
        question: the user's natural-language question.
        schema: schema description inserted verbatim into the system prompt.

    Returns:
        The SQL extracted from the model's reply, or "" if the reply has no
        text content.
    """
    # Canonical base_lkr expression we want GPT to reuse. The decimal point
    # is written as [.]: a backslash-escaped dot is reduced to a bare "." by
    # MySQL's string-literal parser and would match any character.
    base_lkr_expr = """
CASE
    WHEN o.processing_currency_id = 5 THEN o.total_amount
    ELSE
        CASE
            WHEN o.exchange_rate IS NOT NULL
                 AND o.exchange_rate NOT LIKE ''
                 AND o.exchange_rate REGEXP '^[0-9]+([.][0-9]+)?$'
            THEN o.total_amount * o.exchange_rate
            ELSE o.total_amount * (
                SELECT er.buying_rate
                FROM tbl_exchange_rate er
                WHERE er.currency_id = o.processing_currency_id
                  AND er.date <= DATE(p.date_time_transaction)
                ORDER BY er.date DESC
                LIMIT 1
            )
        END
END
"""

    system_prompt = f"""
You are an expert MySQL SQL generator for a payment gateway system.

VERY IMPORTANT BUSINESS RULE ABOUT "TOTAL AMOUNT"/GMV:
- When the user asks for a single "total amount", "total value", or "GMV" across multiple currencies
  (or does not specify a single currency), you MUST:
  - Convert all currencies to LKR first, then sum.
  - Use this exact expression as the base LKR GMV per transaction:

    -- base_lkr (GMV in LKR for one transaction)
    {base_lkr_expr}

  - For total GMV/value in LKR, do: SUM(base_lkr) AS total_gmv_lkr (or a similar alias).
  - NEVER simply do SUM(o.total_amount) across mixed currencies.

- Only when the user EXPLICITLY asks for a single-currency GMV (e.g. "LKR GMV only", "USD GMV only"),
  you may restrict to that processing_currency_id and sum o.total_amount directly in that currency.

Other rules:
- Use ONLY tables/columns from this schema.
- Do NOT invent tables or columns.
- Generate exactly ONE SELECT query (no INSERT/UPDATE/DELETE/DDL).
- Prefer joining tbl_order, tbl_store, tbl_payment when relevant.
- For transaction date, prefer p.date_time_transaction over o.date_added.
- For merchant name, prefer s.doing_business_name over registered_name.
- For successful transactions, use o.payment_status_id = 2.

Schema:
{schema}
"""

    user_prompt = f"""
User question:
{question}

Write a single valid MySQL SELECT query that answers this question.
Follow the business rule about base_lkr for total GMV/total amount/value.
Do not explain, only output the SQL.
"""

    resp = client.chat.completions.create(
        model="gpt-4.1",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        temperature=0.1,
    )
    # message.content may be None (e.g. a refusal or an empty completion);
    # treat that as an empty reply instead of crashing in the regex search.
    raw = resp.choices[0].message.content or ""
    return extract_sql_from_text(raw)


# =========================================================
# === LLM-ASSISTED ROUTING + REFINEMENT LAYER ============
# =========================================================
def build_base_sql_candidates(question: str, intent: dict) -> dict[str, str]:
    """
    Collect the canonical ("golden") SQL queries relevant to this question.

    The returned mapping is keyed by a short label ("revenue", "mdr", "gmv",
    "volume", "business_rule") and is handed to the LLM so that it adapts a
    known query instead of re-deriving MDR/revenue logic on its own.
    An empty mapping means no canonical query applies (for example schema
    inspection requests, which the generic generator handles later).
    """
    lowered = question.lower()
    kind = intent["type"]
    found: dict[str, str] = {}

    # Metric-driven candidate: the intent type selects at most one of these.
    if kind == "revenue":
        found["revenue"] = build_revenue_sql(intent)
    elif kind == "mdr":
        found["mdr"] = build_mdr_sql(intent)
    elif kind == "gmv":
        found["gmv"] = build_gmv_sql(question, intent)
    elif kind == "volume" or (kind == "count" and "transaction" in lowered):
        found["volume"] = build_volume_sql(question, intent)

    # Domain shortcuts (bank MDR, merchants, usd→lkr, etc.) are always tried.
    shortcut_sql = build_business_sql(question, intent)
    if shortcut_sql:
        found["business_rule"] = shortcut_sql

    return found


def refine_sql_with_llm(
    question: str,
    schema: str,
    intent: dict,
    base_sql_by_key: dict[str, str],
) -> str:
    """
    Ask the chat model to turn canonical base queries into the final SQL.

    Inputs sent to the model:
      - the user's question,
      - the intent dict produced by the rules engine,
      - every base query in ``base_sql_by_key`` (each tagged with its key),
        or the literal text NONE when the mapping is empty,
      - the database schema.

    The prompt instructs the model to pick the best base query, copy it and
    change it as little as possible (filters, grouping, ordering, extra
    columns, wrapping subqueries) while keeping the revenue/MDR/FX
    expressions intact, and to write a fresh SELECT only when no base query
    fits. The model reply is passed through ``extract_sql_from_text`` and
    that result is returned.
    """
    # Bundle the base queries into one text block, one "-- KEY:" header each.
    if not base_sql_by_key:
        base_sql_blob = "NONE"
    else:
        labelled_queries = [
            f"-- KEY: {name}\n{body}" for name, body in base_sql_by_key.items()
        ]
        base_sql_blob = "\n\n".join(labelled_queries)

    system_prompt = f"""
You are a senior MySQL query engineer for a payment gateway like WEBXPAY.

You will be given:
- The user's natural language question.
- A detected INTENT object from a rules engine.
- Several CANONICAL BASE QUERIES that are known-correct for things like revenue, MDR, volume, GMV, FX, etc.
- The database schema.

YOUR JOB:
1. First, deeply understand the user's question.
2. Decide if ONE of the base queries is a good starting point (same metric & joins).
3. If a base query fits:
   - COPY that query and minimally modify it to fully answer the question.
   - YOU MUST PRESERVE the core calculation logic: all CASE/WHEN blocks, FX logic,
     MDR/revenue formulas, and joins. You can:
       - Add or tweak WHERE filters (dates, merchants, currencies, etc.).
       - Add GROUP BY / ORDER BY / LIMIT.
       - Add extra selected columns (e.g., date breakdowns, merchant names).
       - Wrap in subqueries if needed for comparisons (e.g., previous period vs current).
   - DO NOT simplify or delete the MDR/revenue/FX expressions.
4. If NONE of the base queries apply:
   - Write a completely new SELECT based on the schema and question.
   - Still follow the rules below.

General rules:
- Use ONLY tables/columns from the schema.
- Prefer:
  - p.date_time_transaction for transaction date.
  - s.doing_business_name for merchant name.
- For "successful" transactions, use o.payment_status_id = 2.
- Generate EXACTLY ONE MySQL SELECT statement (no DDL, no INSERT/UPDATE/DELETE).

Output:
- Return ONLY the final SQL, wrapped in ```sql ... ```.

Schema:
{schema}
"""

    user_prompt = f"""
USER QUESTION:
{question}

DETECTED INTENT (from rules engine):
{intent}

AVAILABLE BASE QUERIES (may be NONE):
{base_sql_blob}
"""

    chat_messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]
    completion = client.chat.completions.create(
        model="gpt-4.1",
        messages=chat_messages,
        temperature=0.15,
    )
    # A missing message body is treated as empty text before SQL extraction.
    reply_text = completion.choices[0].message.content or ""
    return extract_sql_from_text(reply_text)


# =========================================================
# MAIN ENTRY: GENERATE_SQL
# =========================================================
def generate_sql(question: str, schema: str | None = None) -> str:
    """
    Produce the SQL for a user question.

    Flow:
      1) Load the schema via ``load_schema`` when the caller passes None.
      2) Run the rules engine (``analyze_intent``) on the question.
      3) Unless the intent type is "schema", build the canonical base
         queries for that intent. When at least one exists, the LLM refines
         them (``refine_sql_with_llm``) so the existing revenue / MDR / GMV /
         volume / business-rule logic is kept and only filters, grouping or
         comparisons are layered on top.
      4) Otherwise (schema inspection question, or no base query matched)
         the generic schema-driven generator (``build_generic_sql``) is used.
    """
    if schema is None:
        schema = load_schema()

    intent = analyze_intent(question)

    # Schema inspection questions never get base queries; everything else
    # tries the canonical candidates first.
    if intent["type"] != "schema":
        candidates = build_base_sql_candidates(question, intent)
        if candidates:
            return refine_sql_with_llm(
                question=question,
                schema=schema,
                intent=intent,
                base_sql_by_key=candidates,
            )

    # Single fallback exit: pure LLM SQL generated from the schema.
    return build_generic_sql(question, schema)


# =========================================================
# INSIGHT + ANSWER LAYER (OPTIONAL)  **DEEPER ANALYSIS**
# =========================================================
def generate_insights(question: str, sql_result):
    """
    Ask the chat model for a leadership-oriented analysis of a SQL result.

    Args:
        question: The stakeholder's natural language question. It is
            embedded verbatim in the prompt.
        sql_result: The data to analyse. It is interpolated into the prompt
            with normal f-string formatting, so whatever ``str()`` yields for
            it is what the model sees.

    Returns:
        The model's reply with surrounding whitespace stripped. An empty
        string is returned when the API response carries no message content,
        so callers can treat "no insight" as a falsy value instead of
        getting an AttributeError.

    The prompt requests markdown with five sections (executive summary,
    detailed analysis, trends and patterns, risks/anomalies/data quality,
    recommended actions), tells the model to rely only on ``sql_result``,
    and to use Rs for LKR and USD for US dollar amounts.
    """
    prompt = f"""
You are a senior data analyst for a payment gateway like WEBXPAY.

You are given:
- A natural language business question from a stakeholder.
- The SQL result returned from the database (as a Python-like list of dicts OR nested structure).

Your job is to write a DEEP, BUSINESS-FOCUSED insight summary.

--------------------
USER QUESTION
--------------------
{question}

--------------------
SQL RESULT (Python-like)
--------------------
{sql_result}

--------------------
INSTRUCTIONS
--------------------
1. Base ALL reasoning strictly on the sql_result. 
   - Do NOT assume external numbers or trends.
   - If something is not visible in the data, say it's "not available in this result".

2. Output in the following markdown structure:

### 1. Executive summary
- 2–4 bullet points directly answering the business question.
- Mention the key metrics with thousand separators (e.g., 8,176,220.45).
- Use 'Rs' for LKR amounts and 'USD' for US dollar amounts when you can infer the currency.

### 2. Detailed analysis
- Explain what is driving the result:
  - Call out any breakdowns visible in the columns (e.g., by currency, merchant, day, status).
  - If there is a ranking (e.g., top merchants, top days), mention the top 3–5 contributors and their approximate share.
  - If there are both GMV and revenue/MDR values, comment on the implied margin (revenue ÷ GMV) where possible.

### 3. Trends and patterns
- If there is any time-related column (date, day, month, year, etc.), describe:
  - Overall trend (increasing, decreasing, stable, spiky).
  - Any obvious peaks or drops and when they occur.
- If no time dimension exists, explicitly state that trend analysis is not possible from this result.

### 4. Risks, anomalies & data quality
- Point out any of the following if they appear:
  - Very high or very low outliers.
  - Large share of NULL/zero values.
  - Missing periods (e.g., gaps in dates).
- If nothing stands out, say the data looks consistent for this cut.

### 5. Recommended actions
- Give 3–5 concise, action-oriented bullets for business or operations.
  Examples:
  - "Investigate why merchant X has unusually low approval rate."
  - "Consider special pricing for top 5 merchants contributing >40% of GMV."
  - "Drill down into days with MDR below 0.7% to check for configuration issues."

3. Tone:
- Clear, direct, and suited for C-level / leadership.
- No SQL, no implementation details.
- Keep it focused on what the data *means* for the business.

If the result set is empty or clearly invalid, explain that and focus section 5 on checks to run (date range, filters, etc.).
"""

    resp = client.chat.completions.create(
        model="gpt-4.1",
        messages=[{"role": "user", "content": prompt}],
        temperature=0.25,
    )
    # message.content may be None; mirror the `or ""` guard used by
    # refine_sql_with_llm so .strip() cannot fail on a missing body.
    content = resp.choices[0].message.content
    return (content or "").strip()


def build_short_answer(question: str, sql_result):
    """Short direct answers for the most common patterns."""
    if not sql_result:
        return "I couldn't find any matching records for that period."

    # If we get a dict of multiple result sets (overview mode), don't try short answer
    if isinstance(sql_result, dict):
        return None

    row = sql_result[0]
    ql = question.lower()

    # total usd revenue ...
    if "revenue" in ql and ("usd" in ql or "dollar" in ql or "dollars" in ql):
        val = row.get("total_revenue_usd")
        if val is None:
            return "There is no USD revenue for the selected period."
        try:
            v = float(val)
        except (TypeError, ValueError):
            return "I couldn't interpret the USD revenue value."
        return f"Total USD revenue for the selected period is ${v:,.2f}."

    # total usd value ...
    if "value" in ql and ("usd" in ql or "dollar" in ql or "dollars" in ql):
        val = row.get("total_usd_value") or row.get("total_gmv_usd") or row.get("total_gmv")
        if val is None:
            return "There is no USD value for the selected period."
        try:
            v = float(val)
        except (TypeError, ValueError):
            return "I couldn't interpret the USD value."
        return f"Total USD value for the selected period is ${v:,.2f}."

    # total lkr value ...
    if "value" in ql and ("lkr" in ql or "rs" in ql or "rupee" in ql or "rupees" in ql):
        val = row.get("total_lkr_value") or row.get("total_gmv_lkr") or row.get("total_gmv")
        if val is None:
            return "There is no LKR value for the selected period."
        try:
            v = float(val)
        except (TypeError, ValueError):
            return "I couldn't interpret the LKR value."
        return f"Total LKR value for the selected period is Rs {v:,.2f}."

    # total usd volume ...
    if "volume" in ql and ("usd" in ql or "dollar" in ql or "dollars" in ql):
        vol = row.get("total_usd_volume") or row.get("total_volume")
        if vol is None:
            return "There are no USD transactions for the selected period."
        return f"Total USD transaction volume for the selected period is {int(vol):,}."

    # total lkr volume ...
    if "volume" in ql and ("lkr" in ql or "rs" in ql or "rupee" in ql or "rupees" in ql):
        vol = row.get("total_lkr_volume") or row.get("total_volume")
        if vol is None:
            return "There are no LKR transactions for the selected period."
        return f"Total LKR transaction volume for the selected period is {int(vol):,}."

    # total volume (all currencies)
    if "volume" in ql and "usd" not in ql and "lkr" not in ql:
        vol = row.get("total_volume")
        if vol is not None:
            return f"Total transaction volume for the selected period is {int(vol):,}."

    # total bank rate / bank mdr ...
    if "bank rate" in ql or "bank mdr" in ql or ("bank" in ql and "mdr" in ql):
        val = row.get("total_bank_mdr_lkr")
        if val is None:
            return "There is no bank MDR for the selected period."
        try:
            v = float(val)
        except (TypeError, ValueError):
            return "I couldn't interpret the bank MDR value."
        return f"Total bank MDR for the selected period is Rs {v:,.2f}."

    # total mdr (overall PG MDR)
    if "mdr" in ql and "bank" not in ql:
        val = row.get("total_mdr_lkr")
        if val is None:
            return "There is no MDR for the selected period."
        try:
            v = float(val)
        except (TypeError, ValueError):
            return "I couldn't interpret the MDR value."
        return f"Total MDR for the selected period is Rs {v:,.2f}."

    # total usd to lkr ...
    if (
        re.search(r"usd\s+to\s+lkr", ql)
        or "usd value in lkr" in ql
        or "value of usd in lkr" in ql
        or "usd in lkr" in ql
    ):
        val = row.get("total_usd_to_lkr")
        if val is None:
            return "There is no USD-to-LKR value for the selected period."
        try:
            v = float(val)
        except (TypeError, ValueError):
            return "I couldn't interpret the USD-to-LKR value."
        return f"Total USD value converted to LKR for the selected period is Rs {v:,.2f}."

    return None


# =========================================================
# HIGH-LEVEL MODES: OVERVIEW / TRENDS (NO NEW SQL)
# =========================================================
def detect_high_level_mode(question: str) -> str | None:
    """
    Detect "out of the box" business questions that should trigger
    multi-query logic (but still using ONLY existing SQL builders).
    Returns: 'period_overview' | None
    """
    ql = question.lower()

    overview_keywords = [
        "useful information",
        "usefull information",
        "useful info",
        "overview",
        "summary",
        "insight",
        "insights",
        "analysis",
        "analytics",
        "performance",
        "how did we do",
        "how did we perform",
        "how did we do in",
        "how did we perform in",
        "overall picture",
        "overall view",
        "big picture",
        "trends",
        "trend for the year",
        "trend in",
        "yearly trend",
        "year trend",
        "month analysis",
        "analysis for",
        "tell me about",
        "tell me useful information",
    ]
    if any(k in ql for k in overview_keywords):
        return "period_overview"

    return None


def get_period_from_question(question: str):
    """
    Resolve the (date_from, date_to) pair for an overview-style question.

    Resolution order:
      1. whatever ``extract_date_range`` returns, when both ends are truthy;
      2. the first standalone 20xx year found in the question, as
         Jan 1 of that year to Jan 1 of the following year;
      3. the same full-year span for the current calendar year.
    """
    range_start, range_end = extract_date_range(question)
    if range_start and range_end:
        return range_start, range_end

    year_match = re.search(r"\b(20\d{2})\b", question)
    if year_match:
        target_year = int(year_match.group(1))
    else:
        target_year = datetime.now().year

    return f"{target_year}-01-01", f"{target_year+1}-01-01"


def handle_period_overview(question: str, sql_executor):
    """
    Answer broad period questions such as:
      - "give me useful information on 2025"
      - "trends for the year 2025"
      - "overview for Jan 2025"
      - "how did we do in 2025?"

    Steps:
      - resolve the date range from the question,
      - build and run the existing revenue, GMV, volume and MDR queries
        for that range (in that order),
      - hand the combined results to ``generate_insights`` for a narrative.

    Returns a dict with the question, the four SQL strings (under "sql"),
    the combined raw results, and the narrative under both "answer" and
    "insights".
    """
    date_from, date_to = get_period_from_question(question)

    def period_intent(kind: str, metric: str) -> dict:
        # Intent for the whole period with no currency restriction.
        return {
            "type": kind,
            "metric": metric,
            "date_start": date_from,
            "date_end": date_to,
            "currency_id": None,
            "currency_name": None,
        }

    # Each executor result falls back to a single empty row when falsy.

    # Revenue over the period (total)
    revenue_sql = build_revenue_sql(period_intent("revenue", "revenue"))
    revenue_rows = sql_executor(revenue_sql) or [{}]

    # GMV: an empty question is passed so the builder is not steered by
    # any wording from the user's question.
    gmv_sql = build_gmv_sql("", period_intent("gmv", "gmv"))
    gmv_rows = sql_executor(gmv_sql) or [{}]

    # Volume (all currencies), again with an empty question
    vol_sql = build_volume_sql("", period_intent("volume", "count"))
    vol_rows = sql_executor(vol_sql) or [{}]

    # MDR over the period
    mdr_sql = build_mdr_sql(period_intent("mdr", "mdr"))
    mdr_rows = sql_executor(mdr_sql) or [{}]

    # Everything the LLM is allowed to base its summary on
    overview_result = {
        "period": {"date_from": date_from, "date_to": date_to},
        "revenue": revenue_rows,
        "gmv": gmv_rows,
        "volume": vol_rows,
        "mdr": mdr_rows,
    }

    narrative = generate_insights(
        f"{question} (period {date_from} to {date_to})", overview_result
    )

    return {
        "question": question,
        "sql": {
            "revenue": revenue_sql,
            "gmv": gmv_sql,
            "volume": vol_sql,
            "mdr": mdr_sql,
        },
        "raw_result": overview_result,
        "answer": narrative,
        "insights": narrative,
    }


# =========================================================
# OPTIONAL END-TO-END HANDLER
# =========================================================
def handle_user_question(question: str, sql_executor):
    """
    End-to-end handler: question in, SQL + data + answer out.

    ``sql_executor`` is a callable taking a SQL string and returning the
    query result (e.g., list[dict]).

    Returns a dict with the keys:
        "question", "sql", "raw_result", "answer", "insights"

    Overview-style questions are delegated to ``handle_period_overview``.
    For everything else one SQL statement is generated and executed; a
    short templated answer is used when available, otherwise the LLM
    insight summary becomes the answer. If that summary fails or comes back
    empty, a fixed fallback sentence is used.
    """
    # 1) High-level / overview / trend questions -> multi-query overview
    if detect_high_level_mode(question) == "period_overview":
        return handle_period_overview(question, sql_executor)

    # 2) Normal path: single SQL from domain builders / generic LLM SQL
    sql = generate_sql(question, load_schema())
    result = sql_executor(sql)

    insights = None
    answer = build_short_answer(question, result)

    if answer is None:
        # No templated answer matched; insight generation is best effort.
        try:
            insights = generate_insights(question, result)
        except Exception:
            insights = None
        answer = insights if insights else "I couldn't generate a detailed insight summary."

    return {
        "question": question,
        "sql": sql,
        "raw_result": result,
        "answer": answer,
        "insights": insights,
    }
