"""
Afton UF Forensic Analyzer — READ-ONLY
=======================================
For each of the 214 open UF entries, pulls full Payment/SalesReceipt detail
and clusters by:
  - CreateTime (was there a bulk catch-up entry session?)
  - PaymentMethod (check / cash / CC / etc.)
  - TxnSource (where the txn came from — Thryv, QBO Payments, manual)
  - LastUpdatedTime vs CreateTime (recently touched?)
  - PrivateNote / Memo content (any audit clues?)

Goal: differentiate
  (A) Real customer pmts awaiting deposit (cash/check on hand)
  (B) Phantom catch-up entries for AR clearing (no real money flow)
  (C) Deposited elsewhere (non-RBFCU account)

Output:
  afton_uf_forensics_report.md
  afton_uf_forensics.json
"""
from __future__ import annotations

import json
import sys
from collections import Counter, defaultdict
from datetime import datetime
from pathlib import Path

if hasattr(sys.stdout, "reconfigure"):
    sys.stdout.reconfigure(encoding="utf-8", errors="replace")

SCRIPT_DIR = Path(__file__).resolve().parent
QBO_APP_DIR = SCRIPT_DIR.parent.parent / "Atlas" / "app"
sys.path.insert(0, str(QBO_APP_DIR))
import qbo_client as qbo  # type: ignore

CLIENT = "afton"
UF_JSON = SCRIPT_DIR / "afton_uf_cleanup_worksheet.json"
OUT_MD = SCRIPT_DIR / "afton_uf_forensics_report.md"
OUT_JSON = SCRIPT_DIR / "afton_uf_forensics.json"


def batch_query(token: str, entity: str, ids: list[str]) -> dict[str, dict]:
    out: dict[str, dict] = {}
    BATCH = 30
    for i in range(0, len(ids), BATCH):
        chunk = ids[i : i + BATCH]
        in_clause = ", ".join(f"'{x}'" for x in chunk)
        sql = f"SELECT * FROM {entity} WHERE Id IN ({in_clause})"
        resp = qbo.query(CLIENT, token, sql)
        for r in resp.get(entity, []):
            out[str(r["Id"])] = r
    return out


def main() -> None:
    data = json.load(open(UF_JSON, encoding="utf-8"))
    open_entries = data["open_entries"]
    print(f"Loaded {len(open_entries)} open UF entries")

    token = qbo.refresh_access_token(CLIENT)

    payment_ids = [e["id"] for e in open_entries if e["entity"] == "Payment"]
    sr_ids = [e["id"] for e in open_entries if e["entity"] == "SalesReceipt"]
    print(f"Pulling {len(payment_ids)} payments + {len(sr_ids)} sales receipts...")
    payments = batch_query(token, "Payment", payment_ids)
    sales_receipts = batch_query(token, "SalesReceipt", sr_ids)

    # Resolve PaymentMethod entities for ID → name map
    print("Pulling PaymentMethod definitions...")
    pm_resp = qbo.query(CLIENT, token, "SELECT * FROM PaymentMethod")
    payment_methods = {str(m["Id"]): m for m in pm_resp.get("PaymentMethod", [])}
    print(f"  {len(payment_methods)} payment methods on file")

    # Forensic data per entry
    forensic_rows = []
    for e in open_entries:
        ent_data = payments.get(e["id"]) if e["entity"] == "Payment" else sales_receipts.get(e["id"])
        if not ent_data:
            continue
        meta = ent_data.get("MetaData", {})
        pm_id = (ent_data.get("PaymentMethodRef") or {}).get("value")
        pm_name = payment_methods.get(pm_id, {}).get("Name") if pm_id else None

        forensic_rows.append({
            "entity": e["entity"],
            "id": e["id"],
            "customer": e["customer"],
            "txn_date": e["txn_date"],
            "amount": e["amount"],
            "create_time": meta.get("CreateTime"),
            "last_updated": meta.get("LastUpdatedTime"),
            "create_day": (meta.get("CreateTime") or "")[:10],
            "create_hour": (meta.get("CreateTime") or "")[:13],
            "txn_source": ent_data.get("TxnSource"),
            "payment_ref_num": ent_data.get("PaymentRefNum") or ent_data.get("DocNumber"),
            "payment_method_id": pm_id,
            "payment_method_name": pm_name,
            "private_note": ent_data.get("PrivateNote"),
            "has_cc": bool(ent_data.get("CreditCardPayment")),
            "deposit_acct": (ent_data.get("DepositToAccountRef") or {}).get("value"),
            "process_payment": ent_data.get("ProcessPayment"),
        })

    # ── Aggregates ────────────────────────────────────────────────────────
    create_day_counts = Counter(r["create_day"] for r in forensic_rows)
    create_hour_counts = Counter(r["create_hour"] for r in forensic_rows)
    pm_counts: dict = defaultdict(lambda: {"count": 0, "total": 0.0})
    for r in forensic_rows:
        key = f"{r['payment_method_id'] or 'NONE'} - {r['payment_method_name'] or '(no method)'}"
        pm_counts[key]["count"] += 1
        pm_counts[key]["total"] += r["amount"] or 0
    src_counts: dict = defaultdict(lambda: {"count": 0, "total": 0.0})
    for r in forensic_rows:
        key = r["txn_source"] or "(no source)"
        src_counts[key]["count"] += 1
        src_counts[key]["total"] += r["amount"] or 0

    # Look for catch-up clusters: any single day with 10+ creates
    catch_up_days = [(d, c) for d, c in create_day_counts.items() if c >= 10]
    catch_up_days.sort(key=lambda x: -x[1])

    # Same-day entry vs backdated
    same_day_count = 0
    backdated_count = 0
    backdated_days_distribution: Counter = Counter()
    for r in forensic_rows:
        if not r["create_time"] or not r["txn_date"]:
            continue
        try:
            ct = datetime.fromisoformat(r["create_time"].replace("Z", "+00:00")).date()
            td = datetime.fromisoformat(r["txn_date"]).date()
            delta = (ct - td).days
            if delta <= 1:
                same_day_count += 1
            else:
                backdated_count += 1
                # bucket the lag
                if delta < 7:
                    backdated_days_distribution["1-6 days"] += 1
                elif delta < 30:
                    backdated_days_distribution["7-29 days"] += 1
                elif delta < 90:
                    backdated_days_distribution["30-89 days"] += 1
                elif delta < 365:
                    backdated_days_distribution["90-364 days"] += 1
                else:
                    backdated_days_distribution["365+ days"] += 1
        except Exception:
            pass

    # ── Output ─────────────────────────────────────────────────────────────
    payload = {
        "generated_at": datetime.utcnow().isoformat() + "Z",
        "n_entries": len(forensic_rows),
        "rows": forensic_rows,
        "create_day_counts": dict(create_day_counts),
        "catch_up_days_10plus": catch_up_days,
        "payment_method_breakdown": dict(pm_counts),
        "txn_source_breakdown": dict(src_counts),
        "same_day_count": same_day_count,
        "backdated_count": backdated_count,
        "backdated_lag_distribution": dict(backdated_days_distribution),
    }
    OUT_JSON.write_text(json.dumps(payload, indent=2, default=str), encoding="utf-8")

    # Markdown
    lines = [
        "# Afton UF Forensic Report",
        f"_Generated: {datetime.utcnow().isoformat()}Z_",
        "",
        f"Analyzed {len(forensic_rows)} open UF entries.",
        "",
        "## Same-day entry vs. backdated entry",
        "",
        f"- **Same-day** (create within ±1 day of txn_date): {same_day_count} entries",
        f"- **Backdated** (create lags txn_date by 2+ days): {backdated_count} entries",
        "",
        "**Backdating lag distribution:**",
        "",
        "| Lag | Count |",
        "|---|---|",
    ]
    for k, v in backdated_days_distribution.most_common():
        lines.append(f"| {k} | {v} |")
    lines.append("")

    lines.append("## Catch-up clusters (10+ entries created on same day)")
    lines.append("")
    if not catch_up_days:
        lines.append("_No bulk catch-up days detected._")
    else:
        lines.append("| Create Day | Entries |")
        lines.append("|---|---|")
        for d, c in catch_up_days:
            lines.append(f"| {d} | {c} |")
    lines.append("")

    lines.append("## Payment method breakdown")
    lines.append("")
    lines.append("| Method | Count | Total |")
    lines.append("|---|---|---|")
    for k, v in sorted(pm_counts.items(), key=lambda x: -x[1]["total"]):
        lines.append(f"| {k} | {v['count']} | ${v['total']:,.2f} |")
    lines.append("")

    lines.append("## TxnSource breakdown")
    lines.append("")
    lines.append("| Source | Count | Total |")
    lines.append("|---|---|---|")
    for k, v in sorted(src_counts.items(), key=lambda x: -x[1]["total"]):
        lines.append(f"| {k} | {v['count']} | ${v['total']:,.2f} |")
    lines.append("")

    lines.append("## Top 20 create-days by entry count")
    lines.append("")
    lines.append("| Create Day | Entries |")
    lines.append("|---|---|")
    for d, c in create_day_counts.most_common(20):
        lines.append(f"| {d} | {c} |")

    OUT_MD.write_text("\n".join(lines), encoding="utf-8")
    print(f"\nWrote {OUT_MD}")
    print(f"Wrote {OUT_JSON}")
    print(f"\n=== HEADLINES ===")
    print(f"  Same-day entries:  {same_day_count}")
    print(f"  Backdated entries: {backdated_count}")
    if catch_up_days:
        print(f"  Catch-up days (10+): {len(catch_up_days)}")
        for d, c in catch_up_days[:5]:
            print(f"    {d}: {c} entries")


if __name__ == "__main__":
    main()
