#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
ייצוא מחברי ספריא לאקסל עם כותרות וגיליונות בעברית.

מה הסקריפט אוסף:
1. רשימת המחברים מתוך:
   https://www.sefaria.org/topics/category/authors?tab=popular-writing-of
2. נתוני Topic מלאים לכל מחבר מתוך:
   https://www.sefaria.org/api/v2/topics/{slug}
   עם הדגלים:
   with_links=1, annotate_links=1, group_related=1,
   with_refs=1, with_indexes=1, annotate_time_period=1

פלט:
- מחברים_מרוכז          : שורה אחת לכל מחבר, כולל עמודות דינמיות לכל סוג קשר/ציטוט/מאפיין
- קשרים_מנורמלים        : שורה אחת לכל קשר בין המחבר לנושא אחר
- ציטוטים_ומקורות       : שורה אחת לכל אזכור/ציטוט/מקור
- חיבורים_וכתבים        : שורה אחת לכל חיבור/יצירה
- שגיאות                : מחברים שנכשלו
- הסבר                  : מידע על הקובץ והמקורות

תלויות:
    pip install requests beautifulsoup4 openpyxl

שימוש:
    python sefaria_authors_to_excel_he.py
    python sefaria_authors_to_excel_he.py -o מחברי_ספריא.xlsx
    python sefaria_authors_to_excel_he.py --max-authors 25 --sleep 0.2
"""

from __future__ import annotations

import argparse
import json
import re
import sys
import time
from collections import defaultdict
from typing import Any, Dict, Iterable, List
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
from openpyxl import Workbook
from openpyxl.styles import Font, PatternFill, Border, Side, Alignment
from openpyxl.utils import get_column_letter

BASE_URL = "https://www.sefaria.org"
AUTHORS_URL = f"{BASE_URL}/topics/category/authors?tab=popular-writing-of"
API_TOPIC_TEMPLATE = BASE_URL + "/api/v2/topics/{slug}"

API_PARAMS = {
    "with_links": 1,
    "annotate_links": 1,
    "group_related": 1,
    "with_refs": 1,
    "with_indexes": 1,
    "annotate_time_period": 1,
    "with_html": 0,
}

REQUEST_HEADERS = {
    "User-Agent": "Mozilla/5.0 (compatible; SefariaAuthorsExporterHE/1.1)",
    "Accept-Language": "he,en;q=0.9",
}

BASE_HEADER_MAP = {
    "slug": "מזהה_קצר",
    "topic_url": "קישור_לדף_הנושא",
    "api_url": "קישור_API",
    "listing_source_url": "קישור_לדף_הרשימה",
    "name_he": "שם_בעברית",
    "name_en": "שם_באנגלית",
    "primary_title_he": "כותרת_ראשית_עברית",
    "primary_title_en": "כותרת_ראשית_אנגלית",
    "card_name_he": "שם_בכרטיס_עברית",
    "card_name_en": "שם_בכרטיס_אנגלית",
    "card_description": "תיאור_כרטיס",
    "description_he": "תיאור_עברית",
    "description_en": "תיאור_אנגלית",
    "category_description_he": "תיאור_קטגוריה_עברית",
    "category_description_en": "תיאור_קטגוריה_אנגלית",
    "subclass": "תת_סוג",
    "data_source": "מקור_נתונים",
    "num_sources": "מספר_מקורות",
    "description_published": "התיאור_פורסם",
    "all_titles_he": "כל_הכותרות_בעברית",
    "all_titles_en": "כל_הכותרות_באנגלית",
    "works_count": "מספר_חיבורים",
    "works_titles_he": "כותרות_חיבורים_עברית",
    "works_titles_en": "כותרות_חיבורים_אנגלית",
    "works_urls": "קישורי_חיבורים",
    "works_descriptions_he": "תיאורי_חיבורים_עברית",
    "works_descriptions_en": "תיאורי_חיבורים_אנגלית",
    "author_slug": "מזהה_קצר_מחבר",
    "author_name_he": "שם_מחבר_עברית",
    "author_name_en": "שם_מחבר_אנגלית",
    "link_type_slug": "מזהה_סוג_קשר",
    "link_type_title": "כותרת_סוג_קשר",
    "link_type_plural_title": "כותרת_רבים_של_סוג_קשר",
    "related_slug": "מזהה_קצר_של_הקשור",
    "related_url": "קישור_לדף_הקשור",
    "related_title_he": "כותרת_קשור_עברית",
    "related_title_en": "כותרת_קשור_אנגלית",
    "related_description_he": "תיאור_קשור_עברית",
    "related_description_en": "תיאור_קשור_אנגלית",
    "should_display": "צריך_להציג",
    "order_json": "סדר_JSON",
    "pools_json": "מאגרים_JSON",
    "ref_type_slug": "מזהה_סוג_ציטוט",
    "ref_type_title": "כותרת_סוג_ציטוט",
    "ref_type_plural_title": "כותרת_רבים_של_סוג_ציטוט",
    "ref": "הפניה",
    "anchor_ref": "הפניה_מעגנת",
    "data_sources_slugs": "מזהי_מקורות_נתונים",
    "data_sources_json": "מקורות_נתונים_JSON",
    "is_sheet": "האם_זה_דף_מקורות",
    "expanded_refs_json": "הפניות_מורחבות_JSON",
    "similar_refs_json": "הפניות_דומות_JSON",
    "work_url": "קישור_לחיבור",
    "work_title_he": "כותרת_חיבור_עברית",
    "work_title_en": "כותרת_חיבור_אנגלית",
    "work_description_he": "תיאור_חיבור_עברית",
    "work_description_en": "תיאור_חיבור_אנגלית",
    "raw_json": "נתון_גולמי_JSON",
    "alt_ids_json": "מזהים_חלופיים_JSON",
    "raw_properties_json": "מאפיינים_גולמיים_JSON",
    "title_is_transliteration_en": "האם_הכותרת_הראשית_תעתיק_אנגלית",
    "title_is_transliteration_he": "האם_הכותרת_הראשית_תעתיק_עברית",
    "error": "שגיאה",
    "key": "מפתח",
    "value": "ערך",
}

PROPERTY_PART_MAP = {
    "birthplace": "מקום_לידה",
    "birthyear": "שנת_לידה",
    "birthyearisapprox": "שנת_לידה_משוערת",
    "deathplace": "מקום_פטירה",
    "deathyear": "שנת_פטירה",
    "deathyearisapprox": "שנת_פטירה_משוערת",
    "enbio": "ביוגרפיה_אנגלית",
    "ennlilink": "קישור_NLI_אנגלית",
    "enwikilink": "קישור_ויקיפדיה_אנגלית",
    "era": "תקופה",
    "generation": "דור",
    "hebio": "ביוגרפיה_עברית",
    "hebirthplace": "מקום_לידה_עברית",
    "hedeathplace": "מקום_פטירה_עברית",
    "henlilink": "קישור_NLI_עברית",
    "hewikilink": "קישור_ויקיפדיה_עברית",
    "jelink": "קישור_אנציקלופדיה_יהודית",
    "sex": "מין",
    "datasource": "מקור_נתון",
    "datasource": "מקור_נתון",
}

LINK_TYPE_MAP = {
    "child_in_law_of": "חתן_או_כלה_של",
    "child_of": "ילד_של",
    "corresponded_with": "התכתב_עם",
    "cousin_of": "בן_דוד_או_בת_דודה_של",
    "displays_under": "מוצג_תחת",
    "grandchild_of": "נכד_או_נכדה_של",
    "grandparent_of": "סב_או_סבתא_של",
    "has_member": "כולל_חבר",
    "has_sheets_related_to": "כולל_דפי_מקורות_קשורים",
    "is_a": "הוא_סוג_של",
    "learned_from": "למד_מ",
    "opposed": "התנגד_ל",
    "parent_in_law_of": "חותן_או_חותנת_של",
    "parent_of": "הורה_של",
    "related_to": "קשור_ל",
    "taught": "לימד_את",
}

REF_TYPE_MAP = {
    "about": "על",
    "mention": "אזכור",
    "popular_writing_of": "כתיבה_פופולרית_של",
}

SUFFIX_MAP = {
    "group_title": "כותרת_קבוצה",
    "group_plural_title": "כותרת_קבוצה_ברבים",
    "count": "כמות",
    "titles_en": "כותרות_אנגלית",
    "titles_he": "כותרות_עברית",
    "slugs": "מזהים_קצרים",
    "urls": "קישורים",
    "descriptions_en": "תיאורים_אנגלית",
    "descriptions_he": "תיאורים_עברית",
    "refs": "הפניות",
    "anchor_refs": "הפניות_מעגנות",
    "data_sources": "מקורות_נתונים",
}

README_LABEL_MAP = {
    "authors_list_url": "קישור_רשימת_מחברים",
    "topic_api_template": "תבנית_קישור_API_לנושא",
    "api_params_json": "פרמטרי_API_JSON",
    "notes": "הערות",
    "generated_rows_authors": "מספר_שורות_בגיליון_מחברים",
    "generated_rows_links": "מספר_שורות_בגיליון_קשרים",
    "generated_rows_refs": "מספר_שורות_בגיליון_ציטוטים",
    "generated_rows_works": "מספר_שורות_בגיליון_חיבורים",
    "errors": "מספר_שגיאות",
}

SHEET_TITLES = {
    "authors": "מחברים_מרוכז",
    "links": "קשרים_מנורמלים",
    "refs": "ציטוטים_ומקורות",
    "works": "חיבורים_וכתבים",
    "errors": "שגיאות",
    "readme": "הסבר",
}


# -----------------------------
# HTTP
# -----------------------------

def build_session() -> requests.Session:
    session = requests.Session()
    session.headers.update(REQUEST_HEADERS)
    adapter = requests.adapters.HTTPAdapter(max_retries=3)
    session.mount("http://", adapter)
    session.mount("https://", adapter)
    return session


def get_json(session: requests.Session, url: str, params: dict | None = None, timeout: int = 60) -> dict:
    resp = session.get(url, params=params, timeout=timeout)
    resp.raise_for_status()
    return resp.json()


def get_text(session: requests.Session, url: str, timeout: int = 60) -> str:
    resp = session.get(url, timeout=timeout)
    resp.raise_for_status()
    return resp.text


# -----------------------------
# Parsing helpers
# -----------------------------

def clean_text(value: Any) -> str:
    if value is None:
        return ""
    if isinstance(value, (dict, list)):
        return json.dumps(value, ensure_ascii=False)
    text = str(value)
    text = re.sub(r"\s+", " ", text).strip()
    return text


def unique_preserve(items: Iterable[str]) -> List[str]:
    seen = set()
    out = []
    for item in items:
        if item is None:
            continue
        item = clean_text(item)
        if not item or item in seen:
            continue
        seen.add(item)
        out.append(item)
    return out


def join_unique(items: Iterable[str], sep: str = " | ") -> str:
    return sep.join(unique_preserve(items))


def safe_slug_for_column(name: str) -> str:
    name = name.replace("-", "_")
    name = re.sub(r"[^0-9A-Za-z_]+", "_", name)
    name = re.sub(r"_+", "_", name).strip("_")
    return name.lower() or "unknown"


def to_hebrew_token(token: str) -> str:
    token = token.strip()
    if not token:
        return token
    low = token.lower()
    if low in PROPERTY_PART_MAP:
        return PROPERTY_PART_MAP[low]
    if low in LINK_TYPE_MAP:
        return LINK_TYPE_MAP[low]
    if low in REF_TYPE_MAP:
        return REF_TYPE_MAP[low]
    if low in SUFFIX_MAP:
        return SUFFIX_MAP[low]
    return token


def translate_header(header: str) -> str:
    if header in BASE_HEADER_MAP:
        return BASE_HEADER_MAP[header]

    if header.startswith("property__"):
        rest = header[len("property__"):]
        parts = [to_hebrew_token(part) for part in rest.split("__")]
        return "מאפיין__" + "__".join(parts)

    if header.startswith("links__"):
        parts = header.split("__")
        translated = ["קשרים"]
        translated.extend(to_hebrew_token(part) for part in parts[1:])
        return "__".join(translated)

    if header.startswith("refs__"):
        parts = header.split("__")
        translated = ["ציטוטים_ומקורות"]
        translated.extend(to_hebrew_token(part) for part in parts[1:])
        return "__".join(translated)

    return header


# -----------------------------
# Sefaria-specific extraction
# -----------------------------

def scrape_author_cards(session: requests.Session) -> List[dict]:
    html = get_text(session, AUTHORS_URL)
    soup = BeautifulSoup(html, "html.parser")

    authors: List[dict] = []
    for card in soup.select("div.card"):
        a = card.select_one("a.cardTitle[href]")
        if not a:
            continue

        href = a.get("href", "")
        if not href.startswith("/topics/"):
            continue
        slug = href.rstrip("/").split("/")[-1]
        if not slug or slug == "category":
            continue

        he_name = clean_text(a.get_text(" ", strip=True))
        en_name = clean_text(a.get("data-anl-text") or "")
        desc = card.select_one("div.cardDescription")
        card_desc = clean_text(desc.get_text(" ", strip=True) if desc else "")

        authors.append(
            {
                "slug": slug,
                "card_name_he": he_name,
                "card_name_en": en_name,
                "card_description": card_desc,
                "topic_url": urljoin(BASE_URL, href),
                "api_url": API_TOPIC_TEMPLATE.format(slug=slug),
                "listing_source_url": AUTHORS_URL,
            }
        )

    deduped = []
    seen = set()
    for row in authors:
        if row["slug"] in seen:
            continue
        seen.add(row["slug"])
        deduped.append(row)
    return deduped


def fetch_topic_payload(session: requests.Session, slug: str) -> dict:
    url = API_TOPIC_TEMPLATE.format(slug=slug)
    return get_json(session, url, params=API_PARAMS)


def extract_titles_by_lang(titles: List[dict], lang: str) -> List[str]:
    return [clean_text(t.get("text")) for t in titles if t.get("lang") == lang]


def extract_properties(raw: dict) -> dict:
    props = raw.get("properties", {}) or {}
    out = {}
    for key, obj in props.items():
        if isinstance(obj, dict):
            out[key] = obj.get("value")
            out[f"{key}__dataSource"] = obj.get("dataSource")
        else:
            out[key] = obj
    return out


def summarize_indexes(indexes: Any) -> dict:
    indexes = indexes or []
    works_en = []
    works_he = []
    work_urls = []
    work_desc_en = []
    work_desc_he = []

    for item in indexes:
        if isinstance(item, dict):
            title = item.get("title") or {}
            if isinstance(title, dict):
                works_en.append(clean_text(title.get("en")))
                works_he.append(clean_text(title.get("he")))
            else:
                works_en.append(clean_text(title))
            work_urls.append(clean_text(item.get("url")))
            desc = item.get("description") or {}
            if isinstance(desc, dict):
                work_desc_en.append(clean_text(desc.get("en")))
                work_desc_he.append(clean_text(desc.get("he")))
        else:
            works_en.append(clean_text(item))

    return {
        "works_count": len(indexes),
        "works_titles_en": join_unique(works_en),
        "works_titles_he": join_unique(works_he),
        "works_urls": join_unique(work_urls),
        "works_descriptions_en": join_unique(work_desc_en),
        "works_descriptions_he": join_unique(work_desc_he),
    }


def summarize_link_group(group: dict) -> dict:
    items = group.get("links", []) or []
    return {
        "count": len(items),
        "titles_en": join_unique(item.get("title", {}).get("en") for item in items),
        "titles_he": join_unique(item.get("title", {}).get("he") for item in items),
        "slugs": join_unique(item.get("topic") for item in items),
        "urls": join_unique(urljoin(BASE_URL, f"/topics/{item.get('topic')}") for item in items if item.get("topic")),
        "descriptions_en": join_unique((item.get("description") or {}).get("en") for item in items),
        "descriptions_he": join_unique((item.get("description") or {}).get("he") for item in items),
        "group_title": clean_text(group.get("title")),
        "group_plural_title": clean_text(group.get("pluralTitle")),
    }


def summarize_ref_group(group: dict) -> dict:
    items = group.get("refs", []) or []
    refs = []
    anchor_refs = []
    data_sources = []
    desc_en = []
    desc_he = []
    for item in items:
        refs.append(clean_text(item.get("ref")))
        anchor_refs.append(clean_text(item.get("anchorRef") or item.get("ref")))
        ds = item.get("dataSources") or {}
        if isinstance(ds, dict):
            data_sources.extend(ds.keys())
        elif isinstance(ds, list):
            data_sources.extend(ds)
        descs = item.get("descriptions") or {}
        if isinstance(descs, dict):
            en = descs.get("en")
            he = descs.get("he")
            if isinstance(en, dict):
                desc_en.append(clean_text(en.get("title") or en.get("prompt") or en))
            else:
                desc_en.append(clean_text(en))
            if isinstance(he, dict):
                desc_he.append(clean_text(he.get("title") or he.get("prompt") or he))
            else:
                desc_he.append(clean_text(he))

    return {
        "count": len(items),
        "refs": join_unique(refs),
        "anchor_refs": join_unique(anchor_refs),
        "data_sources": join_unique(data_sources),
        "descriptions_en": join_unique(desc_en),
        "descriptions_he": join_unique(desc_he),
        "group_title": clean_text(group.get("title")),
        "group_plural_title": clean_text(group.get("pluralTitle")),
    }


def build_wide_row(card: dict, payload: dict) -> dict:
    titles = payload.get("titles", []) or []
    props = extract_properties(payload)
    row = {
        "slug": card["slug"],
        "topic_url": card["topic_url"],
        "api_url": card["api_url"],
        "listing_source_url": card["listing_source_url"],
        "card_name_he": card.get("card_name_he", ""),
        "card_name_en": card.get("card_name_en", ""),
        "card_description": card.get("card_description", ""),
        "name_en": clean_text(payload.get("en")),
        "name_he": clean_text(payload.get("he")),
        "primary_title_en": clean_text((payload.get("primaryTitle") or {}).get("en")),
        "primary_title_he": clean_text((payload.get("primaryTitle") or {}).get("he")),
        "description_en": clean_text((payload.get("description") or {}).get("en")),
        "description_he": clean_text((payload.get("description") or {}).get("he")),
        "category_description_en": clean_text((payload.get("categoryDescription") or {}).get("en")),
        "category_description_he": clean_text((payload.get("categoryDescription") or {}).get("he")),
        "subclass": clean_text(payload.get("subclass")),
        "data_source": clean_text(payload.get("data_source")),
        "num_sources": payload.get("numSources"),
        "description_published": payload.get("description_published"),
        "title_is_transliteration_en": clean_text((payload.get("primaryTitleIsTransliteration") or {}).get("en")),
        "title_is_transliteration_he": clean_text((payload.get("primaryTitleIsTransliteration") or {}).get("he")),
        "all_titles_en": join_unique(extract_titles_by_lang(titles, "en")),
        "all_titles_he": join_unique(extract_titles_by_lang(titles, "he")),
        "alt_ids_json": clean_text(payload.get("alt_ids")),
        "pools_json": clean_text(payload.get("pools")),
        "raw_properties_json": clean_text(payload.get("properties")),
    }
    row.update({f"property__{safe_slug_for_column(k)}": clean_text(v) for k, v in props.items()})
    row.update(summarize_indexes(payload.get("indexes")))
    return row


def explode_links(slug: str, payload: dict) -> List[dict]:
    out = []
    links = payload.get("links", {}) or {}
    for link_type, group in links.items():
        group_title = clean_text(group.get("title"))
        group_plural_title = clean_text(group.get("pluralTitle"))
        for item in group.get("links", []) or []:
            out.append(
                {
                    "author_slug": slug,
                    "author_name_en": clean_text(payload.get("en")),
                    "author_name_he": clean_text(payload.get("he")),
                    "link_type_slug": link_type,
                    "link_type_title": group_title,
                    "link_type_plural_title": group_plural_title,
                    "related_slug": clean_text(item.get("topic")),
                    "related_url": urljoin(BASE_URL, f"/topics/{item.get('topic')}") if item.get("topic") else "",
                    "related_title_en": clean_text((item.get("title") or {}).get("en")),
                    "related_title_he": clean_text((item.get("title") or {}).get("he")),
                    "related_description_en": clean_text((item.get("description") or {}).get("en")),
                    "related_description_he": clean_text((item.get("description") or {}).get("he")),
                    "should_display": clean_text(item.get("shouldDisplay")),
                    "order_json": clean_text(item.get("order")),
                    "pools_json": clean_text(item.get("pools")),
                }
            )
    return out


def explode_refs(slug: str, payload: dict) -> List[dict]:
    out = []
    refs = payload.get("refs", {}) or {}
    for ref_type, group in refs.items():
        group_title = clean_text(group.get("title"))
        group_plural_title = clean_text(group.get("pluralTitle"))
        for item in group.get("refs", []) or []:
            ds = item.get("dataSources") or {}
            out.append(
                {
                    "author_slug": slug,
                    "author_name_en": clean_text(payload.get("en")),
                    "author_name_he": clean_text(payload.get("he")),
                    "ref_type_slug": ref_type,
                    "ref_type_title": group_title,
                    "ref_type_plural_title": group_plural_title,
                    "ref": clean_text(item.get("ref")),
                    "anchor_ref": clean_text(item.get("anchorRef") or item.get("ref")),
                    "is_sheet": clean_text(item.get("is_sheet")),
                    "expanded_refs_json": clean_text(item.get("expandedRefs")),
                    "data_sources_slugs": join_unique(ds.keys() if isinstance(ds, dict) else ds),
                    "data_sources_json": clean_text(ds),
                    "description_en": clean_text(((item.get("descriptions") or {}).get("en"))),
                    "description_he": clean_text(((item.get("descriptions") or {}).get("he"))),
                    "order_json": clean_text(item.get("order")),
                    "similar_refs_json": clean_text(item.get("similarRefs")),
                }
            )
    return out


def explode_works(slug: str, payload: dict) -> List[dict]:
    out = []
    for item in payload.get("indexes", []) or []:
        title = item.get("title") or {}
        desc = item.get("description") or {}
        out.append(
            {
                "author_slug": slug,
                "author_name_en": clean_text(payload.get("en")),
                "author_name_he": clean_text(payload.get("he")),
                "work_url": clean_text(item.get("url")),
                "work_title_en": clean_text(title.get("en") if isinstance(title, dict) else title),
                "work_title_he": clean_text(title.get("he") if isinstance(title, dict) else ""),
                "work_description_en": clean_text(desc.get("en") if isinstance(desc, dict) else desc),
                "work_description_he": clean_text(desc.get("he") if isinstance(desc, dict) else ""),
                "raw_json": clean_text(item),
            }
        )
    return out


# -----------------------------
# Workbook helpers
# -----------------------------

def style_header_row(ws):
    header_fill = PatternFill("solid", fgColor="1F4E78")
    header_font = Font(color="FFFFFF", bold=True)
    thin = Side(style="thin", color="D9D9D9")
    for cell in ws[1]:
        cell.fill = header_fill
        cell.font = header_font
        cell.alignment = Alignment(horizontal="center", vertical="center", wrap_text=True)
        cell.border = Border(bottom=thin)
    ws.freeze_panes = "A2"
    ws.auto_filter.ref = ws.dimensions
    ws.sheet_view.rightToLeft = True


def style_data_rows(ws):
    for row in ws.iter_rows(min_row=2):
        for cell in row:
            cell.alignment = Alignment(horizontal="right", vertical="top", wrap_text=True)


def auto_fit_columns(ws, max_width: int = 70):
    dims = defaultdict(int)
    for row in ws.iter_rows():
        for cell in row:
            val = "" if cell.value is None else str(cell.value)
            dims[cell.column] = min(max(dims[cell.column], len(val) + 2), max_width)
    for col_idx, width in dims.items():
        ws.column_dimensions[get_column_letter(col_idx)].width = max(12, width)


def write_sheet(ws, rows: List[dict], preferred_order: List[str] | None = None):
    preferred_order = preferred_order or []
    keys = []
    seen = set()
    for k in preferred_order:
        if k in seen:
            continue
        seen.add(k)
        keys.append(k)
    for row in rows:
        for k in row.keys():
            if k in seen:
                continue
            seen.add(k)
            keys.append(k)

    if not keys:
        ws.append(["מידע"])
        ws.append(["אין שורות נתונים"])
        style_header_row(ws)
        style_data_rows(ws)
        auto_fit_columns(ws)
        return

    translated_headers = [translate_header(k) for k in keys]
    ws.append(translated_headers)
    for row in rows:
        ws.append([row.get(k, "") for k in keys])

    style_header_row(ws)
    style_data_rows(ws)
    auto_fit_columns(ws)


# -----------------------------
# Main flow
# -----------------------------

def main():
    parser = argparse.ArgumentParser(description="Export Sefaria authors to Excel with Hebrew headers")
    parser.add_argument("-o", "--output", default="sefaria_authors_he.xlsx", help="Output Excel path")
    parser.add_argument("--max-authors", type=int, default=None, help="Limit authors for testing")
    parser.add_argument("--sleep", type=float, default=0.15, help="Delay between API calls")
    args = parser.parse_args()

    session = build_session()

    print("Fetching author list...", file=sys.stderr)
    author_cards = scrape_author_cards(session)
    if args.max_authors:
        author_cards = author_cards[: args.max_authors]
    print(f"Found {len(author_cards)} authors", file=sys.stderr)

    wide_rows: List[dict] = []
    link_rows: List[dict] = []
    ref_rows: List[dict] = []
    work_rows: List[dict] = []
    error_rows: List[dict] = []
    raw_payloads: Dict[str, dict] = {}

    all_link_types = set()
    all_ref_types = set()
    all_property_cols = set()

    for i, card in enumerate(author_cards, start=1):
        slug = card["slug"]
        try:
            payload = fetch_topic_payload(session, slug)
            raw_payloads[slug] = payload
            wide_row = build_wide_row(card, payload)
            wide_rows.append(wide_row)
            all_property_cols.update(k for k in wide_row.keys() if k.startswith("property__"))

            links = payload.get("links", {}) or {}
            refs = payload.get("refs", {}) or {}
            all_link_types.update(links.keys())
            all_ref_types.update(refs.keys())

            link_rows.extend(explode_links(slug, payload))
            ref_rows.extend(explode_refs(slug, payload))
            work_rows.extend(explode_works(slug, payload))

            print(f"[{i}/{len(author_cards)}] OK  {slug}", file=sys.stderr)
        except Exception as e:
            error_rows.append(
                {
                    "slug": slug,
                    "topic_url": card.get("topic_url", ""),
                    "api_url": card.get("api_url", ""),
                    "error": clean_text(e),
                }
            )
            print(f"[{i}/{len(author_cards)}] ERR {slug} -> {e}", file=sys.stderr)
        if args.sleep > 0:
            time.sleep(args.sleep)

    for row in wide_rows:
        slug = row["slug"]
        payload = raw_payloads.get(slug, {})

        for link_type in sorted(all_link_types):
            col_prefix = f"links__{safe_slug_for_column(link_type)}"
            summary = summarize_link_group((payload.get("links", {}) or {}).get(link_type, {}) or {})
            row[f"{col_prefix}__group_title"] = summary["group_title"]
            row[f"{col_prefix}__group_plural_title"] = summary["group_plural_title"]
            row[f"{col_prefix}__count"] = summary["count"]
            row[f"{col_prefix}__titles_en"] = summary["titles_en"]
            row[f"{col_prefix}__titles_he"] = summary["titles_he"]
            row[f"{col_prefix}__slugs"] = summary["slugs"]
            row[f"{col_prefix}__urls"] = summary["urls"]
            row[f"{col_prefix}__descriptions_en"] = summary["descriptions_en"]
            row[f"{col_prefix}__descriptions_he"] = summary["descriptions_he"]

        for ref_type in sorted(all_ref_types):
            col_prefix = f"refs__{safe_slug_for_column(ref_type)}"
            summary = summarize_ref_group((payload.get("refs", {}) or {}).get(ref_type, {}) or {})
            row[f"{col_prefix}__group_title"] = summary["group_title"]
            row[f"{col_prefix}__group_plural_title"] = summary["group_plural_title"]
            row[f"{col_prefix}__count"] = summary["count"]
            row[f"{col_prefix}__refs"] = summary["refs"]
            row[f"{col_prefix}__anchor_refs"] = summary["anchor_refs"]
            row[f"{col_prefix}__data_sources"] = summary["data_sources"]
            row[f"{col_prefix}__descriptions_en"] = summary["descriptions_en"]
            row[f"{col_prefix}__descriptions_he"] = summary["descriptions_he"]

    wb = Workbook()
    wb.remove(wb.active)

    preferred_wide = [
        "slug", "topic_url", "api_url", "listing_source_url",
        "name_he", "name_en", "primary_title_he", "primary_title_en",
        "card_name_he", "card_name_en", "card_description",
        "description_he", "description_en",
        "category_description_he", "category_description_en",
        "subclass", "data_source", "num_sources", "description_published",
        "all_titles_he", "all_titles_en",
        "works_count", "works_titles_he", "works_titles_en", "works_urls",
        "works_descriptions_he", "works_descriptions_en",
    ]

    ws = wb.create_sheet(SHEET_TITLES["authors"])
    write_sheet(ws, wide_rows, preferred_order=preferred_wide + sorted(all_property_cols))

    ws = wb.create_sheet(SHEET_TITLES["links"])
    write_sheet(ws, link_rows, preferred_order=[
        "author_slug", "author_name_he", "author_name_en",
        "link_type_slug", "link_type_title", "link_type_plural_title",
        "related_slug", "related_url", "related_title_he", "related_title_en",
        "related_description_he", "related_description_en", "should_display",
        "order_json", "pools_json",
    ])

    ws = wb.create_sheet(SHEET_TITLES["refs"])
    write_sheet(ws, ref_rows, preferred_order=[
        "author_slug", "author_name_he", "author_name_en",
        "ref_type_slug", "ref_type_title", "ref_type_plural_title",
        "ref", "anchor_ref", "description_he", "description_en",
        "data_sources_slugs", "is_sheet", "expanded_refs_json",
        "data_sources_json", "order_json", "similar_refs_json",
    ])

    ws = wb.create_sheet(SHEET_TITLES["works"])
    write_sheet(ws, work_rows, preferred_order=[
        "author_slug", "author_name_he", "author_name_en",
        "work_url", "work_title_he", "work_title_en",
        "work_description_he", "work_description_en", "raw_json",
    ])

    ws = wb.create_sheet(SHEET_TITLES["errors"])
    write_sheet(ws, error_rows, preferred_order=["slug", "topic_url", "api_url", "error"])

    ws = wb.create_sheet(SHEET_TITLES["readme"])
    ws.append([translate_header("key"), translate_header("value")])
    readme_rows = [
        ("authors_list_url", AUTHORS_URL),
        ("topic_api_template", API_TOPIC_TEMPLATE),
        ("api_params_json", json.dumps(API_PARAMS, ensure_ascii=False)),
        ("notes", "גיליון המחברים כולל עמודות דינמיות לכל סוג מאפיין, קשר, וציטוט שהתגלה אצל אחד המחברים."),
        ("generated_rows_authors", len(wide_rows)),
        ("generated_rows_links", len(link_rows)),
        ("generated_rows_refs", len(ref_rows)),
        ("generated_rows_works", len(work_rows)),
        ("errors", len(error_rows)),
    ]
    for k, v in readme_rows:
        ws.append([README_LABEL_MAP.get(k, k), v])
    style_header_row(ws)
    style_data_rows(ws)
    auto_fit_columns(ws)

    wb.save(args.output)
    print(f"Saved: {args.output}", file=sys.stderr)


if __name__ == "__main__":
    main()