import os
import re
import json
import math
import time
import sqlite3
import threading
import zipfile
import hashlib
from dataclasses import dataclass
from collections import deque
from pathlib import Path
from typing import Dict, Optional, Set, List, Tuple

import numpy as np
import faiss
from flask import Flask, request, render_template_string, redirect, jsonify
from huggingface_hub import snapshot_download

# =========================
# CONFIG
# =========================
# Hugging Face repository from which the embedding editions are downloaded.
HF_REPO = "ArieLLL123/otzaria-embeddings"
# Default location of the source SQLite database (used when no setting is saved).
DEFAULT_DB_PATH = r"C:\אוצריא\אוצריא\seforim.db"

# Edition name -> sub-folder inside the Hugging Face repository.
EDITION_PATHS = {
    "v1": "editions/otzaria_embeddings_v1",
    "v2": "editions/otzaria_embeddings_v2",
    "v3": "editions/otzaria_embeddings_v3",
}

# Working directories; all of them live next to this source file.
BASE_DIR = os.path.dirname(__file__)
CACHE_DIR = os.path.join(BASE_DIR, "hf_cache")
RUNTIME_DIR = os.path.join(BASE_DIR, "runtime")

MODELS_ZIPS_DIR = os.path.join(BASE_DIR, "models_zips")   # users place model ZIP files here
LOCAL_MODELS_DIR = os.path.join(BASE_DIR, "local_models") # ZIPs are extracted here once

# JSON file holding the last configuration submitted through the UI.
SETTINGS_PATH = os.path.join(RUNTIME_DIR, "settings.json")

# Create the working directories at import time so later code can assume they exist.
os.makedirs(CACHE_DIR, exist_ok=True)
os.makedirs(RUNTIME_DIR, exist_ok=True)
os.makedirs(MODELS_ZIPS_DIR, exist_ok=True)
os.makedirs(LOCAL_MODELS_DIR, exist_ok=True)

# Text chunking parameters: lines per window and how many lines the window advances.
DEFAULT_WINDOW_LINES = 6
DEFAULT_STRIDE = 3

# =========================
# TEXT TOOLS & HEBREW NLP
# =========================
# Code points U+0591-U+05C7 (Hebrew cantillation marks and vowel points).
NIQQUD_RE   = re.compile(r"[\u0591-\u05C7]")
# Anything shaped like an HTML/XML tag: '<', one or more non-'>' characters, '>'.
HTML_TAG_RE = re.compile(r"<[^>]+>")
# Runs of characters that are NOT ASCII alphanumerics, Hebrew-block characters or quote marks.
NON_WORD_RE = re.compile(r"[^0-9A-Za-z\u0590-\u05FF\"']+")

def clean_text(s: str) -> str:
    """Basic text normalization: strips HTML tags and niqqud.

    Hebrew gershayim/geresh are mapped to the ASCII quote characters, every run
    of characters rejected by NON_WORD_RE becomes a single space, and leading or
    trailing whitespace is dropped. Falsy input yields an empty string.
    """
    if not s:
        return ""
    quote_map = str.maketrans({'״': '"', '׳': "'"})
    stripped = NIQQUD_RE.sub("", HTML_TAG_RE.sub(" ", s))
    normalized = NON_WORD_RE.sub(" ", stripped.translate(quote_map))
    return " ".join(normalized.split())

def hebrew_stem(word: str) -> str:
    """Naive Hebrew stemming: strip one common prefix from *word*.

    Words shorter than four characters are returned unchanged. A prefix is
    removed only when more than two characters remain afterwards; the first
    prefix in the list that satisfies this wins.
    """
    if len(word) < 4:
        return word
    # Compound prefixes come first so they are tried before their single letters.
    known_prefixes = (
        'וכש', 'וש', 'וה', 'וב', 'ול', 'ומ', 'כש', 'שב', 'שה', 'מש', 'מה',
        'ו', 'ה', 'ב', 'ל', 'מ', 'ש', 'כ',
    )
    stripped_forms = (
        word[len(prefix):]
        for prefix in known_prefixes
        if word.startswith(prefix) and len(word) - len(prefix) > 2
    )
    return next(stripped_forms, word)

def get_tokens(text: str) -> Set[str]:
    """Return the set of stemmed words found in *text* after clean_text() normalization."""
    stems = set()
    for token in clean_text(text).split():
        if token:
            stems.add(hebrew_stem(token))
    return stems

def fts_query_from_text(q_clean: str) -> str:
    """Build an FTS5 MATCH expression from free text.

    The text is normalized with clean_text(); tokens of a single character and
    tokens without any letter or digit are dropped. Each remaining token is
    emitted as a double-quoted FTS5 string (embedded double quotes doubled), so
    tokens that contain quote marks or that spell an FTS5 operator
    (AND / OR / NOT / NEAR) are treated as plain search terms instead of
    producing a query syntax error.

    Returns:
        The terms separated by spaces, or an empty string when no usable term
        is left.
    """
    terms = [
        tok
        for tok in clean_text(q_clean).split()
        if len(tok) > 1 and any(ch.isalnum() for ch in tok)
    ]
    return " ".join('"' + tok.replace('"', '""') + '"' for tok in terms)

# =========================
# SETTINGS PERSISTENCE
# =========================
def load_settings() -> dict:
    """Read the persisted settings from SETTINGS_PATH.

    Returns:
        The stored settings dictionary, or an empty dict when the file does not
        exist, cannot be read or parsed, or does not contain a JSON object.
    """
    try:
        with open(SETTINGS_PATH, "r", encoding="utf-8") as f:
            data = json.load(f)
    except FileNotFoundError:
        # First run: nothing has been saved yet.
        return {}
    except (OSError, ValueError) as e:
        # ValueError covers both malformed JSON and undecodable bytes.
        print(f"Warning: could not read settings from {SETTINGS_PATH}: {e}")
        return {}
    # Callers use .get() on the result, so anything but a JSON object is unusable.
    return data if isinstance(data, dict) else {}

def save_settings(data: dict) -> None:
    """Persist *data* as JSON to SETTINGS_PATH (best effort).

    Failures are reported on stdout and never raised. The payload is serialized
    before the file is opened so that a value that cannot be serialized does not
    leave a truncated settings file behind.
    """
    try:
        payload = json.dumps(data, ensure_ascii=False, indent=2)
        with open(SETTINGS_PATH, "w", encoding="utf-8") as f:
            f.write(payload)
    except (OSError, TypeError, ValueError) as e:
        print(f"Warning: could not save settings to {SETTINGS_PATH}: {e}")

# =========================
# ZIP MODEL SUPPORT
# =========================
def sha256_file(path: str) -> str:
    """Return the hexadecimal SHA-256 digest of the file at *path*, read in 1 MiB blocks."""
    digest = hashlib.sha256()
    block_size = 1024 * 1024
    with open(path, "rb") as stream:
        while True:
            block = stream.read(block_size)
            if not block:
                break
            digest.update(block)
    return digest.hexdigest()

def ensure_zip_extracted(zip_path: str) -> str:
    """Extract *zip_path* under LOCAL_MODELS_DIR and return the extraction folder.

    The folder is named after the first 16 hex characters of the archive's
    SHA-256, so replacing the ZIP with different content produces a new folder
    instead of overwriting the previous one. A ``.extracted_ok`` marker file
    written after extraction makes later calls return immediately.

    Raises:
        FileNotFoundError: *zip_path* does not exist.
        RuntimeError: an archive member would resolve outside the target folder.
    """
    if not os.path.exists(zip_path):
        raise FileNotFoundError(f"ZIP לא נמצא: {zip_path}")

    target_dir = os.path.join(LOCAL_MODELS_DIR, sha256_file(zip_path)[:16])
    marker = os.path.join(target_dir, ".extracted_ok")
    if os.path.exists(marker):
        return target_dir

    os.makedirs(target_dir, exist_ok=True)
    root = os.path.abspath(target_dir)

    # Safe extraction: every member is validated (Zip Slip guard) before anything is written.
    with zipfile.ZipFile(zip_path, "r") as archive:
        for info in archive.infolist():
            resolved = os.path.abspath(os.path.join(target_dir, info.filename))
            stays_inside = resolved == root or resolved.startswith(root + os.sep)
            if not stays_inside:
                raise RuntimeError("ZIP לא תקין (path traversal).")
        archive.extractall(target_dir)

    with open(marker, "w", encoding="utf-8") as marker_file:
        marker_file.write(time.strftime("%Y-%m-%d %H:%M:%S"))

    return target_dir

def find_model_files(root_dir: str, edition: str) -> tuple[str, str]:
    """Locate ``vocab.json`` and ``embeddings_last.npy`` below *root_dir*.

    Candidates are sorted so the result does not depend on directory listing
    order. Selection, in order of preference:

    1. a folder that holds both files and whose path contains
       ``otzaria_embeddings_<edition>`` (case-insensitive);
    2. the first vocab file and the first embeddings file whose paths contain
       that key, even if they sit in different folders;
    3. the first folder that holds both files;
    4. the first candidate of each file.

    Returns:
        ``(vocab_path, embeddings_path)`` as strings.

    Raises:
        FileNotFoundError: one of the two files is missing below *root_dir*.
    """
    root = Path(root_dir)
    vocab_files = sorted(root.rglob("vocab.json"))
    emb_files = sorted(root.rglob("embeddings_last.npy"))

    if not vocab_files or not emb_files:
        raise FileNotFoundError(
            "לא מצאתי בתוך ה-ZIP את vocab.json ו/או embeddings_last.npy.\n"
            "ודא שהקבצים קיימים בזיפ."
        )

    prefer_key = f"otzaria_embeddings_{edition}".lower()

    def is_preferred(p: Path) -> bool:
        return prefer_key in str(p).lower()

    # Pair files that live in the same folder so vocab and embeddings belong together.
    emb_by_dir = {p.parent: p for p in emb_files}
    pairs = [(v, emb_by_dir[v.parent]) for v in vocab_files if v.parent in emb_by_dir]

    for vocab, emb in pairs:
        if is_preferred(vocab):
            return str(vocab), str(emb)

    preferred_vocab = [p for p in vocab_files if is_preferred(p)]
    preferred_emb = [p for p in emb_files if is_preferred(p)]
    if preferred_vocab and preferred_emb:
        return str(preferred_vocab[0]), str(preferred_emb[0])

    if pairs:
        vocab, emb = pairs[0]
        return str(vocab), str(emb)

    return str(vocab_files[0]), str(emb_files[0])

# =========================
# DATABASE & STREAMING
# =========================
def get_book_titles(db_path: str) -> Dict[int, str]:
    """Load the ``id -> title`` mapping from the ``books`` table of *db_path*.

    The ``heTitle`` column is tried first; when that query fails the ``title``
    column is used instead. Database errors are reported on stdout and whatever
    was collected so far (possibly nothing) is returned.

    Returns:
        Mapping of book id to title; empty when the file does not exist or the
        table cannot be read.
    """
    titles: Dict[int, str] = {}
    if not os.path.exists(db_path):
        return titles
    try:
        con = sqlite3.connect(db_path)
        try:
            try:
                cur = con.execute("SELECT id, heTitle FROM books")
            except sqlite3.OperationalError:
                # Schema without heTitle: fall back to the plain title column.
                cur = con.execute("SELECT id, title FROM books")
            for book_id, title in cur:
                titles[book_id] = title
        finally:
            # Close the connection even when the fallback query fails as well.
            con.close()
    except sqlite3.Error as e:
        print(f"Error loading titles: {e}")
    return titles

def iter_rows_ordered(db_path: str, chunk_rows: int = 20000):
    """Yield batches of non-empty text rows ordered by ``(bookId, lineIndex)``.

    Rows are read from the ``lines`` table when it can be queried, otherwise
    from ``line``; when neither can be queried the generator yields nothing.
    Each batch is a list of at most *chunk_rows* ``sqlite3.Row`` objects with
    the columns ``id``, ``bookId``, ``lineIndex`` and ``content``.

    The connection is closed when the generator finishes, fails, or is closed
    or discarded before it is exhausted.
    """
    con = sqlite3.connect(db_path)
    try:
        con.row_factory = sqlite3.Row
        con.execute("PRAGMA journal_mode=OFF;")

        # Probe the two table names the schema may use, preferring "lines".
        table_name = None
        for candidate in ("lines", "line"):
            try:
                con.execute(f"SELECT 1 FROM {candidate} LIMIT 1")
            except sqlite3.Error:
                continue
            table_name = candidate
            break
        if table_name is None:
            return

        q = f"""
    SELECT id, bookId, lineIndex, content
    FROM {table_name}
    WHERE content IS NOT NULL AND content != ''
    ORDER BY bookId, lineIndex
    """
        cur = con.execute(q)
        while True:
            rows = cur.fetchmany(chunk_rows)
            if not rows:
                break
            yield rows
    finally:
        con.close()

def iter_chunks(db_path: str, max_chunks: int, window_lines: int, stride: int):
    """Yield sliding-window text chunks from the rows of iter_rows_ordered().

    Lines shorter than three characters are skipped. Within one book, each time
    *window_lines* lines are buffered they are joined with spaces; the window is
    yielded when its cleaned text is longer than 30 characters, and the buffer
    then advances by *stride* lines. The buffer is reset whenever the book id
    changes. Iteration stops once *max_chunks* chunks have been yielded.

    Each chunk is a dict with the keys ``bookId``, ``startLine`` (lineIndex of
    the window's first line), ``text`` (raw joined text) and ``clean``
    (clean_text() of ``text``).
    """
    pending = deque()  # (lineIndex, text) pairs of the book currently being read
    active_book = None
    emitted = 0

    for batch in iter_rows_ordered(db_path):
        for row in batch:
            book_id = row["bookId"]
            if book_id != active_book:
                active_book = book_id
                pending.clear()

            line_text = str(row["content"])
            if len(line_text) < 3:
                continue

            pending.append((row["lineIndex"], line_text))
            if len(pending) < window_lines:
                continue

            window = list(pending)[-window_lines:]
            raw_text = " ".join(text for _, text in window)
            cleaned = clean_text(raw_text)

            if len(cleaned) > 30:
                yield {
                    "bookId": active_book,
                    "startLine": window[0][0],
                    "text": raw_text,
                    "clean": cleaned,
                }
                emitted += 1
                if emitted >= max_chunks:
                    return

            # Slide the window forward, whether or not it was emitted.
            for _ in range(stride):
                if not pending:
                    break
                pending.popleft()

# =========================
# ENGINE CORE
# =========================
@dataclass
class LoadedModel:
    """Word-embedding model held in memory, as assembled by Engine.load_resources()."""
    edition: str            # edition name the model was requested for (e.g. "v3")
    vocab: Dict[str, int]   # word -> index used to look up rows of emb_norm and entries of idf
    emb_norm: np.ndarray    # float32 embedding matrix whose non-zero rows are scaled to unit L2 norm
    idf: np.ndarray         # float32 per-word weight computed from "freqs"; all ones when freqs do not match the vocab

@dataclass
class BuiltIndex:
    """A searchable index: the FAISS vector index plus its companion SQLite metadata file."""
    faiss_index: faiss.Index  # vector index whose ids equal the rowid values in the metadata database
    meta_db_path: str         # path of the SQLite file holding the "chunks" and "chunks_fts" tables
    count: int                # number of chunks in the index

class Engine:
    def __init__(self):
        """Create an engine with no model or index loaded and restore the saved settings."""
        self._lock = threading.RLock()
        self.status = {"state": "idle", "msg": "המערכת מוכנה", "progress": 0}
        self.model: Optional[LoadedModel] = None
        self.built: Optional[BuiltIndex] = None
        self.book_map: Dict[int, str] = {}
        # Last configuration read from the settings file (persisted between runs).
        self.last_cfg = load_settings()

    def _update(self, state, msg, progress):
        """Publish a new status snapshot (progress truncated to int) and echo it to stdout."""
        snapshot = {"state": state, "msg": msg, "progress": int(progress)}
        with self._lock:
            self.status = snapshot
        print("[{}] {} ({}%)".format(state, msg, progress))

    def _hf_snapshot_offline_first(self, allow_patterns: List[str]) -> str:
        """Resolve a snapshot of HF_REPO, trying the local cache before the network.

        The first attempt is made with ``local_files_only=True``; if it raises,
        the same request is repeated without that flag. Returns whatever
        ``snapshot_download`` returns.
        """
        request_args = dict(
            repo_id=HF_REPO,
            repo_type="model",
            cache_dir=CACHE_DIR,
            allow_patterns=allow_patterns,
        )
        try:
            return snapshot_download(local_files_only=True, **request_args)
        except Exception:
            return snapshot_download(**request_args)

    def load_resources(
        self,
        db_path: str,
        edition: str = "v3",
        model_source: str = "hf",  # hf / zip
        zip_path: str = ""
    ):
        """Load book titles and the embedding model into the engine.

        When *db_path* exists the book-title map is refreshed from it. The
        model files come from a local ZIP when *model_source* is ``"zip"``
        (*zip_path*, or ``otzaria_embeddings_<edition>.zip`` in MODELS_ZIPS_DIR
        when it is empty) and from the Hugging Face snapshot otherwise, where an
        unknown edition falls back to the "v3" folder.

        On success ``self.model`` is replaced and the status becomes "idle";
        on any failure the status becomes "error" and the exception is re-raised.
        """
        if db_path and os.path.exists(db_path):
            self.book_map = get_book_titles(db_path)

        try:
            self._update("downloading", f"טוען מודל {edition} ({model_source})...", 5)

            if model_source != "zip":
                rel = EDITION_PATHS.get(edition, EDITION_PATHS["v3"])
                snapshot_dir = self._hf_snapshot_offline_first(
                    [f"{rel}/vocab.json", f"{rel}/embeddings_last.npy"]
                )
                edition_dir = os.path.join(snapshot_dir, rel)
                vocab_path = os.path.join(edition_dir, "vocab.json")
                emb_path = os.path.join(edition_dir, "embeddings_last.npy")
            else:
                # Default: look for a ZIP with the standard name inside models_zips.
                archive = zip_path or os.path.join(
                    MODELS_ZIPS_DIR, f"otzaria_embeddings_{edition}.zip"
                )
                vocab_path, emb_path = find_model_files(ensure_zip_extracted(archive), edition)

            with open(vocab_path, "r", encoding="utf-8") as vocab_file:
                meta = json.load(vocab_file)

            # Scale every row to unit length; all-zero rows are left as zeros.
            raw = np.load(emb_path).astype(np.float32)
            lengths = np.linalg.norm(raw, axis=1, keepdims=True)
            lengths[lengths == 0] = 1
            unit_rows = raw / lengths

            vocab = meta["vocab"]
            freqs = np.array(meta.get("freqs", []), dtype=np.float64)
            if len(freqs) != len(vocab):
                # No usable frequency table: weight every word equally.
                idf = np.ones(len(vocab), dtype=np.float32)
            else:
                idf = np.log((np.sum(freqs) + 1) / (freqs + 1)) + 1

            self.model = LoadedModel(edition, vocab, unit_rows, idf.astype(np.float32))
            self._update("idle", "המודל נטען בהצלחה", 100)

        except Exception as e:
            self._update("error", f"שגיאה בטעינת מודל: {e}", 0)
            raise

    def _stamp(self, edition: str, max_chunks: int) -> str:
        """Return the file-name stem that identifies an index build."""
        # Important: the stamp covers edition + chunk limit + window/stride, so a
        # cached index is only reused for exactly the parameters it was built with.
        parts = (
            f"{edition}",
            f"N{max_chunks}",
            f"W{DEFAULT_WINDOW_LINES}",
            f"S{DEFAULT_STRIDE}",
        )
        return "_".join(parts)

    def build_index(self, db_path: str, max_chunks: int):
        """Load the cached index for the current model, or build it from *db_path*.

        Does nothing when no model is loaded. Index files are named after
        ``_stamp()`` inside RUNTIME_DIR: ``<stamp>.index`` (FAISS) and
        ``<stamp>.sqlite`` (chunk metadata plus an FTS5 table of the cleaned
        text). When both files exist they are loaded; otherwise up to
        *max_chunks* chunks from iter_chunks() are embedded and stored.

        The FAISS file is written last, after stale files were removed, so the
        presence of both files means a build ran to completion. On any failure
        the status is set to "error" and the exception is re-raised.
        """
        if not self.model:
            return

        stamp = self._stamp(self.model.edition, max_chunks)
        idx_path = os.path.join(RUNTIME_DIR, f"{stamp}.index")
        meta_db_path = os.path.join(RUNTIME_DIR, f"{stamp}.sqlite")

        try:
            # If a matching index already exists, load it instead of rebuilding.
            if os.path.exists(idx_path) and os.path.exists(meta_db_path):
                self._update("loading", "טוען אינדקס קיים...", 50)
                idx = faiss.read_index(idx_path)
                self.built = BuiltIndex(idx, meta_db_path, idx.ntotal)
                if not self.book_map:
                    self.book_map = get_book_titles(db_path)
                self._update("ready", f"מוכן לחיפוש ({idx.ntotal:,} רשומות)", 100)
                return

            self._update("indexing", "מתחיל בבניית אינדקס (זה יקח זמן)...", 0)

            # Remove leftovers of an interrupted build so a stale file can never
            # be paired with a freshly written one.
            for stale in (idx_path, meta_db_path):
                if os.path.exists(stale):
                    os.remove(stale)

            dim = self.model.emb_norm.shape[1]
            index = faiss.IndexIDMap(faiss.IndexFlatIP(dim))

            vectors, ids, db_buffer, fts_buffer = [], [], [], []
            batch_size = 5000
            total_processed = 0
            start_time = time.time()

            con = sqlite3.connect(meta_db_path)
            try:
                con.execute("PRAGMA synchronous = OFF")
                con.execute("PRAGMA journal_mode = MEMORY")

                con.execute("""
            CREATE TABLE chunks (
                rowid INTEGER PRIMARY KEY,
                bookId INTEGER,
                startLine INTEGER,
                text TEXT
            )
        """)
                con.execute("CREATE INDEX idx_book ON chunks(bookId)")

                # FTS5 (BM25): the cleaned text is what gets indexed.
                con.execute("""
            CREATE VIRTUAL TABLE chunks_fts
            USING fts5(text, content='');
        """)

                def flush():
                    """Write the buffered vectors and rows, then empty the buffers."""
                    index.add_with_ids(
                        np.vstack(vectors).astype(np.float32),
                        np.array(ids, dtype="int64"),
                    )
                    con.executemany("INSERT INTO chunks VALUES (?,?,?,?)", db_buffer)
                    con.executemany("INSERT INTO chunks_fts(rowid, text) VALUES (?,?)", fts_buffer)
                    con.commit()
                    for buf in (vectors, ids, db_buffer, fts_buffer):
                        buf.clear()

                for chunk in iter_chunks(db_path, max_chunks, DEFAULT_WINDOW_LINES, DEFAULT_STRIDE):
                    vec = self._text_to_vec(chunk["clean"])
                    if vec is None:
                        continue

                    # The running count doubles as the id shared by FAISS and SQLite.
                    current_id = total_processed
                    vectors.append(vec)
                    ids.append(current_id)
                    db_buffer.append((current_id, chunk["bookId"], chunk["startLine"], chunk["text"]))
                    fts_buffer.append((current_id, chunk["clean"]))
                    total_processed += 1

                    if len(vectors) >= batch_size:
                        flush()
                        elapsed = time.time() - start_time
                        rate = total_processed / (elapsed + 0.1)
                        pct = min(95, int((total_processed / max(1, max_chunks)) * 100))
                        self._update("indexing", f"עובדו {total_processed:,} רשומות ({int(rate)} לשנייה)", pct)

                if vectors:
                    flush()
            finally:
                con.close()

            faiss.write_index(index, idx_path)

            self.built = BuiltIndex(index, meta_db_path, total_processed)
            self._update("ready", "הבנייה הושלמה בהצלחה!", 100)

        except Exception as e:
            # Without this the UI would keep showing "indexing" forever.
            self._update("error", f"שגיאה בבניית אינדקס: {e}", 0)
            raise

    def _text_to_vec(self, text: str):
        """Embed whitespace-separated *text* as one unit-length vector.

        Words missing from the model vocabulary are ignored. The vectors of the
        remaining words are multiplied by their idf weight, summed, and the sum
        is scaled to unit length.

        Returns:
            The vector, or None when no model is loaded, no word is known, or
            the weighted sum has (almost) zero length.
        """
        model = self.model
        if not model:
            return None

        known = [model.vocab[w] for w in text.split() if w in model.vocab]
        if not known:
            return None

        weights = model.idf[known]
        combined = (model.emb_norm[known] * weights[:, None]).sum(axis=0)

        length = np.linalg.norm(combined)
        return None if length < 1e-9 else combined / length

    def _fts_candidates(self, q_clean: str, limit: int) -> List[Tuple[int, float]]:
        """Return up to *limit* full-text matches as ``(rowid, bm25_raw)`` pairs.

        In FTS5 a smaller bm25 value means a more relevant row; results are
        ordered by rank so the limit keeps the best matches rather than an
        arbitrary subset. Returns an empty list when no index is built, the
        query has no usable terms, or the database reports an error.
        """
        if not self.built:
            return []
        fts_q = fts_query_from_text(q_clean)
        if not fts_q:
            return []
        con = sqlite3.connect(self.built.meta_db_path)
        try:
            rows = con.execute(
                "SELECT rowid, bm25(chunks_fts) AS bm FROM chunks_fts "
                "WHERE chunks_fts MATCH ? ORDER BY rank LIMIT ?",
                (fts_q, int(limit)),
            ).fetchall()
        except sqlite3.Error as e:
            # Best effort: the vector stage can still produce results on its own.
            print(f"FTS query failed: {e}")
            return []
        finally:
            con.close()
        return [(int(rowid), float(bm)) for rowid, bm in rows]

    def search(self, query: str, book_filter: Optional[int] = None, top_k: int = 20):
        """Run a hybrid (vector + BM25) search and return the best *top_k* chunks.

        Candidates are the union of the nearest vectors in the FAISS index and
        the FTS5 matches. Each candidate is scored as a weighted sum of vector
        similarity, BM25 relevance, query-token overlap, exact-phrase presence
        and token proximity.

        Args:
            query: free-text query.
            book_filter: restrict results to this book id; ``None`` disables the
                filter (book id 0 is a valid filter value).
            top_k: maximum number of results.

        Returns:
            A list of dicts with the keys ``score``, ``text``, ``source``,
            ``book_id`` and ``book_title``, best first. Empty when no model or
            index is loaded or when the query contains no known word.
        """
        if not self.model or not self.built:
            return []

        q_clean = clean_text(query)
        q_vec = self._text_to_vec(q_clean)
        if q_vec is None:
            return []

        # 1) Wide set of vector candidates (FAISS pads missing results with id -1).
        candidates_k = max(top_k * 20, 200)
        scores, ids = self.built.faiss_index.search(
            np.array([q_vec], dtype=np.float32), candidates_k
        )
        vec_scores = {int(fid): float(scr) for fid, scr in zip(ids[0], scores[0]) if int(fid) >= 0}

        # 2) FTS candidates (BM25)
        fts_bm = dict(self._fts_candidates(q_clean, candidates_k))

        # 3) Union, without duplicates
        union_ids = list(dict.fromkeys([*vec_scores, *fts_bm]))
        if not union_ids:
            return []

        # 4) Fetch chunk metadata
        placeholders = ",".join(["?"] * len(union_ids))
        sql = f"SELECT rowid, bookId, startLine, text FROM chunks WHERE rowid IN ({placeholders})"
        params: List = list(union_ids)

        if book_filter is not None:
            sql += " AND bookId = ?"
            params.append(int(book_filter))

        con = sqlite3.connect(self.built.meta_db_path)
        try:
            con.row_factory = sqlite3.Row
            rows = con.execute(sql, params).fetchall()
        finally:
            con.close()

        if not rows:
            return []

        def bm_to_rel(bm: Optional[float]) -> float:
            """Map a raw FTS5 bm25 value to [0, 1); 0.0 for rows without an FTS match."""
            if bm is None:
                return 0.0
            # FTS5 returns bm25 multiplied by -1 (more negative = better), so the
            # magnitude is the relevance strength.
            strength = max(0.0, -bm)
            return strength / (1.0 + strength)

        q_tokens = get_tokens(q_clean)

        results = []
        for r in rows:
            rid = int(r["rowid"])
            chunk_txt = r["text"]
            chunk_clean = clean_text(chunk_txt)

            # First position of every stem in the chunk; its keys are the chunk's token set.
            first_pos = {}
            for pos, word in enumerate(chunk_clean.split()):
                first_pos.setdefault(hebrew_stem(word), pos)

            base_vec = vec_scores.get(rid, 0.0)
            bm_rel = bm_to_rel(fts_bm.get(rid))

            found_indices = [first_pos[qw] for qw in q_tokens if qw in first_pos]
            intersection = len(found_indices)
            overlap = (intersection / len(q_tokens)) if q_tokens else 0.0

            phrase = 1.0 if (q_clean and q_clean in chunk_clean) else 0.0

            # Proximity: matched tokens divided by the span they cover.
            proximity = 0.0
            if intersection > 1:
                span = max(found_indices) - min(found_indices)
                proximity = min(intersection / (span + 1), 1.0)

            # Hybrid weighting (tunable)
            final_score = (
                (base_vec * 0.35) +
                (bm_rel * 0.25) +
                (overlap * 0.25) +
                (phrase * 0.10) +
                (proximity * 0.05)
            )

            book_id = int(r["bookId"])
            book_title = self.book_map.get(book_id, f"ספר {book_id}")
            results.append({
                "score": float(final_score),
                "text": chunk_txt,
                "source": f"{book_title}, שורה {int(r['startLine'])}",
                "book_id": book_id,
                "book_title": book_title
            })

        results.sort(key=lambda x: x["score"], reverse=True)
        return results[:top_k]

# Single module-wide engine instance used by the request handlers and background threads.
ENGINE = Engine()

# =========================
# FLASK WEB APP
# =========================
app = Flask(__name__)
# NOTE(review): hard-coded secret key. Nothing in this file appears to use Flask
# sessions; load the key from configuration if sessions are ever introduced.
app.secret_key = "otzaria_ai_secret_v3"

HTML_TEMPLATE = """
<!DOCTYPE html>
<html lang="he" dir="rtl">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>אוצריא AI - מנוע חכם</title>
    <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/css/bootstrap.min.css" rel="stylesheet">
    <link href="https://fonts.googleapis.com/css2?family=Heebo:wght@300;400;700&family=Frank+Ruhl+Libre:wght@400;700&display=swap" rel="stylesheet">
    <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bootstrap-icons@1.10.5/font/bootstrap-icons.css">
    <style>
        :root { --primary:#2c3e50; --accent:#d35400; --bg-light:#fdfbf7; --text-serif:'Frank Ruhl Libre',serif; --text-sans:'Heebo',sans-serif; }
        body { background-color: var(--bg-light); font-family: var(--text-sans); color:#333; }
        .sidebar { background:#fff; height:100vh; position:fixed; right:0; top:0; width:360px; padding:2rem 1.5rem; border-left:1px solid #e0e0e0; overflow-y:auto; }
        .main-content { margin-right:360px; padding:2rem; max-width:900px; }
        @media (max-width: 900px) { .sidebar{position:relative;width:100%;height:auto;border-left:none;} .main-content{margin-right:0;} }
        .brand { font-size:1.8rem; font-weight:700; color:var(--primary); margin-bottom:2rem; display:block; text-decoration:none; }
        .brand span { color:var(--accent); }
        .search-box { background:#fff; padding:1.5rem; border-radius:12px; box-shadow:0 4px 20px rgba(0,0,0,0.06); margin-bottom:2rem; border:1px solid #eee; }
        .search-input { border:1px solid #ddd; padding:0.8rem; font-size:1.1rem; border-radius:8px; }
        .search-input:focus { border-color:var(--accent); box-shadow:0 0 0 3px rgba(211,84,0,0.1); }
        .result-card { background:#fff; border-radius:8px; padding:1.2rem; margin-bottom:1.2rem; border-right:4px solid var(--accent); box-shadow:0 2px 8px rgba(0,0,0,0.03); transition:transform 0.2s; }
        .result-card:hover { transform:translateY(-2px); box-shadow:0 5px 15px rgba(0,0,0,0.08); }
        .res-header { display:flex; justify-content:space-between; align-items:center; margin-bottom:0.8rem; }
        .res-source { font-size:0.95rem; font-weight:700; color:var(--primary); }
        .res-text { font-family:var(--text-serif); font-size:1.2rem; line-height:1.6; color:#222; }
        mark { background-color:#ffeaa7; padding:0 2px; border-radius:3px; }
        .stats-pill { font-size:0.8rem; background:#eee; padding:4px 10px; border-radius:20px; color:#666; }
        .hint { font-size:0.8rem; color:#666; }
    </style>
</head>
<body>

<div class="sidebar">
    <a href="/" class="brand">אוצריא <span>AI</span></a>

    <div class="mb-4">
        <label class="small fw-bold text-muted mb-2">סטטוס אינדקס</label>
        <div class="progress" style="height: 6px;">
            <div id="status-bar" class="progress-bar bg-success" style="width: 0%"></div>
        </div>
        <div class="d-flex justify-content-between mt-1">
            <span class="small text-muted" id="status-text">ממתין...</span>
            <span class="small fw-bold" id="idx-count">{{ idx_count }}</span>
        </div>
    </div>

    <hr>

    <form action="/setup" method="post" class="mt-4">
        <h6 class="fw-bold mb-3"><i class="bi bi-gear"></i> הגדרות בנייה</h6>

        <div class="mb-3">
            <label class="form-label small">נתיב בסיס נתונים</label>
            <input type="text" name="db_path" class="form-control form-control-sm" value="{{ db_path }}">
        </div>

        <div class="mb-3">
            <label class="form-label small">Edition (ברירת מחדל v3)</label>
            <select name="edition" class="form-select form-select-sm">
                {% for e in ["v1","v2","v3"] %}
                    <option value="{{ e }}" {% if e == edition %}selected{% endif %}>{{ e }}</option>
                {% endfor %}
            </select>
        </div>

        <div class="mb-3">
            <label class="form-label small">מספר רשומות (Chunks)</label>
            <input type="number" name="max_chunks" class="form-control form-control-sm" value="{{ max_chunks }}">
        </div>

        <hr class="my-3">

        <div class="mb-2">
            <label class="form-label small">מקור מודל</label>
            <select name="model_source" class="form-select form-select-sm">
                <option value="hf" {% if model_source == "hf" %}selected{% endif %}>HuggingFace (Offline-first)</option>
                <option value="zip" {% if model_source == "zip" %}selected{% endif %}>ZIP מקומי</option>
            </select>
            <div class="hint mt-1">
                אם בוחרים ZIP מקומי: אפשר להשאיר נתיב ריק ואז ייטען אוטומטית מ-<code>models_zips/otzaria_embeddings_v3.zip</code>
            </div>
        </div>

        <div class="mb-3">
            <label class="form-label small">נתיב ZIP (אופציונלי)</label>
            <input type="text" name="zip_path" class="form-control form-control-sm" value="{{ zip_path or '' }}" placeholder="למשל: C:\\Downloads\\otzaria_embeddings_v3.zip">
        </div>

        <button type="submit" class="btn btn-dark btn-sm w-100">בנה / טען</button>
        <div class="small text-muted mt-2">
            אם כבר קיים אינדקס תואם ב-runtime — הוא ייטען אוטומטית בלי בנייה מחדש.
        </div>
    </form>
</div>

<div class="main-content">
    <form action="/" method="get" class="search-box">
        <div class="row g-2">
            <div class="col-md-8">
                <input type="text" name="q" class="form-control search-input" placeholder="חיפוש חופשי" value="{{ query or '' }}" autofocus>
            </div>
            <div class="col-md-4">
                <select name="book_id" class="form-select" style="height: 100%; padding: 0.8rem;">
                    <option value="">כל הספרים</option>
                    {% for bid, title in books.items() %}
                        <option value="{{ bid }}" {% if selected_book|int == bid %}selected{% endif %}>{{ title }}</option>
                    {% endfor %}
                </select>
            </div>
            <div class="col-12 mt-2 text-end">
                <button type="submit" class="btn btn-warning text-white fw-bold px-4">חפש</button>
            </div>
        </div>
    </form>

    {% if query %}
        <div class="d-flex justify-content-between align-items-center mb-4">
            <h5 class="mb-0">תוצאות עבור: <strong>{{ query }}</strong></h5>
            <span class="stats-pill">{{ results|length }} תוצאות</span>
        </div>

        {% if not results %}
             <div class="text-center py-5 text-muted">
                <i class="bi bi-search display-4 opacity-25"></i>
                <p class="mt-3">לא נמצאו תוצאות מתאימות.</p>
                {% if selected_book %}
                <p class="small">נסה להסיר את הסינון לפי ספר.</p>
                {% endif %}
             </div>
        {% endif %}

        {% for r in results %}
        <div class="result-card">
            <div class="res-header">
                <span class="res-source"><i class="bi bi-book-fill me-1 opacity-50"></i> {{ r.source }}</span>
            </div>
            <div class="res-text">
                {{ r.text | highlight(query) | safe }}
            </div>
        </div>
        {% endfor %}

    {% else %}
        <div class="text-center py-5 opacity-75">
            <h4>ברוכים הבאים למנוע החיפוש הסמנטי</h4>
            <p>המערכת משתמשת ב-Hybrid Search (וקטורי + BM25) כדי לשפר דיוק.</p>
            <p class="hint">להפצה עם ZIP: שים ZIP בתיקייה <code>models_zips</code> ובחר “ZIP מקומי”.</p>
        </div>
    {% endif %}
</div>

<script>
    function updateStatus() {
        fetch('/status')
            .then(r => r.json())
            .then(data => {
                document.getElementById('status-text').innerText = data.msg;
                document.getElementById('status-bar').style.width = data.progress + '%';
                if(data.count) document.getElementById('idx-count').innerText = data.count.toLocaleString();
                let interval = (data.state === 'indexing' || data.state === 'downloading') ? 1000 : 5000;
                setTimeout(updateStatus, interval);
            })
            .catch(e => setTimeout(updateStatus, 5000));
    }
    document.addEventListener('DOMContentLoaded', updateStatus);
</script>

</body>
</html>
"""

# =========================
# HELPER FILTERS
# =========================
def highlight_text(text, query):
    """Wrap occurrences of the query words in *text* with ``<mark>`` tags.

    Query words are normalized with clean_text() and stemmed with
    hebrew_stem(); words of a single character are ignored. A stem matches
    when it appears as a whole word - preceded by the start of the text,
    whitespace, a quote mark or a hyphen, and followed by whitespace, a quote
    mark, ``.``, ``,``, ``-`` or the end of the text - optionally preceded by
    up to three prefix letters, the longest prefix hebrew_stem() removes.

    The text itself is not escaped; it is returned unchanged when there is
    nothing to highlight.
    """
    if not text or not query:
        return text
    stems = {hebrew_stem(w) for w in clean_text(query).split() if len(w) > 1}
    if not stems:
        return text

    # Longest stems first so the alternation prefers the most specific match.
    alternatives = "|".join(re.escape(s) for s in sorted(stems, key=lambda s: (-len(s), s)))
    pattern = (
        r"(?<![^\s\"'\-])"              # start of text or a delimiter right before the word
        r"[ומשהלבכ]{0,3}"               # optional prefix letters
        r"(?:" + alternatives + r")"
        r"(?=[\s\"'.,\-]|$)"            # delimiter or end of text right after the word
    )

    try:
        return re.sub(pattern, lambda m: f"<mark>{m.group(0)}</mark>", text)
    except re.error:
        return text

@app.template_filter('highlight')
def highlight_filter(text, query):
    """Jinja filter ``highlight``: forwards *text* and *query* to highlight_text()."""
    return highlight_text(text, query)

# =========================
# ROUTES
# =========================
@app.route("/")
def index():
    """Render the main page and, when a query is given, its search results.

    Query parameters: ``q`` (search text) and ``book_id`` (optional book
    filter). When a query arrives before an index is available, one background
    load/build is started from the saved settings and the page is rendered
    without results; no further build is started while one is running.
    """
    q = request.args.get("q", "").strip()
    book_id_str = request.args.get("book_id", "").strip()
    # isdecimal() only accepts characters int() can parse (isdigit() also
    # accepts e.g. superscript digits, which int() rejects).
    book_id = int(book_id_str) if book_id_str.isdecimal() else None

    cfg = ENGINE.last_cfg or {}
    db_path = cfg.get("db_path", DEFAULT_DB_PATH)
    edition = cfg.get("edition", "v3")
    try:
        max_chunks = int(cfg.get("max_chunks", 100000))
    except (TypeError, ValueError):
        max_chunks = 100000
    model_source = cfg.get("model_source", "hf")
    zip_path = cfg.get("zip_path", "")

    results = []
    if q:
        if ENGINE.built:
            results = ENGINE.search(q, book_filter=book_id, top_k=20)
        else:
            # No index yet: try to load/build it from the settings (or defaults),
            # but never start a second build next to one that is still running.
            autoload_name = "otzaria-autoload"
            busy = ENGINE.status.get("state") in ("downloading", "loading", "indexing")
            already_started = any(
                t.name == autoload_name and t.is_alive() for t in threading.enumerate()
            )
            if not busy and not already_started:
                def task():
                    try:
                        ENGINE.load_resources(
                            db_path, edition, model_source=model_source, zip_path=zip_path
                        )
                        ENGINE.build_index(db_path, max_chunks)
                    except Exception as e:
                        print(f"Auto-load failed: {e}")

                threading.Thread(target=task, name=autoload_name, daemon=True).start()

    # Titles may be NULL in the database; sort those as empty strings.
    all_books = ENGINE.book_map.copy()
    sorted_books = dict(
        sorted(all_books.items(), key=lambda item: str(item[1] or ""))[:800]
    )

    idx_c = ENGINE.built.count if ENGINE.built else 0

    return render_template_string(
        HTML_TEMPLATE,
        query=q,
        results=results,
        db_path=db_path,
        edition=edition,
        max_chunks=max_chunks,
        model_source=model_source,
        zip_path=zip_path,
        idx_count=idx_c,
        books=sorted_books,
        selected_book=book_id
    )

@app.route("/setup", methods=["POST"])
def setup():
    """Store the submitted build settings and start loading/building in the background.

    Form fields: ``db_path``, ``edition``, ``max_chunks``, ``model_source`` and
    ``zip_path``. A blank database path falls back to DEFAULT_DB_PATH and an
    invalid or non-positive ``max_chunks`` is replaced by a usable value
    instead of failing the request. Redirects to the main page immediately.
    """
    db = request.form.get("db_path", DEFAULT_DB_PATH).strip() or DEFAULT_DB_PATH
    edition = request.form.get("edition", "v3").strip()
    try:
        mc = int(request.form.get("max_chunks", 100000))
    except (TypeError, ValueError):
        mc = 100000
    mc = max(1, mc)
    model_source = request.form.get("model_source", "hf").strip()
    zip_path = request.form.get("zip_path", "").strip()

    # Persist the settings
    ENGINE.last_cfg = {
        "db_path": db,
        "edition": edition,
        "max_chunks": mc,
        "model_source": model_source,
        "zip_path": zip_path,
    }
    save_settings(ENGINE.last_cfg)

    def task():
        try:
            ENGINE.load_resources(db, edition, model_source=model_source, zip_path=zip_path)
            ENGINE.build_index(db, mc)
        except Exception as e:
            # Report the failure here rather than as an unhandled thread traceback.
            print(f"Setup task failed: {e}")

    threading.Thread(target=task, daemon=True).start()
    return redirect("/")

@app.route("/status")
def status_api():
    """Return the engine status as JSON, plus ``count`` when an index is loaded."""
    payload = dict(ENGINE.status)
    built = ENGINE.built
    if built:
        payload["count"] = built.count
    return jsonify(payload)

# =========================
# MAIN
# =========================
if __name__ == "__main__":
    # Automatic loading based on the saved settings
    cfg = load_settings()
    dbp = cfg.get("db_path", DEFAULT_DB_PATH)
    ed = cfg.get("edition", "v3")
    try:
        mc = int(cfg.get("max_chunks", 100000))
    except (TypeError, ValueError):
        mc = 100000
    model_source = cfg.get("model_source", "hf")
    zip_path = cfg.get("zip_path", "")

    ENGINE.last_cfg = {
        "db_path": dbp,
        "edition": ed,
        "max_chunks": mc,
        "model_source": model_source,
        "zip_path": zip_path,
    }

    def boot():
        """Load the model and index in the background so the server starts immediately."""
        try:
            ENGINE.load_resources(dbp, ed, model_source=model_source, zip_path=zip_path)
            ENGINE.build_index(dbp, mc)
        except Exception as e:
            print("Boot warning:", e)

    threading.Thread(target=boot, daemon=True).start()

    print("Starting Enhanced Server at http://127.0.0.1:8000")
    # The debug reloader re-executes this script in a child process, which would
    # start a second boot thread building the same index files concurrently.
    app.run(host="127.0.0.1", port=8000, debug=True, use_reloader=False)
