import os
import re
import json
import math
import time
import sqlite3
import shutil
import threading
import zipfile
import hashlib
import functools
from dataclasses import dataclass
from collections import deque
from pathlib import Path
from typing import Dict, Optional, Set, List, Tuple

import tkinter as tk
from tkinter import filedialog
import requests
import numpy as np
import faiss
from flask import Flask, request, render_template_string, redirect, jsonify, flash
from huggingface_hub import snapshot_download

# =========================
# CONFIG
# =========================
# Remote sources: the embeddings repository and the library database archive.
HF_REPO = "ArieLLL123/otzaria-embeddings"
DEFAULT_DB_PATH = r"C:\אוצריא\אוצריא\seforim.db"
DB_DOWNLOAD_URL = "https://github.com/Otzaria/otzaria-library/releases/download/library-db-1/seforim.zip"

# Edition tag -> sub-path of that edition inside HF_REPO.
EDITION_PATHS = {tag: f"editions/otzaria_embeddings_{tag}" for tag in ("v1", "v2", "v3")}

# Working directories, all located next to this file.
BASE_DIR = os.path.dirname(__file__)
CACHE_DIR, RUNTIME_DIR, DB_DIR, MODELS_ZIPS_DIR, LOCAL_MODELS_DIR = (
    os.path.join(BASE_DIR, _name)
    for _name in ("hf_cache", "runtime", "db", "models_zips", "local_models")
)

# Persisted UI/engine settings live in the runtime directory.
SETTINGS_PATH = os.path.join(RUNTIME_DIR, "settings.json")

DEFAULT_TOP_K = 20
DEFAULT_MIN_SCORE = 0.0

# Create the working directories at import time so later code can assume they exist.
for _work_dir in (CACHE_DIR, RUNTIME_DIR, DB_DIR, MODELS_ZIPS_DIR, LOCAL_MODELS_DIR):
    os.makedirs(_work_dir, exist_ok=True)
del _work_dir

try:
    from werkzeug.utils import secure_filename
except ImportError:
    def secure_filename(filename):
        """Fallback sanitizer used only when werkzeug cannot be imported.

        Keeps just the final path component (both ``/`` and ``\\`` count as
        separators), replaces every run of characters other than word
        characters, dots and dashes with ``_``, and strips leading/trailing
        dots and underscores. The result can therefore never contain a
        directory part or be ``..``; it may be empty for hostile input.
        """
        base = os.path.basename(str(filename).replace("\\", "/"))
        return re.sub(r"[^\w.\-]+", "_", base).strip("._")

# Smart word-window settings (smart chunking)
IDEAL_CHUNK_WORDS = 50   # word count from which a punctuation mark is looked for
MAX_CHUNK_WORDS = 60     # upper bound at which a chunk is cut
DEFAULT_OVERLAP_WORDS = 10 # base overlap between consecutive chunks

# =========================
# TEXT TOOLS & HEBREW NLP
# =========================
# Code points U+0591..U+05C7 (Hebrew cantillation marks and vowel points).
# NOTE(review): this range also contains the maqaf (U+05BE), so removing a
# match without inserting a space fuses maqaf-joined words - confirm that this
# matches how the embedding vocabulary was tokenized.
NIQQUD_RE   = re.compile(r"[\u0591-\u05C7]")
# Anything that looks like an HTML/XML tag: "<", one or more non-">" characters, ">".
HTML_TAG_RE = re.compile(r"<[^>]+>")
# Runs of characters that are not ASCII alphanumerics, Hebrew-block characters,
# a double quote or an apostrophe.
NON_WORD_RE = re.compile(r"[^0-9A-Za-z\u0590-\u05FF\"']+")
# Hebrew letters including the five final forms; used as the alphabet for spelling edits.
HEB_LETTERS = "אבגדהוזחטיכלמנסעפצקרשתםןףךץ"

def clean_text(s: str) -> str:
    """Normalize raw text into space-separated plain words.

    Steps, in order: HTML tags become spaces, niqqud/cantillation marks are
    deleted, Hebrew gershayim/geresh are mapped to ASCII ``"`` / ``'``, every
    remaining run of non-word characters becomes a space, and whitespace is
    collapsed. Falsy input yields ``""``.
    """
    if not s:
        return ""
    stripped = NIQQUD_RE.sub("", HTML_TAG_RE.sub(" ", s))
    for hebrew_mark, ascii_mark in (('״', '"'), ('׳', "'")):
        stripped = stripped.replace(hebrew_mark, ascii_mark)
    words = NON_WORD_RE.sub(" ", stripped).split()
    return " ".join(words)

@functools.lru_cache(maxsize=10000)
def hebrew_stem(word: str) -> str:
    """Strip one common Hebrew prefix from ``word`` (results are memoized).

    Words shorter than four characters are returned unchanged. Prefixes are
    tried in the listed order (longer combinations first); the first one that
    matches and leaves more than two characters is removed. If none
    qualifies, the word is returned as is.
    """
    if len(word) < 4:
        return word
    prefixes = (
        'וכש', 'וש', 'וה', 'וב', 'ול', 'ומ', 'כש', 'שב', 'שה', 'מש', 'מה',
        'ו', 'ה', 'ב', 'ל', 'מ', 'ש', 'כ',
    )
    length = len(word)
    return next(
        (word[len(p):] for p in prefixes if length > len(p) + 2 and word.startswith(p)),
        word,
    )

def get_tokens(text: str) -> Set[str]:
    """Return the set of prefix-stripped words found in ``text`` after cleaning."""
    # str.split() never yields empty strings, so every word can be stemmed directly.
    return set(map(hebrew_stem, clean_text(text).split()))

def fts_query_from_text(q_clean: str) -> str:
    """Build an FTS5 MATCH expression that requires every word of ``q_clean``.

    The text is cleaned, one-character tokens are dropped, and every
    remaining token is emitted as a double-quoted FTS5 string (embedded ``"``
    doubled). Quoting matters because cleaned tokens may contain ``"`` or
    ``'`` (kept by ``clean_text`` for Hebrew abbreviations) or be a reserved
    word such as ``AND``/``OR``/``NOT``/``NEAR``; as barewords these make the
    whole query a syntax error. Whitespace between phrases is FTS5's implicit
    AND, so the matching semantics for ordinary tokens are unchanged.

    Returns ``""`` when no usable token remains.
    """
    toks = [t for t in clean_text(q_clean).split() if len(t) > 1]
    return " ".join('"' + t.replace('"', '""') + '"' for t in toks)

# =========================
# SETTINGS PERSISTENCE
# =========================
def load_settings() -> dict:
    """Read the persisted settings from ``SETTINGS_PATH``.

    Returns an empty dict when the file is missing, unreadable, not valid
    JSON, or holds anything other than a JSON object - callers use ``.get``
    on the result, so a list or scalar must never leak out.
    """
    try:
        with open(SETTINGS_PATH, "r", encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError):
        # OSError covers a missing/unreadable file; ValueError covers both
        # malformed JSON (JSONDecodeError) and undecodable bytes.
        return {}
    return data if isinstance(data, dict) else {}

def save_settings(data: dict) -> None:
    """Persist ``data`` as pretty-printed UTF-8 JSON at ``SETTINGS_PATH``.

    Saving stays best-effort (failures are reported, never raised), but the
    JSON is fully serialized first and written through a temporary file that
    is then moved into place, so a failure can no longer leave a truncated
    settings file behind.
    """
    tmp_path = SETTINGS_PATH + ".tmp"
    try:
        payload = json.dumps(data, ensure_ascii=False, indent=2)
        with open(tmp_path, "w", encoding="utf-8") as f:
            f.write(payload)
        os.replace(tmp_path, SETTINGS_PATH)
    except (OSError, TypeError, ValueError) as e:
        # TypeError/ValueError: data is not JSON-serializable; OSError: disk problems.
        print(f"שגיאה בשמירת הגדרות: {e}")

# =========================
# ZIP MODEL SUPPORT
# =========================
def sha256_file(path: str) -> str:
    """Return the hex SHA-256 digest of the file at ``path``, read in 1 MiB blocks."""
    digest = hashlib.sha256()
    block_size = 1024 * 1024
    with open(path, "rb") as stream:
        while block := stream.read(block_size):
            digest.update(block)
    return digest.hexdigest()

def ensure_zip_extracted(zip_path: str) -> str:
    """Extract ``zip_path`` once and return the extraction directory.

    The target is ``LOCAL_MODELS_DIR/<first 16 hex chars of the archive's
    SHA-256>``. A ``.extracted_ok`` marker written after a successful
    extraction makes later calls return immediately.

    Raises:
        FileNotFoundError: the archive does not exist.
        RuntimeError: an archive member would resolve outside the target directory.
    """
    if not os.path.exists(zip_path):
        raise FileNotFoundError(f"ZIP לא נמצא: {zip_path}")

    target_dir = os.path.join(LOCAL_MODELS_DIR, sha256_file(zip_path)[:16])
    marker = os.path.join(target_dir, ".extracted_ok")
    if os.path.exists(marker):
        return target_dir

    os.makedirs(target_dir, exist_ok=True)
    root = os.path.abspath(target_dir)
    with zipfile.ZipFile(zip_path, "r") as archive:
        # Validate every member before anything is written to disk.
        for entry in archive.infolist():
            resolved = os.path.abspath(os.path.join(target_dir, entry.filename))
            stays_inside = resolved == root or resolved.startswith(root + os.sep)
            if not stays_inside:
                raise RuntimeError("ZIP לא תקין (path traversal).")
        archive.extractall(target_dir)

    # The marker holds the extraction timestamp; only its existence is checked above.
    with open(marker, "w", encoding="utf-8") as f:
        f.write(time.strftime("%Y-%m-%d %H:%M:%S"))
    return target_dir

def find_model_files(root_dir: str, edition: str) -> tuple[str, str]:
    """Locate ``vocab.json`` and ``embeddings_last.npy`` anywhere under ``root_dir``.

    For each file, a hit whose path contains ``otzaria_embeddings_<edition>``
    (case-insensitive) wins; otherwise the first hit found is used.

    Returns:
        ``(vocab_path, embeddings_path)`` as strings.

    Raises:
        FileNotFoundError: either file is missing from the tree.
    """
    root = Path(root_dir)
    vocab_hits = list(root.rglob("vocab.json"))
    emb_hits = list(root.rglob("embeddings_last.npy"))
    if not (vocab_hits and emb_hits):
        raise FileNotFoundError("לא מצאתי בתוך ה-ZIP את vocab.json ו/או embeddings_last.npy.")

    wanted = f"otzaria_embeddings_{edition}".lower()

    def choose(paths):
        as_text = [str(p) for p in paths]
        return next((t for t in as_text if wanted in t.lower()), as_text[0])

    return choose(vocab_hits), choose(emb_hits)

# =========================
# DATABASE & STREAMING
# =========================
def get_book_titles(db_path: str) -> Dict[int, str]:
    """Return ``{book id: title}`` read from the ``book`` table of ``db_path``.

    A missing file yields an empty dict. Database errors (for example a
    missing ``book`` table) are reported on stdout and whatever was read so
    far is returned. The connection is always closed, including when the
    query fails.
    """
    titles: Dict[int, str] = {}
    if not os.path.exists(db_path):
        return titles
    try:
        con = sqlite3.connect(db_path)
        try:
            # Each row is an (id, title) pair, which dict.update consumes directly.
            titles.update(con.execute("SELECT id, title FROM book"))
        finally:
            con.close()
    except sqlite3.Error as e:
        print(f"שגיאה בטעינת שמות ספרים: {e}")
    return titles

def iter_rows_ordered(db_path: str, chunk_rows: int = 20000):
    """Yield batches of non-empty text rows ordered by ``(bookId, lineIndex)``.

    Rows come from the ``lines`` table when it exists, otherwise from
    ``line``; when neither can be queried nothing is yielded. Each batch is a
    list of at most ``chunk_rows`` ``sqlite3.Row`` objects exposing ``id``,
    ``bookId``, ``lineIndex`` and ``content``.

    The connection is closed when the generator finishes, fails, or is
    closed/garbage-collected before being exhausted.

    Raises:
        FileNotFoundError: on first iteration, if ``db_path`` does not exist.
    """
    if not os.path.exists(db_path): raise FileNotFoundError(f"קובץ מסד הנתונים לא נמצא: {db_path}")
    con = sqlite3.connect(db_path)
    try:
        con.row_factory = sqlite3.Row
        con.execute("PRAGMA journal_mode=OFF;")

        # Probe the candidate table names in order of preference.
        table_name = None
        for candidate in ("lines", "line"):
            try:
                con.execute(f"SELECT 1 FROM {candidate} LIMIT 1")
            except sqlite3.Error:
                continue
            table_name = candidate
            break
        if table_name is None:
            return

        q = f"SELECT id, bookId, lineIndex, content FROM {table_name} WHERE content IS NOT NULL AND content != '' ORDER BY bookId, lineIndex"
        cur = con.execute(q)
        while True:
            rows = cur.fetchmany(chunk_rows)
            if not rows: break
            yield rows
    finally:
        # Also runs on GeneratorExit, i.e. when a consumer stops early.
        con.close()

def iter_chunks(db_path: str, max_chunks: int, ideal_words: int = IDEAL_CHUNK_WORDS, max_words: int = MAX_CHUNK_WORDS, overlap_words: int = DEFAULT_OVERLAP_WORDS):
    """Yield overlapping word-window chunks of the library text.

    Words of consecutive rows of one book are accumulated in a buffer. Once
    the buffer holds ``ideal_words`` words, a chunk is cut at the first word
    in positions ``ideal_words..max_words`` that ends with sentence
    punctuation, or at ``max_words`` if there is none. The next window starts
    about ``overlap_words`` before the cut, preferably right after a
    punctuation mark. Chunks never span two books.

    Each chunk is a dict with ``bookId``, ``startLine`` (``lineIndex`` of the
    first word), ``text`` (original words joined by spaces) and ``clean``
    (``clean_text`` of ``text``). Chunks whose cleaned text is 30 characters
    or shorter are skipped. At most ``max_chunks`` chunks are produced.

    The next window never starts beyond the first word that is not yet part
    of a chunk (so no word is silently dropped) and always advances by at
    least one word (so an overlap as large as the chunk cannot re-emit the
    same chunk forever).
    """
    if max_chunks <= 0:
        return
    # Guard against degenerate window sizes that would cut empty chunks.
    ideal_words = max(1, ideal_words)
    max_words = max(1, max_words)

    buf = []  # (lineIndex, word) pairs of the current book
    cur_book = None
    produced = 0
    # Punctuation marks treated as the end of a sentence / idea
    punctuation = ('.', ':', ';', '?', '!')

    def flush_chunk(buffer_slice, b_id):
        chunk_text = " ".join([w for _, w in buffer_slice])
        cln_text = clean_text(chunk_text)
        if len(cln_text) > 30:
            return {"bookId": b_id, "startLine": buffer_slice[0][0], "text": chunk_text, "clean": cln_text}
        return None

    for batch in iter_rows_ordered(db_path):
        for r in batch:
            b_id = r["bookId"]

            # On a book change, emit what is left of the previous book and reset the buffer
            if cur_book is not None and b_id != cur_book:
                if len(buf) > 15:
                    chunk_data = flush_chunk(buf, cur_book)
                    if chunk_data:
                        yield chunk_data
                        produced += 1
                        if produced >= max_chunks: return
                buf = []

            cur_book = b_id
            txt = str(r["content"]).strip()
            if not txt: continue

            # Words keep their original punctuation so sentence ends stay detectable
            line_index = r["lineIndex"]
            buf.extend((line_index, w) for w in txt.split())

            # As long as there are enough words, look for a smart cut
            while len(buf) >= ideal_words:
                split_idx = -1

                # Look for a punctuation mark between the ideal and the maximum length
                for i in range(ideal_words - 1, min(len(buf), max_words)):
                    if buf[i][1].endswith(punctuation):
                        split_idx = i
                        break

                # No punctuation found (e.g. old, unpunctuated texts): cut at the maximum
                if split_idx == -1:
                    split_idx = min(len(buf) - 1, max_words - 1)

                consumed = split_idx + 1  # number of buffered words covered by this chunk
                chunk_data = flush_chunk(buf[:consumed], cur_book)
                if chunk_data:
                    yield chunk_data
                    produced += 1
                    if produced >= max_chunks: return

                # Overlap: try to start the next window right after a punctuation
                # mark within 15 words of the nominal overlap start.
                next_start = consumed - overlap_words
                if next_start > 0:
                    scan_from = max(1, next_start - 15)
                    # Never scan past `consumed`: starting later would drop words.
                    scan_to = min(len(buf), next_start + 15, consumed + 1)
                    for i in range(scan_from, scan_to):
                        if buf[i - 1][1].endswith(punctuation):
                            next_start = i
                            break

                # Always advance by at least one word and never skip unchunked words.
                next_start = min(max(next_start, 1), consumed)
                buf = buf[next_start:]

    # Remainder of the last book
    if len(buf) > 15:
        chunk_data = flush_chunk(buf, cur_book)
        if chunk_data:
            yield chunk_data

# =========================
# ENGINE CORE
# =========================
@dataclass
class LoadedModel:
    """Embedding model held in memory, as assembled by ``Engine.load_resources``."""
    edition: str  # edition tag the model was loaded for, e.g. "v3"
    vocab: Dict[str, int]  # word -> row index into emb_norm / idf
    emb_norm: np.ndarray  # embedding matrix with each non-zero row divided by its L2 norm
    idf: np.ndarray  # float32 per-row weight; all ones when the vocab file has no usable "freqs"
    idx_to_word: Dict[int, str]  # inverse mapping of vocab
    word_freqs: Dict[str, float]  # word -> frequency from "freqs" (1.0 for every word as a fallback)

@dataclass
class BuiltIndex:
    """A ready-to-search vector index together with its chunk metadata store."""
    faiss_index: faiss.Index  # vector index whose ids are the chunk rowids
    meta_db_path: str  # SQLite file holding the `chunks` and `chunks_fts` tables
    count: int  # number of chunks in the index

class Engine:
    def __init__(self):
        """Create an idle engine with no model or index and load the saved settings."""
        # Re-entrant lock guarding updates of `status`.
        self._lock = threading.RLock()
        self.status = dict(state="idle", msg="המערכת מוכנה", progress=0)
        self.book_map: Dict[int, str] = {}
        self.model: Optional[LoadedModel] = None
        self.built: Optional[BuiltIndex] = None
        # Last persisted configuration; search() reads the scoring weights from it.
        self.last_cfg = load_settings()

    def _update(self, state, msg, progress):
        """Publish a new status snapshot (under the lock) and echo it to stdout."""
        snapshot = {"state": state, "msg": msg, "progress": int(progress)}
        with self._lock:
            self.status = snapshot
        print(f"[{state}] {msg} ({progress}%)")

    def _hf_snapshot_offline_first(self, allow_patterns: List[str]) -> str:
        """Return the local snapshot directory of ``HF_REPO`` for ``allow_patterns``.

        The local cache is tried first (``local_files_only=True``); on any
        failure the same request is repeated with network access allowed.
        """
        request = dict(repo_id=HF_REPO, repo_type="model", cache_dir=CACHE_DIR, allow_patterns=allow_patterns)
        try:
            return snapshot_download(**request, local_files_only=True)
        except Exception:
            return snapshot_download(**request, local_files_only=False)

    def load_resources(self, db_path: str, edition: str = "v3", model_source: str = "hf", zip_path: str = ""):
        """Load book titles and the embedding model into the engine.

        Args:
            db_path: library database; when it exists, book titles are read from it.
            edition: model edition tag; unknown tags fall back to the "v3" path
                for the Hugging Face source.
            model_source: ``"zip"`` loads from a local archive, anything else
                from the Hugging Face snapshot.
            zip_path: archive to use for the zip source; defaults to
                ``MODELS_ZIPS_DIR/otzaria_embeddings_<edition>.zip``.

        On success ``self.model`` is replaced and the status becomes "idle".
        On failure the status becomes "error" and the exception is re-raised.
        """
        if db_path and os.path.exists(db_path):
            self.book_map = get_book_titles(db_path)
        try:
            self._update("downloading", f"טוען מודל {edition} ({model_source})...", 5)

            # Resolve the two model files for the requested source.
            if model_source != "zip":
                rel = EDITION_PATHS.get(edition, EDITION_PATHS["v3"])
                patterns = [f"{rel}/vocab.json", f"{rel}/embeddings_last.npy"]
                edition_dir = os.path.join(self._hf_snapshot_offline_first(patterns), rel)
                vocab_path = os.path.join(edition_dir, "vocab.json")
                emb_path = os.path.join(edition_dir, "embeddings_last.npy")
            else:
                archive = zip_path or os.path.join(MODELS_ZIPS_DIR, f"otzaria_embeddings_{edition}.zip")
                vocab_path, emb_path = find_model_files(ensure_zip_extracted(archive), edition)

            with open(vocab_path, "r", encoding="utf-8") as fh:
                meta = json.load(fh)

            # Memory-map the raw matrix; only the normalized copy is materialized in RAM.
            raw = np.load(emb_path, mmap_mode='r')
            row_norms = np.linalg.norm(raw, axis=1, keepdims=True)
            row_norms[row_norms == 0] = 1  # keep all-zero rows as zeros instead of NaN
            unit_rows = raw / row_norms

            vocab = meta["vocab"]
            freqs = np.array(meta.get("freqs", []), dtype=np.float64)
            if len(freqs) != len(vocab):
                # No usable frequencies: weight every word equally.
                idf = np.ones(len(vocab), dtype=np.float32)
                word_freqs = dict.fromkeys(vocab, 1.0)
            else:
                idf = np.log((np.sum(freqs) + 1) / (freqs + 1)) + 1
                word_freqs = {w: float(freqs[i]) for w, i in vocab.items()}

            idx_to_word = {i: w for w, i in vocab.items()}
            self.model = LoadedModel(edition, vocab, unit_rows, idf.astype(np.float32), idx_to_word, word_freqs)
            self._update("idle", "המודל נטען בהצלחה", 100)
        except Exception as e:
            self._update("error", f"שגיאה בטעינת מודל: {e}", 0)
            raise

    def _stamp(self, edition: str, max_chunks: int, ideal: int, max_w: int, overlap: int) -> str:
        """Return the cache key that names the index files for one build configuration."""
        fields = (edition, f"N{max_chunks}", f"Ideal{ideal}", f"Max{max_w}", f"Overlap{overlap}")
        return "_".join(fields)
    
    def build_index(self, db_path: str, max_chunks: int, ideal: int = IDEAL_CHUNK_WORDS, max_w: int = MAX_CHUNK_WORDS, overlap: int = DEFAULT_OVERLAP_WORDS):
        """Load the cached index for this configuration, or build it from ``db_path``.

        Does nothing when no model is loaded or a build is already running.
        The FAISS index and the chunk metadata (a SQLite file with ``chunks``
        and a contentless ``chunks_fts`` FTS5 table) are stored in
        ``RUNTIME_DIR`` under a name derived from the edition and the
        chunking parameters. Chunk rowids double as FAISS ids.

        For more than 20,000 requested chunks an IVF index is used, trained
        on the first batch of vectors. The number of IVF lists is capped by
        the batch size, and when fewer vectors than lists are available a
        flat index is used instead, because k-means training cannot run with
        fewer points than clusters.

        Any failure sets the status to "error" (so a later call is not
        blocked by a stale "indexing" state) and re-raises.
        """
        if not self.model or self.status["state"] == "indexing": return
        stamp = self._stamp(self.model.edition, max_chunks, ideal, max_w, overlap)
        idx_path = os.path.join(RUNTIME_DIR, f"{stamp}.index")
        meta_db_path = os.path.join(RUNTIME_DIR, f"{stamp}.sqlite")
        # The metadata is built in a side file so a half-built database never
        # replaces (or collides with) one that is in use.
        temp_db_path = meta_db_path + ".tmp"

        try:
            if os.path.exists(idx_path) and os.path.exists(meta_db_path):
                self._update("loading", "טוען אינדקס קיים...", 50)
                idx = faiss.read_index(idx_path)
                self.built = BuiltIndex(idx, meta_db_path, idx.ntotal)
                if not self.book_map: self.book_map = get_book_titles(db_path)
                self._update("ready", f"מוכן לחיפוש ({idx.ntotal:,} רשומות)", 100)
                return

            self._update("indexing", "מתחיל בבניית אינדקס (זה יקח זמן)...", 0)
            try:
                if os.path.exists(temp_db_path): os.remove(temp_db_path)
            except OSError: pass

            d = self.model.emb_norm.shape[1]
            batch_size = 5000

            def new_flat_index():
                return faiss.IndexIDMap(faiss.IndexFlatIP(d))

            use_ivf = max_chunks > 20000
            nlist = 0
            ivf_index = None
            if use_ivf:
                # The IVF index is trained on the first batch only, so it cannot
                # have more lists than that batch has vectors.
                nlist = min(int(4 * math.sqrt(max_chunks)), batch_size)
                quantizer = faiss.IndexFlatIP(d)
                ivf_index = faiss.IndexIVFFlat(quantizer, d, nlist, faiss.METRIC_INNER_PRODUCT)
                ivf_index.nprobe = 10  # clusters visited per query (speed/accuracy balance)
                index = faiss.IndexIDMap(ivf_index)
                is_trained = False
            else:
                index = new_flat_index()
                is_trained = True

            vectors, ids, db_buffer, fts_buffer = [], [], [], []
            total_processed = 0
            start_time = time.time()

            con = sqlite3.connect(temp_db_path, timeout=30)
            try:
                con.execute("PRAGMA journal_mode = MEMORY")
                con.execute("DROP TABLE IF EXISTS chunks")
                con.execute("DROP TABLE IF EXISTS chunks_fts")
                con.execute("CREATE TABLE chunks (rowid INTEGER PRIMARY KEY, bookId INTEGER, startLine INTEGER, text TEXT)")
                con.execute("CREATE INDEX idx_book ON chunks(bookId)")
                con.execute("CREATE VIRTUAL TABLE chunks_fts USING fts5(text, content='');")

                def flush_pending():
                    """Add the buffered vectors and rows to the index and the database."""
                    nonlocal index, is_trained
                    batch = np.ascontiguousarray(np.vstack(vectors), dtype=np.float32)
                    if not is_trained:
                        if len(batch) >= nlist:
                            self._update("indexing", "מאמן אינדקס וקטורי (IVF)...", 5)
                            ivf_index.train(batch)
                        else:
                            # Too few vectors to train on; nothing was added yet,
                            # so switching to a flat index is safe.
                            index = new_flat_index()
                        is_trained = True
                    index.add_with_ids(batch, np.array(ids, dtype="int64"))
                    con.executemany("INSERT INTO chunks VALUES (?,?,?,?)", db_buffer)
                    con.executemany("INSERT INTO chunks_fts(rowid, text) VALUES (?,?)", fts_buffer)
                    con.commit()
                    for pending in (vectors, ids, db_buffer, fts_buffer):
                        pending.clear()

                for chunk in iter_chunks(db_path, max_chunks, ideal, max_w, overlap):
                    vec = self._text_to_vec(chunk["clean"])
                    if vec is None: continue
                    current_id = total_processed
                    vectors.append(vec)
                    ids.append(current_id)
                    db_buffer.append((current_id, chunk["bookId"], chunk["startLine"], chunk["text"]))
                    fts_buffer.append((current_id, chunk["clean"]))
                    total_processed += 1

                    if len(vectors) >= batch_size:
                        flush_pending()
                        elapsed = time.time() - start_time
                        rate = total_processed / (elapsed + 0.1)
                        pct = min(95, int((total_processed / max(1, max_chunks)) * 100))
                        self._update("indexing", f"עובדו {total_processed:,} רשומות ({int(rate)} לשנייה)", pct)

                if vectors:
                    flush_pending()
                elif not is_trained:
                    # Nothing was indexed at all: store an empty flat index
                    # rather than an untrained IVF index that cannot be searched.
                    index = new_flat_index()
            finally:
                con.close()

            faiss.write_index(index, idx_path)

            # Move the finished metadata database into place (overwrites atomically).
            final_db_path = meta_db_path
            try:
                os.replace(temp_db_path, meta_db_path)
            except OSError:
                # Target is locked: keep using the side file for the current run.
                final_db_path = temp_db_path

            self.built = BuiltIndex(index, final_db_path, total_processed)
            self._update("ready", "הבנייה הושלמה בהצלחה!", 100)
        except Exception as e:
            self._update("error", f"שגיאה בבניית אינדקס: {e}", 0)
            raise

    def _text_to_vec(self, text: str):
        """Embed already-cleaned ``text`` as a unit-length, IDF-weighted sum of word vectors.

        Returns ``None`` when no model is loaded, the text has no words, none
        of the words is in the vocabulary, or the weighted sum is (numerically) zero.
        """
        model = self.model
        if not model:
            return None
        words = text.split()
        if not words:
            return None
        rows = [model.vocab[w] for w in words if w in model.vocab]
        if not rows:
            return None
        # Scale each word vector by its IDF weight, then sum over the words.
        summed = np.sum(model.emb_norm[rows] * model.idf[rows][:, None], axis=0)
        length = np.linalg.norm(summed)
        return None if length < 1e-9 else summed / length

    # 🔹 SPELL CHECK ALGORITHM (NORVIG)
    def check_spelling(self, query: str) -> Optional[str]:
        """Return a corrected version of ``query``, or ``None`` if nothing changed.

        Words that are known to the model or at most two characters long are
        kept; every other word is replaced by its best single-edit correction.
        ``None`` is also returned when no model is loaded or the query is empty.
        """
        if not self.model or not query:
            return None
        known = self.model.word_freqs
        words = clean_text(query).split()
        fixed = [w if (w in known or len(w) <= 2) else self._correct_word(w) for w in words]
        # Untouched positions are equal by construction, so any difference is a correction.
        return " ".join(fixed) if fixed != words else None

    def _correct_word(self, word: str) -> str:
        """Return the most frequent known word among ``word`` and its single-edit variants.

        Falls back to ``word`` itself when neither it nor any variant is known.
        """
        pool = self._known([word]) or self._known(self._edits1(word)) or [word]
        freq_of = self.model.word_freqs
        return max(pool, key=lambda cand: freq_of.get(cand, 0))

    def _known(self, words):
        """Return the subset of ``words`` that has an entry in the model's word frequencies."""
        freqs = self.model.word_freqs
        return {w for w in words if w in freqs}

    def _edits1(self, word):
        """Return every string one edit away from ``word`` over the Hebrew alphabet.

        Edits are: deleting one character, swapping two adjacent characters,
        replacing one character, and inserting one character.
        """
        cuts = [(word[:i], word[i:]) for i in range(len(word) + 1)]
        variants = set()
        # Deletions
        variants.update(head + tail[1:] for head, tail in cuts if tail)
        # Adjacent transpositions
        variants.update(head + tail[1] + tail[0] + tail[2:] for head, tail in cuts if len(tail) > 1)
        # Replacements
        variants.update(head + ch + tail[1:] for head, tail in cuts if tail for ch in HEB_LETTERS)
        # Insertions
        variants.update(head + ch + tail for head, tail in cuts for ch in HEB_LETTERS)
        return variants

    # 🔹 QUERY EXPANSION ALGORITHM
    def _build_expanded_fts_query(self, q_clean: str, top_synonyms: int = 2, threshold: float = 0.7) -> str:
        """Build an FTS5 MATCH expression for ``q_clean`` with embedding-based synonyms.

        Every token longer than one character becomes a group
        ``("token" OR "neighbour" ...)`` holding the token plus those of its
        ``top_synonyms + 2`` nearest vocabulary neighbours (excluding itself)
        whose cosine similarity exceeds ``threshold``. Groups are combined
        with an explicit ``AND``: FTS5's whitespace "implicit AND" is only
        defined between phrases, not between parenthesised groups, so
        whitespace-joined groups are rejected as a syntax error for any query
        of two or more words. Terms are emitted as FTS5 strings with embedded
        ``"`` doubled, because cleaned tokens may contain quote characters.

        Falls back to ``fts_query_from_text`` when no model is available, and
        returns ``""`` when there is no usable token.
        """
        if not self.model or not self.model.idx_to_word:
            return fts_query_from_text(q_clean)
        tokens = [t for t in q_clean.split() if len(t) > 1]
        if not tokens: return ""

        def quote(term: str) -> str:
            return '"' + term.replace('"', '""') + '"'

        expanded_parts = []
        for t in tokens:
            synonyms = [t]
            idx = self.model.vocab.get(t)
            if idx is not None:
                vec = self.model.emb_norm[idx]
                sims = np.dot(self.model.emb_norm, vec)
                best_indices = np.argsort(sims)[-(top_synonyms + 2):][::-1]
                for bi in best_indices:
                    if bi != idx and sims[bi] > threshold:
                        synonyms.append(self.model.idx_to_word[int(bi)])
            expanded_parts.append("(" + " OR ".join(quote(s) for s in synonyms) + ")")
        return " AND ".join(expanded_parts)

    def get_expanded_terms(self, q_clean: str, top_synonyms: int = 2, threshold: float = 0.7) -> list[str]:
        """Return the query tokens plus their close embedding neighbours.

        Tokens are the words of ``q_clean`` longer than one character. For
        each token in the vocabulary, the ``top_synonyms + 2`` most similar
        words (excluding the token itself) with similarity above
        ``threshold`` are added. Without a model only the tokens are
        returned. The result is an unordered, duplicate-free list.
        """
        tokens = [t for t in q_clean.split() if len(t) > 1]
        if not self.model or not self.model.idx_to_word:
            return tokens

        model = self.model
        expanded = set(tokens)  # the original words are always kept
        for tok in tokens:
            row = model.vocab.get(tok)
            if row is None:
                continue
            # Similarity of every vocabulary word to this token.
            sims = np.dot(model.emb_norm, model.emb_norm[row])
            nearest = np.argsort(sims)[-(top_synonyms + 2):][::-1]
            expanded.update(
                model.idx_to_word[j] for j in nearest if j != row and sims[j] > threshold
            )
        return list(expanded)

    def _fts_candidates(self, q_clean: str, limit: int) -> List[Tuple[int, float]]:
        """Return up to ``limit`` best full-text matches as ``(rowid, bm25)`` pairs.

        Rows are ordered by FTS5 rank (best match first) before the limit is
        applied; without an ORDER BY the limit would keep arbitrary matches
        instead of the most relevant ones. FTS5 ``bm25()`` values are lower
        (more negative) for better matches.

        Returns an empty list when no index is built, the query has no usable
        tokens, or the database rejects the query (the error is printed).
        """
        if not self.built: return []
        fts_q = self._build_expanded_fts_query(q_clean)
        if not fts_q: return []
        con = sqlite3.connect(self.built.meta_db_path, timeout=30)
        con.row_factory = sqlite3.Row
        try:
            rows = con.execute(
                "SELECT rowid, bm25(chunks_fts) AS bm FROM chunks_fts "
                "WHERE chunks_fts MATCH ? ORDER BY rank LIMIT ?",
                (fts_q, int(limit)),
            ).fetchall()
            return [(int(r["rowid"]), float(r["bm"])) for r in rows]
        except sqlite3.Error as e:
            # Full-text search is one signal among several; report and carry on.
            print(f"שגיאה בחיפוש FTS: {e}")
            return []
        finally:
            con.close()

    def search(self, query: str, book_filter: Optional[int] = None, top_k: int = 20):
        """Run a hybrid (vector + full-text) search and return the ``top_k`` best chunks.

        Candidates are the union of the nearest vectors in the FAISS index and
        the best FTS5 matches. Every candidate is scored as a weighted average
        of five features, with weights read from ``self.last_cfg``:

        * ``vec``     - inner product between the query and chunk vectors
        * ``bm``      - BM25 relevance mapped into ``[0, 1)``
        * ``overlap`` - share of stemmed query tokens present in the chunk
        * ``phrase``  - 1.0 when the cleaned query occurs verbatim in the chunk
        * ``prox``    - density of matching words inside the span they cover

        Args:
            query: raw user query.
            book_filter: when truthy, only chunks of this book id are returned.
            top_k: maximum number of results.

        Returns:
            A list of result dicts (``score``, ``text``, ``source``,
            ``book_id``, ``book_title``, ``features``) sorted by descending
            score; empty when no model/index is loaded or the query cannot be
            embedded.
        """
        if not self.model or not self.built: return []
        q_clean = clean_text(query)
        q_vec = self._text_to_vec(q_clean)
        if q_vec is None: return []

        candidates_k = max(top_k * 20, 200)
        scores, ids = self.built.faiss_index.search(np.asarray([q_vec], dtype=np.float32), candidates_k)
        # FAISS pads missing neighbours with id -1.
        vec_scores = {int(fid): float(scr) for fid, scr in zip(ids[0], scores[0]) if int(fid) >= 0}

        fts_bm = dict(self._fts_candidates(q_clean, candidates_k))

        union_ids = list(set(vec_scores) | set(fts_bm))
        if not union_ids: return []

        # Fetch the candidate rows in slices so a single statement never
        # exceeds SQLite's limit on bound parameters (999 in older builds).
        max_ids_per_query = 500
        rows = []
        con = sqlite3.connect(self.built.meta_db_path, timeout=30)
        con.row_factory = sqlite3.Row
        try:
            for start in range(0, len(union_ids), max_ids_per_query):
                part = union_ids[start:start + max_ids_per_query]
                placeholders = ",".join(["?"] * len(part))
                sql = f"SELECT rowid, bookId, startLine, text FROM chunks WHERE rowid IN ({placeholders})"
                params: List = list(part)
                if book_filter:
                    sql += " AND bookId = ?"
                    params.append(int(book_filter))
                rows.extend(con.execute(sql, params).fetchall())
        finally:
            con.close()

        def bm_to_rel(bm: Optional[float]) -> float:
            # FTS5 bm25() is negative for matches, more negative meaning a
            # better match; map its magnitude into [0, 1) so stronger matches
            # score higher and non-matches score 0.
            if bm is None:
                return 0.0
            strength = max(0.0, -bm)
            return strength / (1.0 + strength)

        q_tokens = get_tokens(q_clean)
        cfg = self.last_cfg or {}
        w_vec       = float(cfg.get("w_vec", 0.35))
        w_bm        = float(cfg.get("w_bm", 0.25))
        w_overlap   = float(cfg.get("w_overlap", 0.25))
        w_phrase    = float(cfg.get("w_phrase", 0.10))
        w_proximity = float(cfg.get("w_proximity", 0.05))
        total_weight = w_vec + w_bm + w_overlap + w_phrase + w_proximity
        if total_weight == 0: total_weight = 1

        results = []
        for r in rows:
            rid = int(r["rowid"])
            chunk_txt = r["text"]
            chunk_clean = clean_text(chunk_txt)
            # Stem every chunk word once; both overlap and proximity use the stems.
            chunk_stems = [hebrew_stem(w) for w in chunk_clean.split()]
            chunk_tokens = set(chunk_stems)

            base_vec = vec_scores.get(rid, 0.0)
            bm_rel = bm_to_rel(fts_bm.get(rid))
            intersection = len(q_tokens & chunk_tokens)
            overlap = (intersection / len(q_tokens)) if q_tokens else 0.0
            phrase = 1.0 if (q_clean and q_clean in chunk_clean) else 0.0
            proximity = 0.0
            if intersection > 1 and q_tokens:
                found_indices = [i for i, stem in enumerate(chunk_stems) if stem in q_tokens]
                if found_indices:
                    span = max(found_indices) - min(found_indices)
                    density = len(found_indices) / (span + 1)
                    proximity = min(density, 1.0)

            final_score = ((base_vec * w_vec) + (bm_rel * w_bm) + (overlap * w_overlap) + (phrase * w_phrase) + (proximity * w_proximity)) / total_weight
            book_title = self.book_map.get(int(r["bookId"]), f"ספר {int(r['bookId'])}")

            results.append({
                "score": float(final_score),
                "text": chunk_txt,
                "source": f"{book_title}, שורה {int(r['startLine'])}",
                "book_id": int(r["bookId"]),
                "book_title": book_title,
                "features": {"vec": float(base_vec), "bm": float(bm_rel), "overlap": float(overlap), "phrase": float(phrase), "prox": float(proximity)}
            })

        results.sort(key=lambda x: x["score"], reverse=True)
        return results[:top_k]

# Single engine instance created at import time and shared through this module.
ENGINE = Engine()

# =========================
# FLASK WEB APP
# =========================
app = Flask(__name__)
# Flask signs the session cookie (used by flash()) with this key. It can be
# overridden through the OTZARIA_SECRET_KEY environment variable so a
# deployment does not have to rely on the value committed in source; the
# previous literal remains the default.
app.secret_key = os.environ.get("OTZARIA_SECRET_KEY", "otzaria_ai_secret_v5")

HTML_TEMPLATE = """
<!DOCTYPE html>
<html lang="he" dir="rtl">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>אוצריא AI</title>
    <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/css/bootstrap.min.css" rel="stylesheet">
    <link href="https://fonts.googleapis.com/css2?family=Heebo:wght@300;400;500;700&family=Frank+Ruhl+Libre:wght@400;700&display=swap" rel="stylesheet">
    <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bootstrap-icons@1.10.5/font/bootstrap-icons.css">
    <style>
        :root { --primary-color: #0d6efd; --bg-color: #f8f9fa; --card-bg: #ffffff; }
        body { background-color: var(--bg-color); font-family: 'Heebo', sans-serif; color: #333; }
        .serif-text { font-family: 'Frank Ruhl Libre', serif; }
        .navbar { background: var(--card-bg); border-bottom: 1px solid #eee; }
        .search-container { max-width: 800px; margin: 0 auto; }
        
        /* תיבת החיפוש המקורית */
        .search-box { transition: all 0.3s; border: 1px solid #dfe1e5; border-radius: 24px; background: var(--card-bg); position: relative; z-index: 101;}
        .search-box:hover, .search-box:focus-within { box-shadow: 0 1px 6px rgba(32,33,36,.28); border-color: rgba(223,225,229,0); }
        .search-input { border: none; box-shadow: none !important; font-size: 1.1rem; padding: 12px 20px; border-radius: 24px; background: transparent;}
        
        /* תפריט ההשלמה - מחובר באופן חלק לתיבה */
        .autocomplete-dropdown { 
            top: 100%; left: 0; right: 0; 
            margin-top: -20px; padding-top: 20px; 
            background: #fff; border-radius: 0 0 24px 24px; 
            border: 1px solid #dfe1e5; border-top: none; 
            box-shadow: 0 4px 6px rgba(32,33,36,.28); 
            z-index: 100; overflow: hidden;
            opacity: 0; visibility: hidden; transform: translateY(-10px);
            transition: all 0.2s ease-in-out;
        }
        .autocomplete-dropdown.show-dropdown {
            opacity: 1; visibility: visible; transform: translateY(0);
        }
        .autocomplete-item { padding: 12px 20px; cursor: pointer; display: flex; align-items: center; gap: 12px; color: #202124; font-size: 1.05rem; transition: background 0.1s; }
        .autocomplete-item:hover { background-color: #f8f9fa; }

        .result-card { background: var(--card-bg); border-radius: 12px; padding: 20px; margin-bottom: 16px; border: 1px solid #eee; transition: transform 0.2s; }
        .result-card:hover { transform: translateY(-2px); box-shadow: 0 4px 12px rgba(0,0,0,0.05); }
        .source-badge { background-color: #e7f1ff; color: #0d6efd; padding: 4px 10px; border-radius: 6px; font-size: 0.85rem; font-weight: 600; }
        mark { background-color: #fff3cd; padding: 0 2px; border-radius: 2px; color: #333; }
        .upload-area { border: 2px dashed #dee2e6; border-radius: 12px; padding: 2rem; text-align: center; transition: all 0.2s; background: #fff; cursor:pointer;}
        .upload-area:hover { border-color: var(--primary-color); background-color: #f8f9ff; }
        
        /* Custom styling for Settings Tabs */
        #settingsTabs .nav-link { color: #6c757d; border: none; padding: 12px 16px; transition: all 0.2s; }
        #settingsTabs .nav-link:hover { color: var(--primary-color); background-color: rgba(13, 110, 253, 0.05); }
        #settingsTabs .nav-link.active { color: var(--primary-color); background-color: transparent; border-bottom: 3px solid var(--primary-color); }
        .weight-card { border: 1px solid #e9ecef; border-radius: 8px; padding: 12px 16px; margin-bottom: 12px; background: #fff; transition: border-color 0.2s; }
        .weight-card:hover { border-color: #b6d4fe; }
        .weight-label { font-weight: 600; font-size: 0.95rem; }
        .weight-value { font-weight: bold; color: var(--primary-color); font-family: monospace; font-size: 1.1rem;}
        
        .did-you-mean { font-size: 1.1rem; color: #dd4b39; }
        .did-you-mean a { color: #1a0dab; font-weight: bold; text-decoration: none; font-style: italic;}
        .did-you-mean a:hover { text-decoration: underline; }
        ::-webkit-scrollbar { width: 8px; }
        ::-webkit-scrollbar-track { background: #f1f1f1; }
        ::-webkit-scrollbar-thumb { background: #ccc; border-radius: 4px; }
        
        /* Loading Spinner Overlay */
        #loading-indicator { 
            display: none; 
            position: fixed; 
            top: 0; left: 0; 
            width: 100%; height: 100%; 
            background-color: rgba(255, 255, 255, 0.9); 
            z-index: 9999; 
            flex-direction: column; 
            justify-content: center; 
            align-items: center; 
        }
        #loading-indicator.active { display: flex; }
    </style>
</head>
<body>

<nav class="navbar navbar-expand-lg sticky-top">
    <div class="container">
        <a class="navbar-brand fw-bold d-flex align-items-center gap-2" href="/">
            <i class="bi bi-google text-primary"></i>
            <span>אוצריא <small class="text-muted fw-normal">AI</small></span>
        </a>
        
        <div class="d-flex align-items-center gap-3">
            <div class="d-flex align-items-center small bg-light px-3 py-1 rounded-pill border" id="status-pill" title="סטטוס מערכת">
                <div id="status-dot" class="rounded-circle bg-secondary me-2" style="width: 8px; height: 8px;"></div>
                <span id="status-text" class="text-muted">טוען...</span>
                <span id="idx-count" class="ms-2 fw-bold text-dark">{{ idx_count }}</span>
            </div>
            {% if model_loaded %}
            <button class="btn btn-link text-secondary p-0 fs-5" data-bs-toggle="modal" data-bs-target="#settingsModal" title="הגדרות"><i class="bi bi-gear"></i></button>
            {% endif %}
        </div>
    </div>
</nav>

<div class="container py-5 search-container">
    {% if not model_loaded or not db_exists %}
    <div class="text-center mb-5"><h2 class="fw-bold mb-3">ברוך הבא למנוע החיפוש החכם</h2><p class="text-muted">אנא טען את המקורות בממשק ההגדרות למעלה כדי להתחיל.</p></div>
    <div class="text-center"><button class="btn btn-primary btn-lg" data-bs-toggle="modal" data-bs-target="#settingsModal">פתח הגדרות מערכת</button></div>
    {% else %}
    
    <div class="text-center mb-4 {% if query %}d-none{% endif %}">
        <h1 class="display-4 fw-bold mb-3"><span class="text-primary">O</span><span class="text-danger">t</span><span class="text-warning">z</span><span class="text-primary">a</span><span class="text-success">r</span><span class="text-danger">i</span><span class="text-primary">a</span> AI</h1>
        <p class="text-muted mb-4">מנוע חיפוש סמנטי לארון הספרים היהודי</p>
    </div>

    <form action="/" method="get" class="mb-4" id="searchForm">
        
        <div class="position-relative mx-auto" style="max-width: 600px; z-index: 1000;">
            <div class="search-box d-flex align-items-center shadow-sm" id="searchBoxContainer">
                <button type="submit" class="btn border-0 text-muted ps-3"><i class="bi bi-search"></i></button>
                <input type="text" id="searchInput" name="q" class="form-control search-input" placeholder="חפש משהו (למשל: הלכות שבת)..." value="{{ query or '' }}" autocomplete="off" autofocus>
                {% if query %}
                <a href="/" class="btn border-0 text-muted pe-3" title="נקה"><i class="bi bi-x-lg"></i></a>
                {% endif %}
            </div>
            
            <div id="autocompleteDropdown" class="autocomplete-dropdown position-absolute">
                </div>
        </div>
        
        <div class="d-flex justify-content-center mt-3">
            <select name="book_id" class="form-select form-select-sm w-auto border-0 bg-transparent text-muted" style="cursor: pointer;">
                <option value="">📚 כל הספרים</option>
                {% for bid, title in books.items() %}
                    <option value="{{ bid }}" {% if selected_book|int == bid %}selected{% endif %}>{{ title }}</option>
                {% endfor %}
            </select>
        </div>
    </form>

    {% if did_you_mean %}
    <div class="mb-4 did-you-mean text-center">
        האם התכוונת ל: <a href="/?q={{ did_you_mean }}">{{ did_you_mean }}</a> ?
    </div>
    {% endif %}

    {% if query %}
        <div class="d-flex justify-content-between align-items-center mb-3 px-1 mt-4 border-bottom pb-2">
            <h6 class="mb-0 text-muted">הוצגו {{ results|length }} תוצאות (מתוך מאגר של {{ idx_count }} רשומות)</h6>
        </div>

        {% if not results %}
             <div class="text-center py-5">
                <div class="mb-3"><i class="bi bi-search display-1 text-light"></i></div>
                <h4 class="text-muted">לא נמצאו תוצאות</h4>
                <p class="text-muted small">נסה לחפש במילים אחרות</p>
             </div>
        {% endif %}

        {% for r in results %}
        <div class="result-card">
            <div class="d-flex justify-content-between align-items-start mb-2">
                <span class="source-badge"><i class="bi bi-journal-text me-1"></i> {{ r.source }}</span>
                
                <div class="d-flex align-items-center gap-3">
                    <div class="feedback-group d-flex gap-1" id="fb-{{ loop.index }}">
                        <button class="btn btn-sm btn-outline-success border-0 rounded-circle px-2" title="תוצאה מדויקת - תלמד מזה" onclick='sendFeedback(1, {{ r.features|tojson }}, "fb-{{ loop.index }}")'><i class="bi bi-hand-thumbs-up"></i></button>
                        <button class="btn btn-sm btn-outline-danger border-0 rounded-circle px-2" title="תוצאה גרועה - תקן את האלגוריתם" onclick='sendFeedback(-1, {{ r.features|tojson }}, "fb-{{ loop.index }}")'><i class="bi bi-hand-thumbs-down"></i></button>
                    </div>
                    <small class="text-muted" title="ציון רלוונטיות">{{ "%.0f"|format(r.score * 100) }}%</small>
                </div>
            </div>
            <div class="serif-text fs-5 lh-base text-dark">
                {{ r.text | highlight(expanded_query) | safe }}
            </div>
        </div>
        {% endfor %}
    {% endif %}
    {% endif %}
</div>

<div class="modal fade" id="settingsModal" tabindex="-1" aria-hidden="true">
    <div class="modal-dialog modal-dialog-centered modal-lg">
        <div class="modal-content border-0 shadow-lg">
            <form action="/setup" method="post">
                <div class="modal-header bg-light border-bottom">
                    <h5 class="modal-title fw-bold"><i class="bi bi-sliders me-2 text-primary"></i>הגדרות מערכת מתקדמות</h5>
                    <button type="button" class="btn-close" data-bs-dismiss="modal"></button>
                </div>
                <div class="modal-body p-0">
                    <ul class="nav nav-tabs nav-fill border-bottom-0 bg-light pt-2 px-2" id="settingsTabs" role="tablist">
                        <li class="nav-item" role="presentation"><button class="nav-link active fw-bold" data-bs-toggle="tab" data-bs-target="#general" type="button"><i class="bi bi-hdd-network me-1"></i> מקורות</button></li>
                        <li class="nav-item" role="presentation"><button class="nav-link fw-bold" data-bs-toggle="tab" data-bs-target="#search" type="button"><i class="bi bi-search me-1"></i> חיפוש</button></li>
                        <li class="nav-item" role="presentation"><button class="nav-link fw-bold" data-bs-toggle="tab" data-bs-target="#hybrid" type="button"><i class="bi bi-diagram-3 me-1"></i> משקלי אלגוריתם (למידה)</button></li>
                    </ul>

                    <div class="tab-content p-4" id="settingsTabsContent">
                        <div class="tab-pane fade show active" id="general">
                            <div class="mb-4"><label class="form-label small fw-bold text-muted">נתיב בסיס נתונים (DB)</label><input type="text" name="db_path" class="form-control bg-light" value="{{ db_path }}"></div>
                            <div class="row g-3 mb-4">
                                <div class="col-md-6"><label class="form-label small fw-bold text-muted">גרסת מודל</label>
                                    <select name="edition" class="form-select bg-light">{% for e in ["v1","v2","v3"] %}<option value="{{ e }}" {% if e == edition %}selected{% endif %}>{{ e }}</option>{% endfor %}</select>
                                </div>
                                <div class="col-md-6"><label class="form-label small fw-bold text-muted">מקור מודל</label>
                                    <select name="model_source" class="form-select bg-light"><option value="zip" {% if model_source == "zip" %}selected{% endif %}>ZIP מקומי</option><option value="hf" {% if model_source == "hf" %}selected{% endif %}>HuggingFace</option></select>
                                </div>
                            </div>
                            <div class="mb-4"><label class="form-label small fw-bold text-muted">נתיב ZIP (אופציונלי)</label><input type="text" name="zip_path" class="form-control bg-light" value="{{ zip_path or '' }}" placeholder="models_zips/..."></div>
                        </div>

                        <div class="tab-pane fade" id="search">
                            <div class="mb-4"><label class="form-label small fw-bold text-muted">מספר רשומות לאינדקס</label><input type="number" name="max_chunks" class="form-control bg-light" value="{{ max_chunks }}"></div>
                            <div class="row g-3 mb-4">
                                <div class="col-md-4"><label class="form-label small fw-bold text-muted">מילים למקטע (אידיאלי)</label><input type="number" name="ideal_chunk_words" class="form-control bg-light" value="{{ ideal_chunk_words }}"></div>
                                <div class="col-md-4"><label class="form-label small fw-bold text-muted">מקסימום מילים</label><input type="number" name="max_chunk_words" class="form-control bg-light" value="{{ max_chunk_words }}"></div>
                                <div class="col-md-4"><label class="form-label small fw-bold text-muted">חפיפת מילים</label><input type="number" name="overlap_words" class="form-control bg-light" value="{{ overlap_words }}"></div>
                            </div>
                            <div class="row g-3 mb-3">
                                <div class="col-md-6"><label class="form-label small fw-bold text-muted">תוצאות מרביות להצגה</label><input type="number" name="top_k" min="1" max="200" class="form-control bg-light" value="{{ top_k }}"></div>
                                <div class="col-md-6"><label class="form-label small fw-bold text-muted">סף דמיון מינימלי (%)</label><input type="number" name="min_score" min="0" max="100" step="1" class="form-control bg-light" value="{{ min_score }}"></div>
                            </div>
                        </div>

                        <div class="tab-pane fade" id="hybrid">
                            <div class="d-flex justify-content-between align-items-start mb-4">
                                <p class="text-muted small mb-0 w-75">שליטה באופן שבו המערכת מדרגת תוצאות. המערכת גם לומדת בעצמה כשתלחץ על לייק/דיסלייק בתוצאות!</p>
                                <button type="button" class="btn btn-sm btn-outline-primary" onclick="setRecommendedWeights()"><i class="bi bi-stars me-1"></i> שחזר למומלצים</button>
                            </div>
                            {% for key, val, label, rec in [("w_vec", w_vec, "חיפוש סמנטי (Vector)", "0.35"), ("w_bm", w_bm, "חיפוש טקסטואלי + הרחבה", "0.25"), ("w_overlap", w_overlap, "חפיפת מילים", "0.25"), ("w_phrase", w_phrase, "ביטוי מלא", "0.10"), ("w_proximity", w_proximity, "קרבה (Proximity)", "0.05")] %}
                            <div class="weight-card">
                                <div class="d-flex justify-content-between align-items-center mb-1">
                                    <div class="weight-label">{{ label }} <span class="badge bg-light text-primary border ms-2 fw-normal">מומלץ: {{ rec }}</span></div>
                                    <div class="weight-value" id="val_{{ key }}">{{ val }}</div>
                                </div>
                                <input type="range" min="0" max="1" step="0.01" name="{{ key }}" value="{{ val }}" class="form-range" oninput="updateWeight('{{ key }}', this.value)" onchange="autoNormalize()">
                            </div>
                            {% endfor %}
                            <div class="mt-3 p-3 bg-light rounded d-flex justify-content-between align-items-center border">
                                <div class="total-indicator">סכום משקלים: <strong id="total_weight">0.00</strong></div>
                                <span id="total_badge" class="badge bg-secondary px-3 py-2 rounded-pill">...</span>
                            </div>
                        </div>
                    </div>
                </div>
                <div class="modal-footer bg-light border-top">
                    <button type="button" class="btn btn-outline-secondary px-4" data-bs-dismiss="modal">ביטול</button>
                    <button type="submit" class="btn btn-primary px-4"><i class="bi bi-check2 me-1"></i> שמור</button>
                </div>
            </form>
        </div>
    </div>
</div>

<script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/js/bootstrap.bundle.min.js"></script>
<script>
    // --- Autocomplete JS (Smooth & Contextual) ---
    const searchInput = document.getElementById('searchInput');
    const autocompleteDropdown = document.getElementById('autocompleteDropdown');
    const searchBoxContainer = document.getElementById('searchBoxContainer');
    const loadingIndicator = document.getElementById('loading-indicator');
    const searchForm = document.getElementById('searchForm');

    function showLoading() {
        if(loadingIndicator) loadingIndicator.classList.add('active');
        const btn = document.querySelector('#searchBoxContainer button[type="submit"]');
        if(btn) btn.innerHTML = '<div class="spinner-border spinner-border-sm" role="status"></div>';
    }

    window.addEventListener('pageshow', function(event) {
        if (loadingIndicator) loadingIndicator.classList.remove('active');
        const btn = document.querySelector('#searchBoxContainer button[type="submit"]');
        if(btn) btn.innerHTML = '<i class="bi bi-search"></i>';
    });

    if(searchForm) searchForm.addEventListener('submit', showLoading);

    let debounceTimer;

    if(searchInput) {
        searchInput.addEventListener('input', function() {
            clearTimeout(debounceTimer);
            const val = this.value.trim();
            
            if(val.length < 2) { 
                autocompleteDropdown.classList.remove('show-dropdown'); 
                searchBoxContainer.style.borderRadius = "24px";
                return; 
            }
            
            debounceTimer = setTimeout(() => {
                fetch('/api/autocomplete?q=' + encodeURIComponent(val))
                .then(r => r.json())
                .then(suggestions => {
                    if(suggestions.length > 0) {
                        autocompleteDropdown.innerHTML = suggestions.map(s => 
                            `<div class="autocomplete-item"><i class="bi bi-search text-muted"></i> ${s}</div>`
                        ).join('');
                        autocompleteDropdown.classList.add('show-dropdown');
                        searchBoxContainer.style.borderRadius = "24px 24px 0 0";
                        
                        document.querySelectorAll('.autocomplete-item').forEach(item => {
                            item.addEventListener('click', function() {
                                searchInput.value = this.innerText.trim();
                                showLoading();
                                document.getElementById('searchForm').submit();
                            });
                        });
                    } else {
                        autocompleteDropdown.classList.remove('show-dropdown');
                        searchBoxContainer.style.borderRadius = "24px";
                    }
                });
            }, 150); 
        });

        document.addEventListener('click', function(e) {
            if(!searchBoxContainer.contains(e.target) && !autocompleteDropdown.contains(e.target)) {
                autocompleteDropdown.classList.remove('show-dropdown');
                searchBoxContainer.style.borderRadius = "24px";
            }
        });
    }

    // --- Feedback Machine Learning Loop ---
    function sendFeedback(rating, features, elementId) {
        fetch('/api/feedback', {
            method: 'POST',
            headers: {'Content-Type': 'application/json'},
            body: JSON.stringify({ rating: rating, features: features })
        }).then(res => res.json()).then(data => {
            if(data.status === 'ok') {
                document.getElementById(elementId).innerHTML = '<span class="text-success small fw-bold"><i class="bi bi-check2-all me-1"></i> האלגוריתם למד!</span>';
                // Update Settings sliders in background
                Object.keys(data.new_weights).forEach(k => {
                    let s = document.querySelector(`[name='${k}']`);
                    if(s) { s.value = data.new_weights[k]; document.getElementById('val_' + k).innerText = parseFloat(data.new_weights[k]).toFixed(2); }
                });
                if(typeof updateTotal === 'function') updateTotal();
            }
        });
    }

    // --- Settings UI ---
    const WEIGHT_FIELDS = ["w_vec", "w_bm", "w_overlap", "w_phrase", "w_proximity"];
    const RECOMMENDED = { "w_vec": 0.35, "w_bm": 0.25, "w_overlap": 0.25, "w_phrase": 0.10, "w_proximity": 0.05 };
    function updateWeight(name, value) { document.getElementById('val_' + name).innerText = parseFloat(value).toFixed(2); updateTotal(); }
    function updateTotal() {
        let total = 0;
        WEIGHT_FIELDS.forEach(f => { let el = document.querySelector(`[name='${f}']`); if(el) total += parseFloat(el.value); });
        let totalEl = document.getElementById("total_weight"); if(totalEl) totalEl.innerText = total.toFixed(2);
        const badge = document.getElementById("total_badge");
        if(badge) {
            if (Math.abs(total - 1.00) < 0.001) { badge.className = "badge bg-success px-3 py-2"; badge.innerHTML = "<i class='bi bi-check-circle me-1'></i> מנורמל (1.00)"; } 
            else { badge.className = "badge bg-warning text-dark px-3 py-2"; badge.innerHTML = "<i class='bi bi-arrow-clockwise me-1'></i> ינורמל בעזיבה"; }
        }
    }
    function autoNormalize() {
        let total = 0;
        WEIGHT_FIELDS.forEach(f => { let el = document.querySelector(`[name='${f}']`); if(el) total += parseFloat(el.value); });
        if (total === 0) return;
        WEIGHT_FIELDS.forEach(f => { let el = document.querySelector(`[name='${f}']`); if(el) { let n = (parseFloat(el.value) / total).toFixed(2); el.value = n; document.getElementById('val_' + f).innerText = n; }});
        updateTotal();
    }
    function setRecommendedWeights() { WEIGHT_FIELDS.forEach(f => { let el = document.querySelector(`[name='${f}']`); if(el) { el.value = RECOMMENDED[f]; document.getElementById('val_' + f).innerText = RECOMMENDED[f].toFixed(2); }}); updateTotal(); }
    document.getElementById('settingsModal').addEventListener('show.bs.modal', updateTotal);

    function updateStatus() {
        fetch('/status').then(r => r.json()).then(data => {
            const textEl = document.getElementById('status-text'), dotEl = document.getElementById('status-dot'), countEl = document.getElementById('idx-count');
            if(textEl) textEl.innerText = data.msg;
            if(dotEl) { dotEl.className = data.state === 'ready' ? 'rounded-circle bg-success me-2' : (data.state === 'indexing' || data.state === 'downloading' ? 'rounded-circle bg-warning me-2 spinner-grow spinner-grow-sm' : (data.state === 'error' ? 'rounded-circle bg-danger me-2' : 'rounded-circle bg-secondary me-2')); }
            if(data.count && countEl) countEl.innerText = data.count.toLocaleString();
            setTimeout(updateStatus, (data.state === 'indexing' || data.state === 'downloading') ? 1000 : 5000);
        }).catch(e => setTimeout(updateStatus, 5000));
    }
    document.addEventListener('DOMContentLoaded', updateStatus);
</script>

</body>
</html>
"""

# =========================
# HELPER FILTERS
# =========================
def highlight_text(text, query):
    """Wrap occurrences of the query's words inside ``text`` with ``<mark>`` tags.

    Each query word (longer than one character after ``clean_text``) is
    stemmed with ``hebrew_stem`` and matched as a whole word, optionally
    preceded by one single-letter Hebrew prefix.

    Args:
        text: The text to decorate.
        query: The query (or expanded query) whose words should be highlighted.

    Returns:
        ``text`` with ``<mark>…</mark>`` around matched words, or ``text``
        unchanged when there is nothing to highlight or matching fails.

    NOTE(review): ``text`` is not HTML-escaped here, and the page template
    pipes this filter's output through ``| safe``.
    """
    if not query:
        return text

    stems = [hebrew_stem(w) for w in clean_text(query).split() if len(w) > 1]
    # Defensive: an empty stem would make the pattern match bare prefix letters.
    stems = [s for s in stems if s]
    if not stems:
        return text

    # The prefix class lists the letters directly; inside [...] a '|' is a
    # literal pipe, not an alternation operator.
    patterns = [
        r'(?:^|[\s\"\'\-])([ומשהלבכ]?' + re.escape(w) + r')(?=[\s\"\'\.\,\-]|$)'
        for w in stems
    ]
    combined_pattern = "|".join(patterns)

    def replacer(match):
        # The match includes the leading delimiter; wrap only the first run of
        # Hebrew letters so the delimiter stays outside the <mark>.
        full_match = match.group(0)
        word_match = re.search(r'[א-ת]+', full_match)
        if word_match:
            word = word_match.group(0)
            return full_match.replace(word, f'<mark>{word}</mark>')
        return full_match

    try:
        return re.sub(combined_pattern, replacer, text)
    except (re.error, TypeError):
        # Best effort: a bad pattern or non-string text leaves the text as is.
        return text

@app.template_filter('highlight')
def highlight_filter(text, query):
    """Jinja ``highlight`` filter: return ``highlight_text(text, query)``."""
    marked = highlight_text(text, query)
    return marked

# =========================
# ROUTES
# =========================
@app.route("/")
def index():
    """Render the main page and, when ``q`` is given, run the search.

    Query parameters:
        q: The search text.
        book_id: Optional numeric book id used to restrict the search.

    If a query arrives while no index is built and nothing is currently
    indexing or downloading, a background thread loads the resources and
    builds the index; the page is rendered without results in that case.
    """
    q = request.args.get("q", "").strip()
    book_id_str = request.args.get("book_id", "")
    # isdecimal() rather than isdigit(): isdigit() also accepts characters
    # such as superscript digits, which int() rejects with ValueError.
    book_id = int(book_id_str) if book_id_str.isdecimal() else None

    # Read every setting once; the same values feed the background build
    # and the settings form rendered below.
    cfg = ENGINE.last_cfg or {}
    top_k = int(cfg.get("top_k", DEFAULT_TOP_K))
    min_score_pct = float(cfg.get("min_score", DEFAULT_MIN_SCORE))
    min_score = min_score_pct / 100.0  # stored as a percentage
    db_path = cfg.get("db_path", DEFAULT_DB_PATH)
    edition = cfg.get("edition", "v3")
    model_source = cfg.get("model_source", "zip")
    zip_path = cfg.get("zip_path", "")
    max_chunks = int(cfg.get("max_chunks", 100000))
    ideal_chunk_words = int(cfg.get("ideal_chunk_words", IDEAL_CHUNK_WORDS))
    max_chunk_words = int(cfg.get("max_chunk_words", MAX_CHUNK_WORDS))
    overlap_words = int(cfg.get("overlap_words", DEFAULT_OVERLAP_WORDS))

    results = []
    did_you_mean = None
    expanded_query = q  # default highlight terms when no expansion is available

    # Book selector: at most 800 entries, ordered by title.
    all_books = ENGINE.book_map.copy()
    sorted_books = dict(sorted(all_books.items(), key=lambda item: item[1])[:800])

    if q:
        if not ENGINE.built and ENGINE.status["state"] not in ("indexing", "downloading"):
            def task():
                try:
                    ENGINE.load_resources(db_path, edition, model_source=model_source, zip_path=zip_path)
                    ENGINE.build_index(db_path, max_chunks, ideal_chunk_words, max_chunk_words, overlap_words)
                except Exception as e:
                    ENGINE._update("error", f"שגיאה: {e}", 0)
            threading.Thread(target=task, daemon=True).start()
        else:
            # Spell check ("did you mean").
            correction = ENGINE.check_spelling(q)
            if correction and correction != clean_text(q):
                did_you_mean = correction

            # Terms to highlight, including the expansion terms.
            if ENGINE.model:
                expanded_query = " ".join(ENGINE.get_expanded_terms(clean_text(q)))

            # This branch is also reached while an index is still being
            # built; only query the engine once an index actually exists.
            if ENGINE.built:
                # Over-fetch so the score threshold can still leave top_k hits.
                raw = ENGINE.search(q, book_filter=book_id, top_k=top_k * 3)
                results = [r for r in raw if r["score"] >= min_score][:top_k]

    idx_c = ENGINE.built.count if ENGINE.built else 0
    db_exists = os.path.exists(db_path)
    model_loaded = (ENGINE.model is not None)

    return render_template_string(
        HTML_TEMPLATE,
        model_loaded=model_loaded,
        db_exists=db_exists,
        query=q,
        did_you_mean=did_you_mean,
        results=results,
        db_path=db_path,
        edition=edition,
        max_chunks=max_chunks,
        model_source=model_source,
        zip_path=zip_path,
        idx_count=idx_c,
        books=sorted_books,
        selected_book=book_id,
        top_k=top_k,
        min_score=int(min_score_pct),
        ideal_chunk_words=ideal_chunk_words,
        max_chunk_words=max_chunk_words,
        overlap_words=overlap_words,
        w_vec=cfg.get("w_vec", 0.35),
        w_bm=cfg.get("w_bm", 0.25),
        w_overlap=cfg.get("w_overlap", 0.25),
        w_phrase=cfg.get("w_phrase", 0.10),
        w_proximity=cfg.get("w_proximity", 0.05),
        expanded_query=expanded_query,
    )

@app.route("/api/autocomplete")
def autocomplete():
    """Return up to six completions for the ``q`` parameter as a JSON list.

    Strategy 1 runs a phrase-prefix query against the ``chunks_fts`` table of
    the built index's metadata database and proposes the typed text extended
    by the next word found in each matching chunk.

    Strategy 2 is used only when strategy 1 yields nothing and the query is a
    single word: vocabulary words starting with it, most frequent first.

    An empty list is returned when the query is shorter than two characters
    or no index with a metadata database is available.
    """
    q = request.args.get("q", "").strip()
    built = ENGINE.built  # read once so every check below sees the same object
    if len(q) < 2 or not built or not built.meta_db_path:
        return jsonify([])

    clean_q = clean_text(q)
    if not clean_q:
        return jsonify([])

    words = clean_q.split()
    target_len = len(words)

    # Strategy 1: exact continuation taken from the indexed text.
    # Embedded double quotes are doubled so the phrase stays one FTS5 string.
    fts_query = '"{}"*'.format(clean_q.replace('"', '""'))

    suggestions = []
    con = None
    try:
        con = sqlite3.connect(built.meta_db_path, timeout=5)
        con.row_factory = sqlite3.Row
        rows = con.execute(
            "SELECT text FROM chunks_fts WHERE chunks_fts MATCH ? LIMIT 15",
            (fts_query,)
        ).fetchall()

        seen = set()
        for row in rows:
            txt = clean_text(row["text"])
            idx = txt.find(clean_q)
            if idx == -1:
                continue
            snippet_words = txt[idx:].split()
            if len(snippet_words) <= target_len:
                continue  # the chunk ends where the typed text ends
            completion = " ".join(snippet_words[:target_len + 1])
            if completion not in seen:
                seen.add(completion)
                suggestions.append(completion)
    except Exception as exc:
        # Best effort: record the failure and fall through to strategy 2.
        app.logger.warning("autocomplete FTS lookup failed: %s", exc)
        suggestions = []
    finally:
        if con is not None:
            con.close()

    if suggestions:
        return jsonify(suggestions[:6])

    # Strategy 2: vocabulary fallback, only for a single typed word.
    model = ENGINE.model
    if target_len == 1 and model is not None:
        freqs = model.word_freqs
        prefix = words[0]
        matches = sorted(
            (w for w in freqs if w.startswith(prefix)),
            key=lambda w: freqs[w],
            reverse=True,
        )
        return jsonify(matches[:6])

    return jsonify([])

@app.route("/api/feedback", methods=["POST"])
def feedback():
    """Nudge the ranking weights according to a like/dislike on one result.

    Expects a JSON object ``{"rating": <number>, "features": {...}}`` where
    ``features`` may carry the keys ``vec``, ``bm``, ``overlap``, ``phrase``
    and ``prox``. Each weight is moved by ``lr * rating * feature``, floored
    at 0.01, and the five weights are then renormalised and rounded to three
    decimals before being saved.

    Returns:
        ``{"status": "ok", "new_weights": {...}}`` holding only the five
        weights, or ``{"status": "error"}`` for a missing or malformed payload.
    """
    # (weight key in the settings, default value, feature key in the payload)
    weight_spec = [
        ("w_vec", 0.35, "vec"),
        ("w_bm", 0.25, "bm"),
        ("w_overlap", 0.25, "overlap"),
        ("w_phrase", 0.10, "phrase"),
        ("w_proximity", 0.05, "prox"),
    ]

    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"status": "error"})

    feats = data.get("features", {})
    try:
        rating = float(data.get("rating", 0))
    except (TypeError, ValueError):
        return jsonify({"status": "error"})
    # A non-finite rating would turn every weight into NaN after normalising.
    if rating == 0 or not math.isfinite(rating) or not isinstance(feats, dict) or not feats:
        return jsonify({"status": "error"})

    def feature(name):
        """Return the named feature as a finite float, or 0.0 if unusable."""
        try:
            value = float(feats.get(name, 0))
        except (TypeError, ValueError):
            return 0.0
        return value if math.isfinite(value) else 0.0

    cfg = load_settings()
    lr = 0.05
    weights = {}
    for weight_key, default, feat_key in weight_spec:
        current = float(cfg.get(weight_key, default))
        weights[weight_key] = max(0.01, current + lr * rating * feature(feat_key))

    total = sum(weights.values())  # at least 0.05 because of the 0.01 floor
    for key, value in weights.items():
        cfg[key] = round(value / total, 3)

    save_settings(cfg)
    ENGINE.last_cfg = cfg

    # Send back only the weights. The page script looks up a "val_<key>"
    # element for every returned key, and the rest of the settings (paths,
    # limits) have no such element and are not meant for this response.
    new_weights = {key: cfg[key] for key in weights}
    return jsonify({"status": "ok", "new_weights": new_weights})

@app.route("/setup", methods=["POST"])
def setup():
    """Persist the settings form and rebuild the index when required.

    All submitted values are saved. If any structural setting changed
    (database path, edition, model source, ZIP path, record limit or the
    chunking parameters), a background thread rebuilds the index, reloading
    the resources first when the model or database changed or no model is
    loaded yet. Always redirects to the main page.

    Numeric fields that are missing, blank or malformed fall back to their
    defaults instead of failing the request.
    """
    def form_int(name, default):
        """Read an integer form field, or ``default`` if it is unusable."""
        try:
            return int(request.form.get(name, default))
        except (TypeError, ValueError):
            return default

    def form_float(name, default):
        """Read a finite float form field, or ``default`` if it is unusable."""
        try:
            value = float(request.form.get(name, default))
        except (TypeError, ValueError):
            return default
        return value if math.isfinite(value) else default

    cfg = load_settings()

    # Previous values, kept for comparison.
    old_db = cfg.get("db_path", DEFAULT_DB_PATH)
    old_edition = cfg.get("edition", "v3")
    old_max_chunks = int(cfg.get("max_chunks", 100000))
    old_source = cfg.get("model_source", "zip")
    old_zip = cfg.get("zip_path", "")
    old_ideal = int(cfg.get("ideal_chunk_words", IDEAL_CHUNK_WORDS))
    old_max_w = int(cfg.get("max_chunk_words", MAX_CHUNK_WORDS))
    old_overlap = int(cfg.get("overlap_words", DEFAULT_OVERLAP_WORDS))

    # Newly submitted values.
    new_db = request.form.get("db_path", DEFAULT_DB_PATH).strip()
    new_edition = request.form.get("edition", "v3").strip()
    new_max_chunks = form_int("max_chunks", 100000)
    new_source = request.form.get("model_source", "zip").strip()
    new_zip = request.form.get("zip_path", "").strip()
    new_ideal = form_int("ideal_chunk_words", IDEAL_CHUNK_WORDS)
    new_max_w = form_int("max_chunk_words", MAX_CHUNK_WORDS)
    new_overlap = form_int("overlap_words", DEFAULT_OVERLAP_WORDS)

    # Enforce server-side the same bounds the form declares on its inputs
    # (top_k: 1..200, min_score: 0..100).
    new_top_k = min(200, max(1, form_int("top_k", DEFAULT_TOP_K)))
    new_min_score = min(100.0, max(0.0, form_float("min_score", DEFAULT_MIN_SCORE)))

    cfg.update({
        "db_path": new_db,
        "edition": new_edition,
        "max_chunks": new_max_chunks,
        "model_source": new_source,
        "zip_path": new_zip,
        "ideal_chunk_words": new_ideal,
        "max_chunk_words": new_max_w,
        "overlap_words": new_overlap,
        "top_k": new_top_k,
        "min_score": new_min_score,
        "w_vec": form_float("w_vec", 0.35),
        "w_bm": form_float("w_bm", 0.25),
        "w_overlap": form_float("w_overlap", 0.25),
        "w_phrase": form_float("w_phrase", 0.10),
        "w_proximity": form_float("w_proximity", 0.05),
    })
    save_settings(cfg)
    ENGINE.last_cfg = cfg

    resources_changed = (
        new_edition != old_edition
        or new_source != old_source
        or new_zip != old_zip
        or new_db != old_db
    )
    chunking_changed = (
        new_max_chunks != old_max_chunks
        or new_ideal != old_ideal
        or new_max_w != old_max_w
        or new_overlap != old_overlap
    )

    # A rebuild is needed only when a structural parameter changed.
    if resources_changed or chunking_changed:

        def reload_task():
            try:
                # Reload the resources when the model or DB changed, or when
                # no model has been loaded yet.
                if not ENGINE.model or resources_changed:
                    ENGINE.load_resources(new_db, new_edition, new_source, new_zip)

                # Build the index (or load it if it already exists).
                ENGINE.build_index(new_db, new_max_chunks, new_ideal, new_max_w, new_overlap)
            except Exception as e:
                ENGINE._update("error", f"שגיאה בטעינה מחדש: {e}", 0)

        threading.Thread(target=reload_task, daemon=True).start()

    return redirect("/")

@app.route("/status")
def status_api():
    """Return a copy of the engine status as JSON.

    When an index is built, its record count is added under ``count``.
    """
    payload = ENGINE.status.copy()
    if ENGINE.built:
        payload["count"] = ENGINE.built.count
    return jsonify(payload)

# Helper UI Routes
@app.route("/upload_model", methods=["POST"])
def upload_model():
    """Store an uploaded model ZIP, select it in the settings and reload.

    The file is saved under ``MODELS_ZIPS_DIR``, the settings are switched to
    the ``zip`` model source pointing at it, and a background thread loads
    the resources and builds the index. Always redirects to the main page;
    a request without a usable file does nothing else.
    """
    file = request.files.get('file')
    if file and file.filename:
        # basename() is a second guard in case secure_filename is the
        # pass-through fallback and the name still carries directories.
        safe_name = os.path.basename(secure_filename(file.filename))
        if safe_name in ("", ".", ".."):
            return redirect("/")

        target = os.path.join(MODELS_ZIPS_DIR, safe_name)
        file.save(target)

        cfg = load_settings()
        cfg.update({"model_source": "zip", "zip_path": target})
        save_settings(cfg)
        # Keep the in-memory settings in step with the saved file, as the
        # other settings-changing routes do.
        ENGINE.last_cfg = cfg

        db_path = cfg.get("db_path", DEFAULT_DB_PATH)
        edition = cfg.get("edition", "v3")

        def task():
            try:
                ENGINE.load_resources(db_path, edition, "zip", target)
                ENGINE.build_index(
                    db_path,
                    int(cfg.get("max_chunks", 100000)),
                    int(cfg.get("ideal_chunk_words", IDEAL_CHUNK_WORDS)),
                    int(cfg.get("max_chunk_words", MAX_CHUNK_WORDS)),
                    int(cfg.get("overlap_words", DEFAULT_OVERLAP_WORDS)),
                )
            except Exception as e:
                # Surface the failure through the status instead of letting
                # the worker thread die silently.
                ENGINE._update("error", f"שגיאה: {e}", 0)

        threading.Thread(target=task, daemon=True).start()
    return redirect("/")

@app.route("/download_db", methods=["POST"])
def download_db():
    """Download the database archive in the background and register it.

    A daemon thread streams ``DB_DOWNLOAD_URL`` into ``DB_DIR/seforim.zip``,
    extracts it into ``DB_DIR``, picks the first ``.db``/``.sqlite`` file found
    under ``DB_DIR`` and stores it as ``db_path`` in the settings. Progress and
    failures are published through ``ENGINE._update``.

    Returns:
        An immediate redirect to ``/``; the work continues in the thread.
    """
    def task():
        try:
            ENGINE._update("downloading", "מוריד מסד נתונים...", 0)
            zip_path = os.path.join(DB_DIR, "seforim.zip")
            # (connect, read) timeouts so a stalled connection cannot hang the thread forever.
            with requests.get(DB_DOWNLOAD_URL, stream=True, timeout=(10, 60)) as r:
                # Fail early on HTTP errors instead of saving an error page as the archive.
                r.raise_for_status()
                total_len = int(r.headers.get('content-length', 0))
                dl = 0
                last_pct = -1
                with open(zip_path, 'wb') as f:
                    for chunk in r.iter_content(8192):
                        if not chunk:
                            continue
                        dl += len(chunk)
                        f.write(chunk)
                        if total_len:
                            pct = int((dl / total_len) * 100)
                            # Publish only when the visible percentage actually changes.
                            if pct != last_pct:
                                last_pct = pct
                                ENGINE._update("downloading", "מוריד מסד נתונים...", pct)

            ENGINE._update("indexing", "מחלץ...", 0)
            with zipfile.ZipFile(zip_path, "r") as z:
                z.extractall(DB_DIR)

            db_file = next(
                (os.path.join(root_dir, name)
                 for root_dir, _dirs, files in os.walk(DB_DIR)
                 for name in files
                 if name.endswith((".db", ".sqlite"))),
                None,
            )
            if not db_file:
                # Without this the status would stay on "extracting" forever.
                ENGINE._update("error", "לא נמצא קובץ מסד נתונים (.db / .sqlite) לאחר החילוץ", 0)
                return

            cfg = load_settings()
            cfg["db_path"] = db_file
            save_settings(cfg)
            ENGINE.last_cfg = cfg
            ENGINE._update("ready", "הסתיים בהצלחה", 100)
        except Exception as e:
            ENGINE._update("error", str(e), 0)

    threading.Thread(target=task, daemon=True).start()
    return redirect("/")

@app.route("/upload_db", methods=["POST"])
def upload_db():
    """Store an uploaded database (or ZIP containing one) and reload from it.

    The upload is saved under ``DB_DIR``. A background thread then extracts it
    if it is a ``.zip``, records the database path in the settings, refreshes
    ``ENGINE.book_map`` and, when a model source is configured, reloads
    resources and rebuilds the index. Failures are published through
    ``ENGINE._update``.

    Returns:
        A redirect to ``/`` in every case (including when nothing was uploaded).
    """
    file = request.files.get('file')
    if not file or not file.filename:
        return redirect("/")

    safe_name = secure_filename(file.filename)
    if not safe_name:
        # The sanitised name may be empty; saving would then target the
        # directory itself, so there is nothing usable to store.
        return redirect("/")

    target = os.path.join(DB_DIR, safe_name)
    file.save(target)

    def task():
        try:
            db_file = target
            if target.lower().endswith(".zip"):
                ENGINE._update("indexing", "מחלץ...", 0)
                with zipfile.ZipFile(target, 'r') as z:
                    z.extractall(DB_DIR)
                db_file = next(
                    (os.path.join(root_dir, name)
                     for root_dir, _dirs, files in os.walk(DB_DIR)
                     for name in files
                     if name.endswith((".db", ".sqlite"))),
                    None,
                )

            if not db_file:
                # Without this the status would stay on "extracting" forever.
                ENGINE._update("error", "לא נמצא קובץ מסד נתונים (.db / .sqlite) לאחר החילוץ", 0)
                return

            cfg = load_settings()
            cfg["db_path"] = db_file
            save_settings(cfg)
            ENGINE.book_map = get_book_titles(db_file)

            # Only reload/rebuild when a model source has been configured.
            if cfg.get("model_source"):
                ENGINE.load_resources(db_file, cfg.get("edition", "v3"), cfg["model_source"], cfg.get("zip_path", ""))
                ENGINE.build_index(db_file, int(cfg.get("max_chunks", 100000)),
                                   int(cfg.get("ideal_chunk_words", IDEAL_CHUNK_WORDS)),
                                   int(cfg.get("max_chunk_words", MAX_CHUNK_WORDS)),
                                   int(cfg.get("overlap_words", DEFAULT_OVERLAP_WORDS)))
        except Exception as e:
            ENGINE._update("error", str(e), 0)

    threading.Thread(target=task).start()
    return redirect("/")

@app.route("/select_local_db")
def select_local_db():
    """Open a native file dialog to pick a database and save it as ``db_path``.

    The dialog is created in this process via Tkinter. If a path is chosen it
    is written to the settings and mirrored to ``ENGINE.last_cfg``.

    Returns:
        A redirect to ``/`` whether or not a file was chosen.
    """
    root = tk.Tk()
    try:
        root.withdraw()
        root.attributes("-topmost", True)
        path = filedialog.askopenfilename(
            title="בחר מסד נתונים",
            filetypes=[("DB", "*.db *.sqlite"), ("ZIP", "*.zip")],
        )
    finally:
        # Always tear down the hidden root window, even if the dialog raises.
        root.destroy()

    if path:
        # NOTE(review): the filter also offers ZIP files, but the chosen path is
        # stored as-is with no extraction here — confirm downstream handling.
        cfg = load_settings()
        cfg["db_path"] = path
        save_settings(cfg)
        ENGINE.last_cfg = cfg
    return redirect("/")

@app.route("/select_local_zip")
def select_local_zip():
    """Open a native file dialog to pick a model ZIP and save it in the settings.

    The dialog is created in this process via Tkinter. If a path is chosen the
    settings are updated to ``model_source="zip"`` with that ``zip_path`` and
    mirrored to ``ENGINE.last_cfg``.

    Returns:
        A redirect to ``/`` whether or not a file was chosen.
    """
    root = tk.Tk()
    try:
        root.withdraw()
        root.attributes("-topmost", True)
        path = filedialog.askopenfilename(title="בחר מודל", filetypes=[("ZIP", "*.zip")])
    finally:
        # Always tear down the hidden root window, even if the dialog raises.
        root.destroy()

    if path:
        cfg = load_settings()
        cfg.update({"model_source": "zip", "zip_path": path})
        save_settings(cfg)
        ENGINE.last_cfg = cfg
    return redirect("/")

# =========================
# MAIN
# =========================
if __name__ == "__main__":
    # Single source of truth for debug mode. ``app.debug`` is only set once
    # ``app.run(debug=...)`` executes, so reading it before that call does not
    # reflect the mode the server is about to run in.
    debug_mode = True

    cfg = load_settings()
    ENGINE.last_cfg = cfg

    def boot():
        """Load resources and build the index from the saved settings."""
        try:
            db_path = cfg.get("db_path", DEFAULT_DB_PATH)
            ENGINE.load_resources(db_path, cfg.get("edition", "v3"), cfg.get("model_source", "zip"), cfg.get("zip_path", ""))
            ENGINE.build_index(db_path, int(cfg.get("max_chunks", 100000)),
                               int(cfg.get("ideal_chunk_words", IDEAL_CHUNK_WORDS)),
                               int(cfg.get("max_chunk_words", MAX_CHUNK_WORDS)),
                               int(cfg.get("overlap_words", DEFAULT_OVERLAP_WORDS)))
        except Exception as e:
            ENGINE._update("error", f"שגיאה בהפעלה: {e}", 0)

    # With the debug reloader active, this script runs twice: once as the
    # watcher parent and once as the serving child (WERKZEUG_RUN_MAIN == "true").
    # Start the heavy boot only in the serving process to avoid double loading.
    if not debug_mode or os.environ.get("WERKZEUG_RUN_MAIN") == "true":
        threading.Thread(target=boot, daemon=True).start()

    print("Starting DeepSearch Google-like AI at http://127.0.0.1:8000")
    # use_reloader=False is another option, but the WERKZEUG_RUN_MAIN check keeps the debug features.
    app.run(host="127.0.0.1", port=8000, debug=debug_mode)