UnMelow committed
Commit e22a543 · verified · 1 Parent(s): 5c95655

Update app.py

Files changed (1):
  1. app.py +623 -615
app.py CHANGED
@@ -1,673 +1,681 @@
  import os
- import random
  import math
  from dataclasses import dataclass
- from typing import List, Tuple, Dict, Optional

- import gradio as gr
  import torch
- from PIL import Image, ImageDraw, ImageFont

  from transformers import (
      AutoTokenizer,
      AutoModel,
-     AutoModelForSeq2SeqLM,
-     AutoModelForCausalLM,
  )

- # ============================================================
- # CPU setup
- # ============================================================
  os.environ.setdefault("TOKENIZERS_PARALLELISM", "false")
  DEVICE = torch.device("cpu")
- torch.set_num_threads(int(os.getenv("TORCH_NUM_THREADS", "4")))
-
- # ============================================================
- # 3 Transformers (minimum)
- # 1) Coach (Seq2Seq)
- # 2) Opponent (Causal LM)
- # 3) Embeddings (Encoder)
- # ============================================================
- COACH_MODEL_NAME = os.getenv("COACH_MODEL", "google/flan-t5-small")
- OPP_MODEL_NAME = os.getenv("OPP_MODEL", "distilgpt2")
- EMB_MODEL_NAME = os.getenv("EMB_MODEL", "sentence-transformers/paraphrase-MiniLM-L3-v2")
-
- coach_tok = AutoTokenizer.from_pretrained(COACH_MODEL_NAME)
- coach_model = AutoModelForSeq2SeqLM.from_pretrained(COACH_MODEL_NAME).eval().to(DEVICE)
-
- opp_tok = AutoTokenizer.from_pretrained(OPP_MODEL_NAME)
- opp_model = AutoModelForCausalLM.from_pretrained(OPP_MODEL_NAME).eval().to(DEVICE)
-
- emb_tok = AutoTokenizer.from_pretrained(EMB_MODEL_NAME)
- emb_model = AutoModel.from_pretrained(EMB_MODEL_NAME).eval().to(DEVICE)
-
-
- # ============================================================
- # Checkers engine (English draughts-like)
- # Pieces:
- #   '.' empty
- #   'w' white man (user)
- #   'W' white king
- #   'b' black man (bot)
- #   'B' black king
- #
- # Coordinates:
- #   internal: r=0..7 top->bottom, c=0..7 left->right
- #   dark squares: (r+c)%2==1
- # Move string:
- #   "b6-a5" or "c3-e5-g7" using a-h and 1-8 (1 is bottom row).
- # ============================================================
-
- def inside(r: int, c: int) -> bool:
-     return 0 <= r < 8 and 0 <= c < 8
-
- def is_dark(r: int, c: int) -> bool:
-     return (r + c) % 2 == 1
-
- def rc_to_alg(r: int, c: int) -> str:
-     # a1 bottom-left => internal (7,0)
-     file_ = chr(ord("a") + c)
-     rank = str(8 - r)
-     return f"{file_}{rank}"
-
- def alg_to_rc(s: str) -> Tuple[int, int]:
-     s = s.strip().lower()
-     c = ord(s[0]) - ord("a")
-     r = 8 - int(s[1])
-     return r, c
-
- def move_seq_to_str(seq: List[Tuple[int, int]]) -> str:
-     return "-".join(rc_to_alg(r, c) for r, c in seq)
-
- def move_str_to_seq(s: str) -> List[Tuple[int, int]]:
-     parts = [p.strip() for p in s.split("-") if p.strip()]
-     return [alg_to_rc(p) for p in parts]
-
- def piece_color(p: str) -> Optional[str]:
-     if p in ("w", "W"):
-         return "w"
-     if p in ("b", "B"):
-         return "b"
-     return None
-
- def is_king(p: str) -> bool:
-     return p in ("W", "B")


- @dataclass
- class GameState:
-     board: List[List[str]]
-     turn: str  # "w" user, "b" bot
-     history: List[str]
-     last_analysis: str
-
-
- def initial_board() -> List[List[str]]:
-     b = [["." for _ in range(8)] for _ in range(8)]
-     # Black at top rows 0-2 on dark squares
-     for r in range(0, 3):
-         for c in range(8):
-             if is_dark(r, c):
-                 b[r][c] = "b"
-     # White at bottom rows 5-7 on dark squares
-     for r in range(5, 8):
-         for c in range(8):
-             if is_dark(r, c):
-                 b[r][c] = "w"
-     return b
-
- def clone_board(board: List[List[str]]) -> List[List[str]]:
-     return [row[:] for row in board]
-
- def board_to_ascii(board: List[List[str]]) -> str:
-     # compact representation for prompting
-     lines = []
-     for r in range(8):
-         lines.append("".join(board[r]))
-     return "\n".join(lines)
-
- def count_material(board: List[List[str]]) -> Dict[str, float]:
-     score = {"w": 0.0, "b": 0.0}
-     for r in range(8):
-         for c in range(8):
-             p = board[r][c]
-             if p == "w":
-                 score["w"] += 1.0
-             elif p == "W":
-                 score["w"] += 1.6
-             elif p == "b":
-                 score["b"] += 1.0
-             elif p == "B":
-                 score["b"] += 1.6
-     return score
-
- def promote_if_needed(p: str, r: int) -> str:
-     if p == "w" and r == 0:
-         return "W"
-     if p == "b" and r == 7:
-         return "B"
-     return p
-
-
- # ----------------------------
- # Move generation
- # ----------------------------
- def move_dirs(p: str) -> List[Tuple[int, int]]:
-     # movement directions (step)
-     if p == "w":
-         return [(-1, -1), (-1, +1)]
-     if p == "b":
-         return [(+1, -1), (+1, +1)]
-     # kings
-     if p in ("W", "B"):
-         return [(-1, -1), (-1, +1), (+1, -1), (+1, +1)]
-     return []
-
- def capture_dirs(p: str) -> List[Tuple[int, int]]:
-     # English draughts: men capture forward only; kings both ways
-     return move_dirs(p)
-
- def gen_simple_moves(board: List[List[str]], color: str) -> List[List[Tuple[int, int]]]:
-     moves = []
-     for r in range(8):
-         for c in range(8):
-             p = board[r][c]
-             if piece_color(p) != color:
-                 continue
-             for dr, dc in move_dirs(p):
-                 r2, c2 = r + dr, c + dc
-                 if inside(r2, c2) and board[r2][c2] == ".":
-                     moves.append([(r, c), (r2, c2)])
-     return moves

- def gen_captures_from(board: List[List[str]], r: int, c: int, p: str) -> List[List[Tuple[int, int]]]:
      """
-     Returns capture sequences starting at (r,c), including start and landings.
-     If man reaches king row during capture, we stop (promotion at end of move).
      """
-     color = piece_color(p)
-     assert color in ("w", "b")

-     sequences = []
-     found_any = False

-     for dr, dc in capture_dirs(p):
-         r_mid, c_mid = r + dr, c + dc
-         r2, c2 = r + 2 * dr, c + 2 * dc
-         if not (inside(r2, c2) and inside(r_mid, c_mid)):
-             continue
-         mid_piece = board[r_mid][c_mid]
-         if mid_piece == ".":
-             continue
-         if piece_color(mid_piece) == color:
-             continue
-         if board[r2][c2] != ".":
-             continue

-         # perform capture on a cloned board
-         nb = clone_board(board)
-         nb[r][c] = "."
-         nb[r_mid][c_mid] = "."
-         nb[r2][c2] = p  # promotion deferred

-         # stop extending if this is a man that reaches king row
-         if (p == "w" and r2 == 0) or (p == "b" and r2 == 7):
-             sequences.append([(r, c), (r2, c2)])
-             found_any = True
-             continue

-         tails = gen_captures_from(nb, r2, c2, p)
-         if tails:
-             for t in tails:
-                 sequences.append([(r, c)] + t[1:])
-             found_any = True
-         else:
-             sequences.append([(r, c), (r2, c2)])
-             found_any = True

-     return sequences if found_any else []

- def gen_legal_moves(board: List[List[str]], color: str) -> List[List[Tuple[int, int]]]:
-     captures = []
-     for r in range(8):
-         for c in range(8):
-             p = board[r][c]
-             if piece_color(p) != color:
                  continue
-             caps = gen_captures_from(board, r, c, p)
-             captures.extend(caps)
-
-     # forced capture rule
-     if captures:
-         # remove duplicates (can arise via different recursion paths)
-         uniq = {}
-         for seq in captures:
-             key = tuple(seq)
-             uniq[key] = seq
-         return list(uniq.values())
-
-     return gen_simple_moves(board, color)
-
- def apply_move(board: List[List[str]], seq: List[Tuple[int, int]]) -> List[List[str]]:
-     nb = clone_board(board)
-     (r0, c0) = seq[0]
-     p = nb[r0][c0]
-     nb[r0][c0] = "."
-
-     for i in range(1, len(seq)):
-         (r1, c1) = seq[i - 1]
-         (r2, c2) = seq[i]
-         # capture if jump
-         if abs(r2 - r1) == 2 and abs(c2 - c1) == 2:
-             rm = (r1 + r2) // 2
-             cm = (c1 + c2) // 2
-             nb[rm][cm] = "."
-
-     (rf, cf) = seq[-1]
-     p2 = promote_if_needed(p, rf)
-     nb[rf][cf] = p2
-     return nb
-
- def winner(board: List[List[str]]) -> Optional[str]:
-     # winner if opponent has no pieces or no moves
-     w_cnt = 0
-     b_cnt = 0
-     for r in range(8):
-         for c in range(8):
-             if board[r][c] in ("w", "W"):
-                 w_cnt += 1
-             elif board[r][c] in ("b", "B"):
-                 b_cnt += 1
-     if w_cnt == 0:
-         return "b"
-     if b_cnt == 0:
-         return "w"
-     if not gen_legal_moves(board, "w"):
-         return "b"
-     if not gen_legal_moves(board, "b"):
-         return "w"
-     return None
-
-
- # ============================================================
- # Simple engine for analysis (not a transformer):
- # minimax on material + mobility, small depth for CPU.
- # ============================================================
- def eval_board(board: List[List[str]]) -> float:
-     m = count_material(board)
-     # positive => good for white
-     score = (m["w"] - m["b"])
-     # mobility bonus
-     score += 0.04 * (len(gen_legal_moves(board, "w")) - len(gen_legal_moves(board, "b")))
-     return score
-
- def minimax(board: List[List[str]], color: str, depth: int, alpha: float, beta: float) -> Tuple[float, Optional[List[Tuple[int, int]]]]:
-     win = winner(board)
-     if win == "w":
-         return 10_000.0, None
-     if win == "b":
-         return -10_000.0, None
-
-     if depth == 0:
-         return eval_board(board), None
-
-     moves = gen_legal_moves(board, color)
-     if not moves:
-         # no moves => lose
-         return (-10_000.0 if color == "w" else 10_000.0), None
-
-     best_move = None
-
-     if color == "w":
-         best = -math.inf
-         for mv in moves:
-             nb = apply_move(board, mv)
-             val, _ = minimax(nb, "b", depth - 1, alpha, beta)
-             if val > best:
-                 best = val
-                 best_move = mv
-             alpha = max(alpha, best)
-             if beta <= alpha:
                  break
-         return best, best_move
      else:
-         best = math.inf
-         for mv in moves:
-             nb = apply_move(board, mv)
-             val, _ = minimax(nb, "w", depth - 1, alpha, beta)
-             if val < best:
-                 best = val
-                 best_move = mv
-             beta = min(beta, best)
-             if beta <= alpha:
-                 break
-         return best, best_move
-
-
- # ============================================================
- # Embeddings (transformer #3) for retrieving tips
- # ============================================================
- TIPS = [
-     "Всегда проверяй обязательный бой: если есть взятие, обычный ход запрещён.",
-     "Старайся сохранять дамочную линию: не открывай край без причины.",
-     "Не меняйся, если это приводит к потере темпа и отдаёт центр.",
-     "Центр важен: контроль диагоналей увеличивает мобильность и шансы на многоходовые взятия.",
-     "Перед ходом оцени ответ соперника: что он берёт или чем отвечает на диагонали?",
-     "Если видишь возможность мультибоя, считай траекторию до конца — важно, где ты остановишься.",
-     "Дамка сильнее: иногда стоит пожертвовать шашку ради прохода в дамки.",
-     "Не оставляй одиночные шашки без поддержки — их легко поймать взятием.",
-     "Думай про 'вилку' (двойную угрозу) и про то, чтобы не подставлять шашку под обязательный бой.",
- ]

- @torch.no_grad()
- def embed_text(text: str) -> torch.Tensor:
-     toks = emb_tok(text, return_tensors="pt", truncation=True, max_length=128, padding=True)
-     toks = {k: v.to(DEVICE) for k, v in toks.items()}
-     out = emb_model(**toks)
-     # mean pooling
-     last = out.last_hidden_state  # [B,T,H]
-     mask = toks["attention_mask"].unsqueeze(-1)  # [B,T,1]
-     pooled = (last * mask).sum(dim=1) / mask.sum(dim=1).clamp(min=1)
-     pooled = torch.nn.functional.normalize(pooled, p=2, dim=1)
-     return pooled[0].cpu()
-
- TIP_EMBS = torch.stack([embed_text(t) for t in TIPS], dim=0)  # [N,H]
-
- def retrieve_tips(query: str, k: int = 3) -> List[str]:
-     q = embed_text(query)
-     sims = (TIP_EMBS @ q.unsqueeze(1)).squeeze(1)  # [N]
-     top = torch.topk(sims, k=min(k, len(TIPS))).indices.tolist()
-     return [TIPS[i] for i in top]
-
-
- # ============================================================
- # Coach (transformer #1): generates explanation/feedback
- # ============================================================
- @torch.no_grad()
- def coach_generate(prompt: str, max_new_tokens: int = 160) -> str:
-     inp = coach_tok(prompt, return_tensors="pt", truncation=True, max_length=512)
-     inp = {k: v.to(DEVICE) for k, v in inp.items()}
-     out = coach_model.generate(
-         **inp,
          max_new_tokens=max_new_tokens,
-         do_sample=False,
-         num_beams=1,
      )
-     text = coach_tok.decode(out[0], skip_special_tokens=True)
-     return text.strip()
-
-
- # ============================================================
- # Opponent (transformer #2): chooses a legal move
- # ============================================================
- @torch.no_grad()
- def opponent_choose_move(board: List[List[str]], legal_moves: List[str]) -> str:
-     # distilgpt2 is not instruction-tuned, so we keep it extremely constrained and parse output.
-     board_ascii = board_to_ascii(board)
-     moves_block = "\n".join([f"- {m}" for m in legal_moves[:40]])  # cap list
-     prompt = (
-         "You are playing checkers as Black.\n"
-         "Choose ONE move exactly from the list. Output only that move.\n"
-         f"Board:\n{board_ascii}\n"
-         f"Moves:\n{moves_block}\n"
-         "Move:"
-     )
-     inp = opp_tok(prompt, return_tensors="pt", truncation=True, max_length=512)
-     inp = {k: v.to(DEVICE) for k, v in inp.items()}
-     gen = opp_model.generate(
-         **inp,
-         max_new_tokens=24,
-         do_sample=True,
-         top_p=0.85,
-         temperature=0.7,
-         pad_token_id=opp_tok.eos_token_id,
-     )
-     out = opp_tok.decode(gen[0], skip_special_tokens=True)
-     tail = out.split("Move:")[-1].strip()
-
-     # parse: pick the first legal move that appears in the generated tail
-     for m in legal_moves:
-         if m in tail:
-             return m
-
-     # fallback: try extract token pattern like a1-b2
-     cand = re.findall(r"[a-h][1-8](?:-[a-h][1-8])+", tail.lower())
-     if cand:
-         for c in cand:
-             if c in legal_moves:
-                 return c
-
-     # final fallback: random legal
-     return random.choice(legal_moves)
-
-
- # ============================================================
- # Rendering board
- # ============================================================
- def render_board(board: List[List[str]], size: int = 520) -> Image.Image:
-     pad = 20
-     cell = (size - 2 * pad) // 8
-     img = Image.new("RGB", (size, size), (245, 245, 245))
-     d = ImageDraw.Draw(img)
-
-     dark = (150, 110, 80)
-     light = (235, 220, 200)
-
-     # grid
-     for r in range(8):
-         for c in range(8):
-             x0 = pad + c * cell
-             y0 = pad + r * cell
-             x1 = x0 + cell
-             y1 = y0 + cell
-             d.rectangle([x0, y0, x1, y1], fill=(dark if is_dark(r, c) else light))
-
-     # pieces
-     for r in range(8):
-         for c in range(8):
-             p = board[r][c]
-             if p == ".":
-                 continue
-             cx = pad + c * cell + cell // 2
-             cy = pad + r * cell + cell // 2
-             rad = int(cell * 0.38)

-             if p in ("w", "W"):
-                 fill = (245, 245, 245)
-                 outline = (30, 30, 30)
-             else:
-                 fill = (40, 40, 40)
-                 outline = (230, 230, 230)

-             d.ellipse([cx - rad, cy - rad, cx + rad, cy + rad], fill=fill, outline=outline, width=3)

-             if is_king(p):
-                 # crown marker
-                 d.ellipse([cx - rad // 2, cy - rad // 2, cx + rad // 2, cy + rad // 2], outline=(255, 215, 0), width=4)

-     # coordinates
-     try:
-         font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf", 14)
-     except Exception:
-         font = None

-     for c in range(8):
-         d.text((pad + c * cell + 3, pad + 8 * cell + 2), chr(ord("a") + c), fill=(30, 30, 30), font=font)
-     for r in range(8):
-         d.text((3, pad + r * cell + 3), str(8 - r), fill=(30, 30, 30), font=font)

-     return img


- # ============================================================
- # Game logic wrapper
- # ============================================================
- def new_game() -> GameState:
-     return GameState(
-         board=initial_board(),
-         turn="w",
-         history=[],
-         last_analysis="",
-     )

- def legal_moves_str(board: List[List[str]], color: str) -> List[str]:
-     moves = gen_legal_moves(board, color)
-     ms = [move_seq_to_str(mv) for mv in moves]
-     # stable ordering: captures first (longer sequences first), then lexicographic
-     ms.sort(key=lambda s: (-s.count("-"), s))
-     return ms
-
- def analyze_user_move(board_before: List[List[str]], user_move_str: str) -> str:
-     # engine "best move" as baseline (not a transformer)
-     depth = int(os.getenv("ANALYSIS_DEPTH", "3"))
-     best_val, best_mv = minimax(board_before, "w", depth=depth, alpha=-math.inf, beta=math.inf)
-     best_str = move_seq_to_str(best_mv) if best_mv else "(none)"
-
-     tips = retrieve_tips("шашки: как улучшить ход и не подставиться", k=3)
-
-     prompt = (
-         "Ты тренер по шашкам. Коротко и по делу.\n"
-         f"Ход игрока: {user_move_str}\n"
-         f"Рекомендованный ход (по анализу): {best_str}\n"
-         "Дай объяснение: почему рекомендованный лучше, и какая ошибка/риск в ходе игрока.\n"
-         "Добавь 2-3 практических совета.\n"
-         "Подсказки:\n"
-         + "\n".join(f"- {t}" for t in tips)
-     )
-     return coach_generate(prompt, max_new_tokens=180)
-
-
- def step_user_and_bot(state: GameState, user_move: str) -> Tuple[GameState, str]:
-     if winner(state.board) is not None:
-         return state, "Game already finished."
-
-     if state.turn != "w":
-         return state, "Not your turn."
-
-     leg = legal_moves_str(state.board, "w")
-     if user_move not in leg:
-         return state, "Invalid move (not in legal list)."
-
-     board_before = clone_board(state.board)
-     seq = move_str_to_seq(user_move)
-     state.board = apply_move(state.board, seq)
-     state.history.append(f"White: {user_move}")
-     state.turn = "b"
-
-     # analysis (coach transformer)
-     state.last_analysis = analyze_user_move(board_before, user_move)
-
-     win = winner(state.board)
-     if win is not None:
-         state.history.append("Result: " + ("White wins" if win == "w" else "Black wins"))
-         return state, ("White wins." if win == "w" else "Black wins.")
-
-     # bot move
-     bot_leg = legal_moves_str(state.board, "b")
-     if not bot_leg:
-         state.history.append("Result: White wins")
-         return state, "White wins."
-
-     bot_move = opponent_choose_move(state.board, bot_leg)
-     bot_seq = move_str_to_seq(bot_move)
-     state.board = apply_move(state.board, bot_seq)
-     state.history.append(f"Black: {bot_move}")
-     state.turn = "w"
-
-     win = winner(state.board)
-     if win is not None:
-         state.history.append("Result: " + ("White wins" if win == "w" else "Black wins"))
-         return state, ("White wins." if win == "w" else "Black wins.")
-
-     return state, f"Bot played: {bot_move}"
-
-
- # ============================================================
- # Coach chat (transformer #1 + embeddings #3)
- # ============================================================
- def coach_chat(state: GameState, message: str, chat_hist: List[Tuple[str, str]]):
-     msg = (message or "").strip()
-     if not msg:
-         return chat_hist, ""
-
-     # Retrieve tips relevant to the question
-     tips = retrieve_tips(msg, k=3)
-
-     # Provide board context
-     context = board_to_ascii(state.board)
-     last = state.history[-6:] if state.history else []
-
-     prompt = (
-         "Ты тренер по шашкам. Отвечай кратко, но конкретно.\n"
-         f"Вопрос игрока: {msg}\n"
-         "Контекст партии (последние ходы):\n"
-         + ("\n".join(last) if last else "(нет)")
-         + "\n"
-         "Доска (ASCII):\n"
-         + context
-         + "\n"
-         "Полезные подсказки:\n"
-         + "\n".join(f"- {t}" for t in tips)
-         + "\n"
-         "Ответ:"
      )

-     answer = coach_generate(prompt, max_new_tokens=180)
-     chat_hist = chat_hist + [(msg, answer)]
-     return chat_hist, ""


- # ============================================================
- # UI
- # ============================================================
- theme = gr.themes.Monochrome(font=[gr.themes.GoogleFont("Inter"), "system-ui"])

- with gr.Blocks(theme=theme, title="Checkers Coach (CPU, 3 Transformers)") as demo:
-     state = gr.State(new_game())

-     with gr.Row():
-         with gr.Column(scale=1, min_width=360):
-             board_img = gr.Image(label="Board", type="pil", height=520)
-             status = gr.Textbox(label="Status", value="", interactive=False)

-             move_dd = gr.Dropdown(label="Your move (White)", choices=[], value=None)
-             play_btn = gr.Button("Play move", variant="primary")
-             new_btn = gr.Button("New game")

-             analysis = gr.Textbox(label="Coach analysis", lines=10, interactive=False)

-         with gr.Column(scale=1, min_width=360):
-             hist = gr.Markdown("")
-             gr.Markdown("### Coach chat")
-             chat = gr.Chatbot(height=360)
-             msg = gr.Textbox(label="Message", placeholder="Ask about strategy, mistakes, next plan…")
-             send = gr.Button("Send")

-     def refresh_ui(gs: GameState):
-         img = render_board(gs.board)
-         leg = legal_moves_str(gs.board, "w") if winner(gs.board) is None else []
-         h = "### History\n" + ("\n".join([f"- {x}" for x in gs.history]) if gs.history else "- (empty)")
-         return img, ("" if gs.turn == "w" else "Bot thinking / waiting…"), gr.update(choices=leg, value=(leg[0] if leg else None)), gs.last_analysis, h

-     def on_new():
-         gs = new_game()
-         return (gs, ) + refresh_ui(gs) + ([], "")

-     def on_play(gs: GameState, mv: str):
-         gs, st = step_user_and_bot(gs, mv or "")
-         img, _, dd, an, h = refresh_ui(gs)
-         return gs, img, st, dd, an, h

-     def on_send(gs: GameState, m: str, ch: List[Tuple[str, str]]):
-         ch, cleared = coach_chat(gs, m, ch or [])
-         return ch, cleared

-     demo.load(lambda gs: refresh_ui(gs), inputs=[state], outputs=[board_img, status, move_dd, analysis, hist])

-     new_btn.click(on_new, inputs=[], outputs=[state, board_img, status, move_dd, analysis, hist, chat, msg])
-     play_btn.click(on_play, inputs=[state, move_dd], outputs=[state, board_img, status, move_dd, analysis, hist])

-     send.click(on_send, inputs=[state, msg, chat], outputs=[chat, msg])

  if __name__ == "__main__":
-     demo.queue(max_size=32).launch(server_name="0.0.0.0", server_port=7860, ssr_mode=False)
  import os
+ import re
  import math
+ import time
+ import threading
  from dataclasses import dataclass
+ from typing import Dict, List, Tuple, Optional, Any

+ import numpy as np
  import torch
+ import gradio as gr

+ from huggingface_hub import HfApi
  from transformers import (
      AutoTokenizer,
      AutoModel,
+     AutoModelForQuestionAnswering,
+     T5ForConditionalGeneration,
  )
+ from transformers.utils import logging as hf_logging

+
+ # ---------------------------
+ # Runtime / logging hygiene
+ # ---------------------------
  os.environ.setdefault("TOKENIZERS_PARALLELISM", "false")
+ os.environ.setdefault("HF_HUB_DISABLE_PROGRESS_BARS", "1")
+ os.environ.setdefault("TRANSFORMERS_NO_ADVISORY_WARNINGS", "1")
+
+ hf_logging.set_verbosity_error()
+
  DEVICE = torch.device("cpu")
+ torch.set_grad_enabled(False)
+
+ # Hard safety limits (RAM + CPU time)
+ MAX_INPUT_CHARS = 60_000      # user text max
+ MAX_CHUNKS = 100              # max chunks to index
+ CHUNK_TARGET_CHARS = 900      # chunk target size
+ EMBED_BATCH = 16              # embedding batch size
+ GEN_MAX_NEW_TOKENS = 200      # generation cap
+ QA_MAX_LENGTH = 384           # QA tokens
+ QA_STRIDE = 128               # QA stride for long contexts
+ MAX_CONTEXT_CHARS = 3_500     # context cap before QA
+
+
+ # ---------------------------
+ # Model selection (availability + fallback)
+ # ---------------------------
+ GEN_CANDIDATES = [
+     "cointegrated/rut5-base-multitask",
+     "cointegrated/rut5-small",
+     "google/flan-t5-small",
+ ]

+ EMB_CANDIDATES = [
+     "intfloat/multilingual-e5-small",
+     "intfloat/e5-small-v2",
+     "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
+ ]

+ QA_CANDIDATES = [
+     "mrm8488/bert-multi-cased-finetuned-xquadv1",
+     "timopixel/bert-base-multilingual-cased-finetuned-squad",
+     "distilbert-base-cased-distilled-squad",  # english fallback
+ ]


+ def _hf_exists(model_id: str) -> bool:
      """
+     Best-effort online check. If offline/blocked, return True (we'll try to load).
      """
+     try:
+         api = HfApi()
+         api.model_info(model_id)
+         return True
+     except Exception:
+         # If we cannot check (offline), do not fail early.
+         return True


+ def pick_first_available(candidates: List[str]) -> str:
+     for mid in candidates:
+         if _hf_exists(mid):
+             return mid
+     return candidates[0]
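+
+ # Note on the selection above: _hf_exists returns True both on a successful hub
+ # lookup and on any exception, so pick_first_available effectively resolves to
+ # the first candidate; the list mainly documents the intended fallback order.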


+ SELECTED_GEN = pick_first_available(GEN_CANDIDATES)
+ SELECTED_EMB = pick_first_available(EMB_CANDIDATES)
+ SELECTED_QA = pick_first_available(QA_CANDIDATES)


+ # ---------------------------
+ # Lazy model loaders
+ # ---------------------------
+ _load_lock = threading.Lock()
+
+
+ @torch.inference_mode()
+ def _to_numpy(x: torch.Tensor) -> np.ndarray:
+     return x.detach().cpu().numpy()
+
+
+ def _safe_truncate_text(s: str, max_chars: int) -> str:
+     s = (s or "").strip()
+     if len(s) > max_chars:
+         return s[:max_chars].rstrip() + "\n\n[Текст обрезан по лимиту длины]"
+     return s
+
+
+ def _clean_spaces(s: str) -> str:
+     return re.sub(r"\s+", " ", (s or "")).strip()
+

+ @torch.inference_mode()
+ def average_pool(last_hidden_states: torch.Tensor, attention_mask: torch.Tensor) -> torch.Tensor:
+     # E5-style mean pooling
+     mask = attention_mask.unsqueeze(-1).bool()
+     masked = last_hidden_states.masked_fill(~mask, 0.0)
+     summed = masked.sum(dim=1)
+     denom = attention_mask.sum(dim=1).clamp(min=1).unsqueeze(-1)
+     return summed / denom
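+
+ # Note on average_pool: last_hidden_states is [B, T, H] and attention_mask is
+ # [B, T]. Padding positions are zeroed before the sum and the divisor is the
+ # count of real tokens, e.g. a mask of [1, 1, 0] averages exactly two vectors.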
+
+
+ @dataclass
+ class Models:
+     gen_id: str
+     emb_id: str
+     qa_id: str
+
+
+ MODELS = Models(gen_id=SELECTED_GEN, emb_id=SELECTED_EMB, qa_id=SELECTED_QA)
+
+
+ # Cached objects (per process)
+ _GEN_TOK: Optional[Any] = None
+ _GEN_MODEL: Optional[Any] = None
+
+ _EMB_TOK: Optional[Any] = None
+ _EMB_MODEL: Optional[Any] = None
+
+ _QA_TOK: Optional[Any] = None
+ _QA_MODEL: Optional[Any] = None
+
+
+ def load_generator() -> Tuple[Any, Any]:
+     global _GEN_TOK, _GEN_MODEL
+     with _load_lock:
+         if _GEN_TOK is not None and _GEN_MODEL is not None:
+             return _GEN_TOK, _GEN_MODEL
+
+         tok = AutoTokenizer.from_pretrained(MODELS.gen_id, use_fast=True)
+         # rut5 models are T5-compatible; flan-t5 too
+         model = T5ForConditionalGeneration.from_pretrained(
+             MODELS.gen_id,
+             torch_dtype=torch.float32,
+             low_cpu_mem_usage=True,
+         )
+         model.eval()
+         _GEN_TOK, _GEN_MODEL = tok, model
+         return tok, model
+
+
+ def load_embedder() -> Tuple[Any, Any]:
+     global _EMB_TOK, _EMB_MODEL
+     with _load_lock:
+         if _EMB_TOK is not None and _EMB_MODEL is not None:
+             return _EMB_TOK, _EMB_MODEL
+
+         tok = AutoTokenizer.from_pretrained(MODELS.emb_id, use_fast=True)
+         model = AutoModel.from_pretrained(
+             MODELS.emb_id,
+             torch_dtype=torch.float32,
+             low_cpu_mem_usage=True,
+         )
+         model.eval()
+         _EMB_TOK, _EMB_MODEL = tok, model
+         return tok, model
+
+
+ def load_qa() -> Tuple[Any, Any]:
+     global _QA_TOK, _QA_MODEL
+     with _load_lock:
+         if _QA_TOK is not None and _QA_MODEL is not None:
+             return _QA_TOK, _QA_MODEL
+
+         tok = AutoTokenizer.from_pretrained(MODELS.qa_id, use_fast=True)
+         model = AutoModelForQuestionAnswering.from_pretrained(
+             MODELS.qa_id,
+             torch_dtype=torch.float32,
+             low_cpu_mem_usage=True,
+         )
+         model.eval()
+         _QA_TOK, _QA_MODEL = tok, model
+         return tok, model
+
+
+ # ---------------------------
+ # Text chunking / indexing
+ # ---------------------------
+ _SENT_SPLIT = re.compile(r"(?<=[\.\!\?…])\s+|\n+")
+
+
+ def split_into_chunks(text: str,
+                       target_chars: int = CHUNK_TARGET_CHARS,
+                       max_chunks: int = MAX_CHUNKS) -> List[str]:
+     text = _safe_truncate_text(text, MAX_INPUT_CHARS)
+
+     # Prefer paragraph-based chunks first
+     paras = [p.strip() for p in re.split(r"\n\s*\n+", text) if p.strip()]
+     chunks: List[str] = []
+
+     buf = ""
+     for p in paras:
+         if not buf:
+             buf = p
+             continue
+         if len(buf) + 2 + len(p) <= target_chars:
+             buf = buf + "\n\n" + p
+         else:
+             chunks.append(buf.strip())
+             buf = p
+         if len(chunks) >= max_chunks:
+             break
+
+     if buf and len(chunks) < max_chunks:
+         chunks.append(buf.strip())
+
+     # If still too big chunks (single huge para), split by sentences
+     fixed: List[str] = []
+     for c in chunks:
+         if len(c) <= target_chars * 1.6:
+             fixed.append(c)
+             continue
+         sents = [s.strip() for s in _SENT_SPLIT.split(c) if s.strip()]
+         b = ""
+         for s in sents:
+             if not b:
+                 b = s
                  continue
+             if len(b) + 1 + len(s) <= target_chars:
+                 b = b + " " + s
+             else:
+                 fixed.append(b.strip())
+                 b = s
+             if len(fixed) >= max_chunks:
                  break
+         if b and len(fixed) < max_chunks:
+             fixed.append(b.strip())
+         if len(fixed) >= max_chunks:
+             break
+
+     return fixed[:max_chunks]
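+
+ # Illustration of the two-stage chunking: paragraphs are packed greedily into
+ # buffers of up to ~target_chars joined by blank lines; any chunk still longer
+ # than 1.6 * target_chars is re-split on sentence boundaries via _SENT_SPLIT.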
+
+
+ @torch.inference_mode()
+ def embed_texts(texts: List[str], is_query: bool) -> np.ndarray:
+     tok, model = load_embedder()
+
+     # E5 expects prefixes
+     prefix = "query: " if is_query else "passage: "
+     texts = [prefix + _clean_spaces(t) for t in texts]
+
+     all_vecs: List[np.ndarray] = []
+     for i in range(0, len(texts), EMBED_BATCH):
+         batch = texts[i:i + EMBED_BATCH]
+         enc = tok(batch, padding=True, truncation=True, max_length=512, return_tensors="pt")
+         out = model(**enc)
+         pooled = average_pool(out.last_hidden_state, enc["attention_mask"])
+         pooled = torch.nn.functional.normalize(pooled, p=2, dim=1)
+         all_vecs.append(_to_numpy(pooled))
+
+     return np.vstack(all_vecs).astype(np.float32)
+
+
+ def cosine_topk(query_vec: np.ndarray, matrix: np.ndarray, k: int) -> List[Tuple[int, float]]:
+     # query_vec: [d], matrix: [n,d], both normalized
+     scores = matrix @ query_vec.reshape(-1, 1)
+     scores = scores.squeeze(1)
+     if len(scores) == 0:
+         return []
+     k = max(1, min(k, len(scores)))
+     idx = np.argpartition(-scores, k - 1)[:k]
+     idx = idx[np.argsort(-scores[idx])]
+     return [(int(i), float(scores[i])) for i in idx]
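+
+ # Note on cosine_topk: embed_texts L2-normalizes every row, so these dot
+ # products are exact cosine similarities; np.argpartition picks the k best in
+ # O(n), and the final argsort orders only those k hits by descending score.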
+
+
+ def build_index(text: str) -> Dict[str, Any]:
+     text = _safe_truncate_text(text, MAX_INPUT_CHARS)
+     chunks = split_into_chunks(text)
+     if not chunks:
+         return {"text": text, "chunks": [], "emb": None}
+     emb = embed_texts(chunks, is_query=False)
+     return {"text": text, "chunks": chunks, "emb": emb}
+
+
+ def ensure_index(state: Optional[Dict[str, Any]], text: str) -> Dict[str, Any]:
+     text = _safe_truncate_text(text, MAX_INPUT_CHARS)
+     if not state or state.get("text") != text:
+         return build_index(text)
+     return state
+
+
+ def retrieve(state: Dict[str, Any], query: str, k: int = 5) -> List[Tuple[float, str]]:
+     query = (query or "").strip()
+     if not query or not state.get("chunks") or state.get("emb") is None:
+         return []
+     qv = embed_texts([query], is_query=True)[0]
+     top = cosine_topk(qv, state["emb"], k=k)
+     out: List[Tuple[float, str]] = []
+     for idx, score in top:
+         out.append((score, state["chunks"][idx]))
+     return out
+
+
+ # ---------------------------
+ # Generator (ruT5 multitask)
+ # ---------------------------
+ @torch.inference_mode()
+ def rut5(task: str, text: str, max_new_tokens: int = GEN_MAX_NEW_TOKENS,
+          do_sample: bool = False, temperature: float = 0.9, top_p: float = 0.95) -> str:
+     tok, model = load_generator()
+
+     task = (task or "").strip()
+     if task:
+         prompt = f"{task} | {text}"
      else:
+         prompt = text
+
+     enc = tok(prompt, return_tensors="pt", truncation=True, max_length=512)

+     gen_kwargs = dict(
          max_new_tokens=max_new_tokens,
+         num_beams=4 if not do_sample else 1,
+         do_sample=do_sample,
+         temperature=temperature if do_sample else None,
+         top_p=top_p if do_sample else None,
+         repetition_penalty=1.05,
+         no_repeat_ngram_size=3,
      )
+     # Remove None
+     gen_kwargs = {k: v for k, v in gen_kwargs.items() if v is not None}

+     out = model.generate(**enc, **gen_kwargs)
+     s = tok.decode(out[0], skip_special_tokens=True).strip()
+     return s


+ # ---------------------------
+ # Extractive QA (mBERT xquad)
+ # ---------------------------
+ @torch.inference_mode()
+ def extractive_qa(question: str, context: str) -> Tuple[str, str]:
+     """
+     Returns: (answer, evidence_snippet)
+     """
+     question = (question or "").strip()
+     context = (context or "").strip()
+     if not question or not context:
+         return "", ""
+
+     tok, model = load_qa()
+
+     context = _safe_truncate_text(context, MAX_CONTEXT_CHARS)
+
+     enc = tok(
+         question,
+         context,
+         truncation="only_second",
+         max_length=QA_MAX_LENGTH,
+         stride=QA_STRIDE,
+         return_overflowing_tokens=True,
+         return_offsets_mapping=True,
+         padding=True,
+         return_tensors="pt",
+     )

+     offset_mapping = enc.pop("offset_mapping")  # [features, seq]
+     # Fast tokenizers also return overflow bookkeeping; it is not a model input.
+     enc.pop("overflow_to_sample_mapping", None)
+     input_ids = enc["input_ids"]

+     outputs = model(**enc)
+     start_logits = outputs.start_logits
+     end_logits = outputs.end_logits

+     best_score = -1e9
+     best_span = (0, 0)
+     best_context = context

+     for i in range(input_ids.shape[0]):
+         # sequence ids: None, 0(question), 1(context)
+         seq_ids = tok.sequence_ids(i)
+         offsets = offset_mapping[i].tolist()

+         # valid context token indices
+         context_token_idxs = [j for j, sid in enumerate(seq_ids) if sid == 1 and offsets[j] != [0, 0]]
+         if not context_token_idxs:
+             continue

+         s_logits = start_logits[i].detach().cpu().numpy()
+         e_logits = end_logits[i].detach().cpu().numpy()
+
+         for s_idx in context_token_idxs:
+             for e_idx in context_token_idxs:
+                 if e_idx < s_idx:
+                     continue
+                 if e_idx - s_idx > 40:
+                     continue
+                 score = float(s_logits[s_idx] + e_logits[e_idx])
+                 if score > best_score:
+                     s_char, _ = offsets[s_idx]
+                     _, e_char = offsets[e_idx]
+                     if e_char > s_char:
+                         best_score = score
+                         best_span = (s_char, e_char)
+
+     ans = best_context[best_span[0]:best_span[1]].strip()
+     if not ans:
+         return "", ""
+
+     # Evidence snippet with small window
+     a, b = best_span
+     left = max(0, a - 120)
+     right = min(len(best_context), b + 120)
+     snippet = best_context[left:right].strip()
+
+     return ans, snippet
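+
+ # Note on the span search above: it follows the usual extractive-QA decoding
+ # scheme - each candidate span is scored as start_logit + end_logit, spans are
+ # capped at 40 tokens, and the offset mapping converts the best token span
+ # back to character positions in the (truncated) context string.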
+
+
+ # ---------------------------
+ # Product features
+ # ---------------------------
+ def make_summary(state: Dict[str, Any], level: str) -> str:
+     chunks = state.get("chunks") or []
+     emb = state.get("emb")
+     if not chunks or emb is None:
+         return "Нет текста для обработки."
+
+     # central chunks via centroid similarity
+     centroid = emb.mean(axis=0)
+     centroid = centroid / (np.linalg.norm(centroid) + 1e-12)
+     sims = emb @ centroid.reshape(-1, 1)
+     sims = sims.squeeze(1)
+
+     k = 3 if level == "Коротко" else 6
+     k = min(k, len(chunks))
+     idx = np.argpartition(-sims, k - 1)[:k]
+     idx = idx[np.argsort(-sims[idx])]
+     selected = "\n\n".join(chunks[i] for i in idx.tolist())
+
+     selected = _safe_truncate_text(selected, 3000)
+
+     # title + simplified digest
+     title = rut5("headline", selected, max_new_tokens=32)
+     digest = rut5("simplify", selected, max_new_tokens=GEN_MAX_NEW_TOKENS)
+
+     # if generator fails, return extractive selection
+     if not digest:
+         digest = selected
+
+     return f"### Заголовок\n{title}\n\n### Пересказ\n{digest}"
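+
+ # Note on make_summary: chunks whose embeddings lie closest to the normalized
+ # centroid are taken as most representative, so the digest is centroid-ranked
+ # extraction followed by ruT5 "headline"/"simplify" rewriting of the selection.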
+
+
+ def make_quiz(state: Dict[str, Any], n: int, difficulty: str) -> str:
+     chunks = state.get("chunks") or []
+     emb = state.get("emb")
+     if not chunks or emb is None:
+         return "Нет текста для генерации вопросов."
+
+     n = int(max(1, min(n, 12)))
+
+     # pick diverse chunks: take top by centrality, then spread
+     centroid = emb.mean(axis=0)
+     centroid = centroid / (np.linalg.norm(centroid) + 1e-12)
+     sims = (emb @ centroid.reshape(-1, 1)).squeeze(1)
+     order = np.argsort(-sims).tolist()
+
+     # take every step to diversify
+     step = max(1, len(order) // max(1, n))
+     chosen_idx = []
+     for i in range(0, len(order), step):
+         chosen_idx.append(order[i])
+         if len(chosen_idx) >= n:
+             break
+
+     questions: List[Tuple[str, str, str]] = []
+     seen = set()
+
+     for idx in chosen_idx:
+         ctx = chunks[idx]
+         ctx_short = _safe_truncate_text(ctx, 2000)
+
+         # generate question
+         q = rut5("ask", ctx_short, max_new_tokens=64, do_sample=True,
+                  temperature=0.85 if difficulty == "Легко" else 1.0,
+                  top_p=0.92)
+         q = q.strip()
+         q = q if q.endswith("?") else (q + "?") if q else ""
+
+         if not q or q.lower() in seen:
+             continue
+         seen.add(q.lower())
+
+         # answer from QA model (extractive) with evidence
+         ans, ev = extractive_qa(q, ctx_short)
+
+         # fallback to generative "comprehend" if extractive fails
+         if not ans:
+             ans = rut5("comprehend", f"{ctx_short} Вопрос: {q}", max_new_tokens=64).strip()
+             ev = ctx_short[:260].strip()
+
+         questions.append((q, ans, ev))
+         if len(questions) >= n:
+             break
+
+     if not questions:
+         return "Не удалось сгенерировать вопросы. Попробуйте увеличить текст или выбрать другой фрагмент."
+
+     # format
+     out = ["### Вопросы для самопроверки\n"]
+     for i, (q, a, ev) in enumerate(questions, 1):
+         out.append(f"**{i}. {q}**")
+         out.append(f"- Ответ: {a}")
+         out.append(f"- Фрагмент: {ev}")
+         out.append("")
+     return "\n".join(out).strip()
+
+
+ def answer_question(state: Dict[str, Any], question: str) -> str:
+     question = (question or "").strip()
+     if not question:
+         return "Введите вопрос."
+
+     hits = retrieve(state, question, k=4)
+     if not hits:
+         return "Нечего искать: сначала вставьте текст и нажмите «Проиндексировать»."
+
+     # Build context from top passages
+     context_parts = []
+     for score, chunk in hits:
+         context_parts.append(chunk)
+     context = "\n\n".join(context_parts)
+     context = _safe_truncate_text(context, MAX_CONTEXT_CHARS)
+
+     ans, ev = extractive_qa(question, context)
+     if not ans:
+         # fallback to ruT5 open-book QA
+         ans = rut5("comprehend", f"{context} Вопрос: {question}", max_new_tokens=96).strip()
+         ev = context[:320].strip()
+
+     return f"**Ответ:** {ans}\n\n**Доказательство (фрагмент текста):**\n{ev}"
+
+
+ def search_passages(state: Dict[str, Any], query: str, k: int) -> str:
+     query = (query or "").strip()
+     if not query:
+         return "Введите запрос."
+     hits = retrieve(state, query, k=int(max(1, min(k, 10))))
+     if not hits:
+         return "Ничего не найдено."
+
+     out = ["### Результаты семантического поиска\n"]
+     for i, (score, chunk) in enumerate(hits, 1):
+         out.append(f"**{i}. score={score:.3f}**")
+         out.append(chunk)
+         out.append("")
+     return "\n".join(out).strip()
+
+
+ # ---------------------------
+ # Gradio UI
+ # ---------------------------
+ def model_status_text() -> str:
+     return (
+         "Выбранные модели:\n"
+         f"- Генерация: {MODELS.gen_id}\n"
+         f"- Эмбеддинги: {MODELS.emb_id}\n"
+         f"- QA (extractive): {MODELS.qa_id}\n"
+         "\nПримечание: модели скачиваются при первом обращении."
      )

+ def on_index(text: str, state: Optional[Dict[str, Any]]) -> Tuple[str, Dict[str, Any]]:
+     text = _safe_truncate_text(text, MAX_INPUT_CHARS)
+     if not text.strip():
+         return "Пустой текст.", {"text": "", "chunks": [], "emb": None}

+     t0 = time.time()
+     st = build_index(text)
+     dt = time.time() - t0

+     chunks_n = len(st.get("chunks") or [])
+     return f"Готово: чанков={chunks_n}, индекс построен за {dt:.1f}с.", st

+ def on_summary(text: str, state: Optional[Dict[str, Any]], level: str) -> Tuple[str, Dict[str, Any]]:
+     st = ensure_index(state, text)
+     return make_summary(st, level), st
+
+ def on_quiz(text: str, state: Optional[Dict[str, Any]], n: int, difficulty: str) -> Tuple[str, Dict[str, Any]]:
+     st = ensure_index(state, text)
+     return make_quiz(st, n, difficulty), st

+ def on_search(text: str, state: Optional[Dict[str, Any]], query: str, k: int) -> Tuple[str, Dict[str, Any]]:
+     st = ensure_index(state, text)
+     return search_passages(st, query, k), st

+ def on_chat(text: str, state: Optional[Dict[str, Any]], chat: List[Tuple[str, str]], user_q: str) -> Tuple[List[Tuple[str, str]], Dict[str, Any], str]:
+     st = ensure_index(state, text)
+     user_q = (user_q or "").strip()
+     if not user_q:
+         return chat, st, ""
+     a = answer_question(st, user_q)
+     chat = (chat or []) + [(user_q, a)]
+     return chat, st, ""

+ with gr.Blocks(title="Text Study Assistant (CPU, 3 Transformers)") as demo:
+     gr.Markdown("## Text Study Assistant\nМини-помощник для конспекта, самопроверки и вопросов по тексту. CPU-only, без GPU.")

+     with gr.Row():
+         with gr.Column(scale=2):
+             src_text = gr.Textbox(
+                 label="Текст",
+                 lines=12,
+                 placeholder="Вставьте сюда текст для анализа (лекция, статья, конспект).",
+             )
+             with gr.Row():
+                 btn_index = gr.Button("Проиндексировать", variant="primary")
+                 index_status = gr.Textbox(label="Статус", value="Ожидаю текст…", interactive=False)
+
+             with gr.Accordion("Модели", open=False):
+                 gr.Textbox(value=model_status_text(), lines=6, interactive=False, show_label=False)
+
+         with gr.Column(scale=3):
+             state = gr.State({"text": "", "chunks": [], "emb": None})
+
+             with gr.Tabs():
+                 with gr.Tab("Пересказ"):
+                     level = gr.Radio(["Коротко", "Подробнее"], value="Коротко", label="Уровень")
+                     btn_sum = gr.Button("Сделать пересказ")
+                     sum_out = gr.Markdown()
+
+                 with gr.Tab("Вопросы"):
+                     with gr.Row():
+                         q_n = gr.Slider(1, 12, value=6, step=1, label="Количество вопросов")
+                         q_diff = gr.Radio(["Легко", "Сложнее"], value="Легко", label="Сложность")
+                     btn_quiz = gr.Button("Сгенерировать вопросы")
+                     quiz_out = gr.Markdown()
+
+                 with gr.Tab("Чат по тексту"):
+                     chat = gr.Chatbot(label="Диалог", height=380)
+                     with gr.Row():
+                         user_q = gr.Textbox(label="Вопрос", placeholder="Спросите что-то по тексту…", lines=1)
+                         btn_send = gr.Button("Отправить")
+                     gr.Markdown(
+                         "Ответ формируется так: семантический поиск по чанкам → extractive QA с фрагментом-доказательством → fallback на ruT5 при необходимости."
+                     )
+
+                 with gr.Tab("Семантический поиск"):
+                     with gr.Row():
+                         search_q = gr.Textbox(label="Запрос", placeholder="Например: 'основная гипотеза' или 'методика эксперимента'")
+                         topk = gr.Slider(1, 10, value=5, step=1, label="Топ-K")
+                     btn_search = gr.Button("Найти фрагменты")
+                     search_out = gr.Markdown()
+
+     # Wiring
+     btn_index.click(on_index, inputs=[src_text, state], outputs=[index_status, state])
+
+     btn_sum.click(on_summary, inputs=[src_text, state, level], outputs=[sum_out, state])
+     btn_quiz.click(on_quiz, inputs=[src_text, state, q_n, q_diff], outputs=[quiz_out, state])
+     btn_search.click(on_search, inputs=[src_text, state, search_q, topk], outputs=[search_out, state])
+
+     btn_send.click(on_chat, inputs=[src_text, state, chat, user_q], outputs=[chat, state, user_q])
+     user_q.submit(on_chat, inputs=[src_text, state, chat, user_q], outputs=[chat, state, user_q])

  if __name__ == "__main__":
+     demo.queue(max_size=32).launch(server_name="0.0.0.0", server_port=7860, show_error=True)