Spaces:
Runtime error
Runtime error
| import json, faiss, re | |
| import numpy as np | |
| import gradio as gr | |
| from sentence_transformers import SentenceTransformer, CrossEncoder | |
| from transformers import AutoTokenizer, AutoModelForQuestionAnswering, pipeline | |
# ---- Artefact locations and model identifiers ----
INDEX_PATH = "e5_index.faiss"
TEXTS_PATH = "texts.json"
EMB_NAME = "intfloat/multilingual-e5-base"
READER_NAME = "savasy/bert-base-turkish-squad"
RERANK_MODEL = "BAAI/bge-reranker-v2-m3"

# ---- Retrieval data: FAISS index plus the passages its ids point into ----
index = faiss.read_index(INDEX_PATH)
with open(TEXTS_PATH, encoding="utf-8") as texts_file:
    # `texts` is later indexed with the ids returned by `index.search`.
    texts = json.load(texts_file)

# ---- Models: query embedder, extractive reader, cross-encoder reranker ----
embedder = SentenceTransformer(EMB_NAME)

qa_tok = AutoTokenizer.from_pretrained(READER_NAME)
qa_mod = AutoModelForQuestionAnswering.from_pretrained(READER_NAME)
# NOTE(review): device_map="auto" is passed together with an already
# instantiated model object — confirm it has the intended placement effect.
qa = pipeline(
    "question-answering",
    model=qa_mod,
    tokenizer=qa_tok,
    device_map="auto",
)

reranker = CrossEncoder(RERANK_MODEL)
def search_semantic(q, k=80):
    """Return the ids of the passages nearest to question ``q``.

    The question is prefixed with ``"query: "``, encoded with the module-level
    ``embedder`` (normalised embeddings) and searched in the module-level
    FAISS ``index``.

    Args:
        q: Question text.
        k: Maximum number of neighbours to request. Coerced to ``int``
            because UI sliders may deliver the value as a float.

    Returns:
        A list of at most ``k`` integer ids in the order FAISS returned them.
        An empty list is returned when ``k`` is not positive.
    """
    k = int(k)
    if k <= 0:
        return []
    qv = embedder.encode(
        [f"query: {q}"], convert_to_numpy=True, normalize_embeddings=True
    )
    _, ids = index.search(qv.astype(np.float32), k)
    # FAISS fills missing neighbours with -1 when fewer than k vectors are
    # available; used as a Python list index, -1 would silently select the
    # last passage, so those placeholders are dropped here.
    return [i for i in ids[0].tolist() if i >= 0]
def finalize_answer(raw_answer, context, max_chars=220):
    """Turn a raw extracted span into a short, display-ready sentence.

    If a sentence of ``context`` contains the answer span, that whole sentence
    is shown instead of the bare span. The result has collapsed whitespace, is
    truncated at a word boundary to ``max_chars`` (with a trailing ellipsis),
    ends with sentence punctuation and starts with an upper-case letter.

    Args:
        raw_answer: Span returned by the reader; may be ``None`` or empty.
        context: Passage the span was extracted from; may be ``None`` or empty.
        max_chars: Maximum length of the text before the ellipsis is appended.

    Returns:
        The formatted answer, or ``"Bilmiyorum"`` when ``raw_answer`` is blank.
    """
    ans = (raw_answer or "").strip()
    if not ans:
        return "Bilmiyorum"

    # Split after sentence-ending punctuation; a missing context yields no hit.
    sents = re.split(r"(?<=[.!?])\s+", context or "")
    hit = next((s for s in sents if ans in s), None)

    text = re.sub(r"\s+", " ", (hit or ans).strip()).strip()
    if len(text) > max_chars:
        # Cut at the last space inside the limit so no word is split.
        text = text[:max_chars].rsplit(" ", 1)[0].rstrip() + "…"
    if not re.search(r"[.!?…]$", text):
        text += "."

    # str.upper() is locale-independent and maps "i" to "I"; Turkish text
    # needs the dotted capital "İ" ("ı" already upper-cases to "I").
    first = text[0]
    head = "İ" if first == "i" else first.upper()
    return head + text[1:]
# ---- Answerers (they do not return sources) ----
def _pick_best_answer(q, contexts, min_conf):
    """Run the reader on each context and format the highest-scoring span.

    Args:
        q: Question text.
        contexts: Passages to read, in priority order.
        min_conf: Minimum reader score required to accept an answer.

    Returns:
        ``(answer_text, score)`` where ``answer_text`` comes from
        ``finalize_answer`` and ``score`` is rounded to three decimals. The
        answer is the "Bilmiyorum" fallback when nothing scores at least
        ``min_conf``.
    """
    best_answer, best_score, best_context = "", 0.0, ""
    for ctx in contexts:
        # The question-answering pipeline raises on an empty context.
        if not (ctx or "").strip():
            continue
        out = qa(question=q, context=ctx)
        answer = (out.get("answer") or "").strip()
        score = float(out.get("score", 0.0))
        if score > best_score:
            best_answer, best_score, best_context = answer, score, ctx

    if not best_answer or best_score < float(min_conf):
        # Drop the context as well, so the fallback word cannot be expanded
        # into an unrelated passage sentence by finalize_answer.
        best_answer, best_context = "Bilmiyorum", ""
    return finalize_answer(best_answer, best_context), round(best_score, 3)


def _retrieve_passages(q, k):
    """Return the retrieved passages for ``q``, skipping ids outside ``texts``."""
    # FAISS may return -1 placeholders; a negative id must not wrap around.
    return [texts[i] for i in search_semantic(q, k=k) if 0 <= i < len(texts)]


def answer_rerank(q, k=80, top_m=12, min_conf=0.10):
    """Answer ``q`` using retrieval, cross-encoder reranking and the reader.

    Args:
        q: Question text.
        k: Number of passages to retrieve.
        top_m: Number of top reranked passages handed to the reader.
        min_conf: Minimum reader score required to accept an answer.

    Returns:
        ``(answer_text, score)``; see ``_pick_best_answer``.
    """
    if not (q or "").strip():
        # A blank question would make the reader pipeline raise.
        return _pick_best_answer(q, [], min_conf)
    cand = _retrieve_passages(q, k)
    if not cand:
        return _pick_best_answer(q, [], min_conf)
    scores = reranker.predict([[q, c] for c in cand])
    # Stable sort: ties keep retrieval order, as with sorting (score, text) pairs
    # by score only.
    order = sorted(range(len(cand)), key=lambda j: scores[j], reverse=True)
    # Sliders may deliver top_m as a float, which cannot be used in a slice.
    top = order[: max(int(top_m), 0)]
    return _pick_best_answer(q, [cand[j] for j in top], min_conf)


def answer_simple(q, k=80, top_m=12, min_conf=0.10):
    """Answer ``q`` using retrieval order only (no reranking) and the reader.

    Args:
        q: Question text.
        k: Number of passages to retrieve.
        top_m: Number of top retrieved passages handed to the reader.
        min_conf: Minimum reader score required to accept an answer.

    Returns:
        ``(answer_text, score)``; see ``_pick_best_answer``.
    """
    if not (q or "").strip():
        # A blank question would make the reader pipeline raise.
        return _pick_best_answer(q, [], min_conf)
    cand = _retrieve_passages(q, k)
    # Sliders may deliver top_m as a float, which cannot be used in a slice.
    return _pick_best_answer(q, cand[: max(int(top_m), 0)], min_conf)
# ---- Gradio UI (no source output and no hints) ----
# NOTE(review): the original indentation was lost; this layout assumes that
# only the three sliders sit inside the Row and that everything up to
# btn.click belongs to the Blocks context — confirm against the real app.
with gr.Blocks() as demo:
    gr.Markdown("# Türkçe RAG QA (e5 + BERT-SQuAD)")
    inp = gr.Textbox(label="Sorunuzu yazın", placeholder="Verem nedir?")

    # Tuning controls: retrieval depth, reader depth, confidence threshold.
    with gr.Row():
        k = gr.Slider(20, 120, value=80, step=10, label="k (retrieval)")
        m = gr.Slider(4, 24, value=12, step=1, label="top_m_for_qa")
        th = gr.Slider(0.05, 0.35, value=0.10, step=0.01, label="min_conf")

    use_rerank = gr.Checkbox(value=True, label="Reranker kullan (BGE v2 m3)")

    # Outputs: formatted answer text and the reader's confidence score.
    out_ans = gr.Textbox(label="Cevap")
    out_sc = gr.Number(label="Güven", precision=3)
    btn = gr.Button("Sor")

    def route(q, kk, mm, tt, rr):
        """Forward the UI inputs to the reranking or the plain answerer.

        ``rr`` is the checkbox state; the remaining arguments are passed
        through positionally as question, k, top_m and min_conf.
        """
        answer_fn = answer_rerank if rr else answer_simple
        return answer_fn(q, kk, mm, tt)

    btn.click(
        route,
        inputs=[inp, k, m, th, use_rerank],
        outputs=[out_ans, out_sc],
    )

demo.queue().launch()