Spaces:
Sleeping
Sleeping
Commit ·
cfcbf08
1
Parent(s): 55ace82
that is enough for now
Browse files- app_gradio.py +52 -39
- core/__init__.py +0 -0
- core/auto_qa_hf.py +40 -31
- core/qa_hf.py +9 -11
- core/summarizer_hf.py +9 -14
app_gradio.py
CHANGED
|
@@ -1,4 +1,3 @@
|
|
| 1 |
-
|
| 2 |
import gradio as gr
|
| 3 |
from core.dataset import load_jsonl_dataset
|
| 4 |
from core.summarizer_hf import summarize_text
|
|
@@ -6,73 +5,87 @@ from core.auto_qa_hf import generate_questions
|
|
| 6 |
from core.qa_hf import answer_question_with_score
|
| 7 |
|
| 8 |
DATASET_PATH = "datasets/my_dataset.jsonl"
|
| 9 |
-
|
| 10 |
|
|
|
|
| 11 |
doc_map = {f"{d['id']} - {d['title']}": d["text"] for d in docs}
|
| 12 |
doc_choices = ["-- none --"] + list(doc_map.keys())
|
| 13 |
|
| 14 |
def load_doc(choice):
|
| 15 |
if choice == "-- none --":
|
| 16 |
return ""
|
| 17 |
-
return doc_map
|
| 18 |
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
def revision_mode(text, n_questions):
|
| 22 |
text = (text or "").strip()
|
| 23 |
if len(text) < 80:
|
| 24 |
-
|
|
|
|
| 25 |
|
| 26 |
-
|
| 27 |
|
| 28 |
-
#
|
| 29 |
-
|
| 30 |
|
| 31 |
-
|
| 32 |
-
for q in
|
| 33 |
res = answer_question_with_score(text, q)
|
| 34 |
ans, score = res["answer"], res["score"]
|
| 35 |
-
|
| 36 |
-
# ✅ filter out weak/empty answers
|
| 37 |
-
if score < 0.20 or len(ans) < 2:
|
| 38 |
continue
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
|
| 52 |
qa_text = ""
|
| 53 |
-
for i, (score, q, ans) in enumerate(
|
| 54 |
qa_text += f"Q{i}: {q}\nA{i}: {ans}\n(Confidence: {score:.2f})\n\n"
|
| 55 |
|
| 56 |
return summary, qa_text
|
| 57 |
|
| 58 |
|
| 59 |
with gr.Blocks() as demo:
|
| 60 |
-
gr.Markdown("# 🧠 AI
|
| 61 |
-
gr.Markdown("✅
|
| 62 |
|
| 63 |
with gr.Row():
|
| 64 |
-
|
| 65 |
-
|
|
|
|
| 66 |
|
| 67 |
-
text = gr.Textbox(label="📝 Text Input", lines=10
|
| 68 |
load_btn.click(load_doc, inputs=choice, outputs=text)
|
| 69 |
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
summary_out = gr.Textbox(label="✅ Ultra Short Summary", lines=4)
|
| 74 |
-
qa_out = gr.Textbox(label="✅ Auto Revision Questions & Answers", lines=14)
|
| 75 |
|
| 76 |
-
run_btn.click(revision_mode, inputs=[text,
|
| 77 |
|
| 78 |
demo.launch(share=True)
|
|
|
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
from core.dataset import load_jsonl_dataset
|
| 3 |
from core.summarizer_hf import summarize_text
|
|
|
|
| 5 |
from core.qa_hf import answer_question_with_score
|
| 6 |
|
| 7 |
# App-wide constants and the dataset-backed document picker.
DATASET_PATH = "datasets/my_dataset.jsonl"
MIN_QUESTIONS = 3  # number of Q&A pairs shown to the user

docs = load_jsonl_dataset(DATASET_PATH)
# Dropdown label "<id> - <title>" -> full document text.
doc_map = {f"{doc['id']} - {doc['title']}": doc["text"] for doc in docs}
doc_choices = ["-- none --", *doc_map]
|
| 13 |
|
| 14 |
def load_doc(choice):
    """Return the text of the selected dataset document ("" for the sentinel)."""
    if choice != "-- none --":
        # Unknown keys (stale dropdown state) fall back to an empty textbox.
        return doc_map.get(choice, "")
    return ""
|
| 18 |
|
| 19 |
+
def revision_mode(text, lang):
    """Summarize *text* and build up to MIN_QUESTIONS revision Q&A pairs.

    Strategy: generate many candidate questions, answer each with the QA
    model, rank by answer confidence, and keep the best MIN_QUESTIONS.
    If too few survive, retry with questions generated from the summary
    (lower confidence bar), then with generic fallback questions.

    Args:
        text: raw input text (may be None/empty).
        lang: "en" or "fr" — forwarded to the summarizer / question
            generator and used for user-facing messages.

    Returns:
        (summary, qa_text) strings for the two output textboxes.
    """
    text = (text or "").strip()
    if len(text) < 80:
        msg = "Texte trop court." if lang == "fr" else "Text too short."
        return msg, msg

    summary = summarize_text(text, lang=lang)

    scored = []  # (score, question, answer), best-first after sorting
    seen = set()  # normalized text of ACCEPTED questions, to avoid duplicate Q&A

    def _collect(questions, min_score):
        # Score each candidate against the full text; keep confident,
        # non-empty answers. Skips questions already accepted so the same
        # Q&A pair cannot appear twice across passes. Mutates scored/seen.
        for q in questions:
            key = q.strip().lower()
            if key in seen:
                continue
            res = answer_question_with_score(text, q)
            ans, score = res["answer"], res["score"]
            if score < min_score or len(ans) < 2:
                continue
            seen.add(key)
            scored.append((score, q, ans))

    # Pass 1: questions generated from the full text.
    _collect(generate_questions(text, lang=lang, n_questions=60), 0.10)
    scored.sort(key=lambda x: x[0], reverse=True)
    top = scored[:MIN_QUESTIONS]

    # Pass 2: second chance — generate from the summary, lower threshold.
    if len(top) < MIN_QUESTIONS:
        _collect(generate_questions(summary, lang=lang, n_questions=60), 0.08)
        scored.sort(key=lambda x: x[0], reverse=True)
        top = scored[:MIN_QUESTIONS]

    # Final fallback: generic questions with a very permissive threshold.
    if len(top) < MIN_QUESTIONS:
        generic = (
            ["De quoi parle le texte ?", "Quel est le fait le plus important ?", "Quelle est la conclusion principale ?"]
            if lang == "fr"
            else ["What is the text about?", "What is the most important fact?", "What is the main conclusion?"]
        )
        for q in generic:
            if q.strip().lower() in seen:  # already answered above
                continue
            res = answer_question_with_score(text, q)
            ans, score = res["answer"], res["score"]
            if score > 0.03 and len(ans) > 1:
                top.append((score, q, ans))

    qa_text = ""
    for i, (score, q, ans) in enumerate(top[:MIN_QUESTIONS], start=1):
        qa_text += f"Q{i}: {q}\nA{i}: {ans}\n(Confidence: {score:.2f})\n\n"

    return summary, qa_text
|
| 71 |
|
| 72 |
|
| 73 |
# --- Gradio UI (component creation order defines the on-screen layout) ---
with gr.Blocks() as demo:
    gr.Markdown("# 🧠 AI Summarizer + Auto Q&A (EN/FR)")
    gr.Markdown("✅ Generates short summary + **3 automatic revision questions** with answers.")

    with gr.Row():
        lang_radio = gr.Radio(["en", "fr"], value="en", label="Language / Langue")
        doc_dropdown = gr.Dropdown(doc_choices, label="📚 Dataset Document")
        btn_load = gr.Button("Load Doc")

    input_box = gr.Textbox(label="📝 Text Input", lines=10)
    btn_load.click(load_doc, inputs=doc_dropdown, outputs=input_box)

    btn_run = gr.Button("🚀 Generate (Summary + 3 Q&A)")
    summary_box = gr.Textbox(label="✅ Summary / Résumé", lines=4)
    qa_box = gr.Textbox(label="✅ Questions & Answers", lines=12)

    btn_run.click(revision_mode, inputs=[input_box, lang_radio], outputs=[summary_box, qa_box])

demo.launch(share=True)
|
core/__init__.py
ADDED
|
File without changes
|
core/auto_qa_hf.py
CHANGED
|
@@ -1,41 +1,50 @@
|
|
| 1 |
from transformers import pipeline
|
| 2 |
|
| 3 |
-
_qg = pipeline(
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
)
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
seen = set()
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
|
|
|
| 28 |
final.append(q)
|
| 29 |
-
seen.add(
|
| 30 |
if len(final) >= n_questions:
|
| 31 |
break
|
| 32 |
|
| 33 |
-
# fallback if model returns nothing
|
| 34 |
if not final:
|
| 35 |
-
final = [
|
| 36 |
-
|
| 37 |
-
"What are the key points?",
|
| 38 |
-
"Why is it important?"
|
| 39 |
-
][:n_questions]
|
| 40 |
|
| 41 |
return final
|
|
|
|
| 1 |
from transformers import pipeline
|
| 2 |
|
| 3 |
+
# Multilingual text2text model used for question generation.
# NOTE(review): google/mt5-base is a raw pretrained checkpoint (not
# instruction-tuned) — verify it actually follows the prompts built in
# generate_questions before relying on its output quality.
_qg = pipeline(
    "text2text-generation",
    model="google/mt5-base",
)
|
| 4 |
+
|
| 5 |
+
def generate_questions(text: str, lang: str = "en", n_questions: int = 50):
    """Generate up to *n_questions* short revision questions for *text*.

    Args:
        text: source passage; inputs under 80 chars get a single generic
            question instead of a model call.
        lang: "en" or "fr" — controls the prompt and fallback questions.
        n_questions: hard cap on the number of questions returned.

    Returns:
        Non-empty list of question strings (generic fallbacks when the
        model output yields nothing usable).
    """
    import re  # local import: only needed for list-marker cleanup below

    text = (text or "").strip()
    if len(text) < 80:
        return ["Quel est le sujet principal ?"] if lang == "fr" else ["What is the main topic?"]

    if lang == "fr":
        prompt = (
            "Génère 10 questions courtes de révision basées uniquement sur ce texte. "
            "Une question par ligne.\n\n"
            f"TEXTE:\n{text}"
        )
    else:
        prompt = (
            "Generate 10 short revision questions based only on this text. "
            "One question per line.\n\n"
            f"TEXT:\n{text}"
        )

    out = _qg(prompt, max_length=256, do_sample=False)[0]["generated_text"]

    questions = []
    for line in out.split("\n"):
        # Strip bullets AND "1." / "2)" style numbering the model often emits
        # (previously only "-• " was stripped, leaving numbered prefixes in).
        q = re.sub(r"^\s*(?:[-•*]+|\d+\s*[.)])\s*", "", line).strip()
        if len(q) < 6:  # too short to be a real question
            continue
        if not q.endswith("?"):
            q += "?"
        questions.append(q)

    # Dedupe (case-insensitive) and enforce the n_questions cap.
    seen = set()
    final = []
    for q in questions:
        qn = q.lower()
        if qn not in seen:
            final.append(q)
            seen.add(qn)
        if len(final) >= n_questions:
            break

    # Fallback if the model produced nothing usable; respect the cap here too.
    if not final:
        final = (
            ["Quel est le sujet principal ?", "Quels sont les points importants ?", "Quelle est la conclusion ?"]
            if lang == "fr"
            else ["What is the main topic?", "What are the key points?", "What is the conclusion?"]
        )[:n_questions]

    return final
|
core/qa_hf.py
CHANGED
|
@@ -2,22 +2,20 @@ from transformers import pipeline
|
|
| 2 |
|
| 3 |
_qa = pipeline(
|
| 4 |
"question-answering",
|
| 5 |
-
model="deepset/roberta-
|
| 6 |
-
device=0
|
| 7 |
)
|
| 8 |
|
| 9 |
def answer_question_with_score(context: str, question: str):
|
| 10 |
-
context = context.strip()
|
| 11 |
-
question = question.strip()
|
| 12 |
if not context or not question:
|
| 13 |
return {"answer": "", "score": 0.0}
|
| 14 |
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
score = float(
|
| 18 |
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
score = 0.0
|
| 22 |
|
| 23 |
-
return {"answer":
|
|
|
|
| 2 |
|
| 3 |
# Extractive QA model (multilingual, SQuAD2-style training data).
_qa = pipeline("question-answering", model="deepset/xlm-roberta-large-squad2")
|
| 7 |
|
| 8 |
def answer_question_with_score(context: str, question: str):
    """Answer *question* from *context* using the extractive QA model.

    Returns:
        {"answer": str, "score": float}. Blank inputs, or an empty answer
        from the model, yield {"answer": "", "score": 0.0}.
    """
    empty = {"answer": "", "score": 0.0}

    ctx = (context or "").strip()
    q = (question or "").strip()
    if not (ctx and q):
        return empty

    result = _qa(question=q, context=ctx)
    answer = (result.get("answer") or "").strip()
    if not answer:
        return empty
    return {"answer": answer, "score": float(result.get("score", 0.0))}
|
core/summarizer_hf.py
CHANGED
|
@@ -1,21 +1,16 @@
|
|
| 1 |
from transformers import pipeline
|
| 2 |
|
|
|
|
| 3 |
_summarizer = pipeline(
|
| 4 |
-
"
|
| 5 |
-
model="
|
| 6 |
-
device=0
|
| 7 |
)
|
| 8 |
|
| 9 |
-
def summarize_text(text: str) -> str:
|
| 10 |
-
text = text.strip()
|
| 11 |
if len(text) < 80:
|
| 12 |
-
return "
|
| 13 |
|
| 14 |
-
|
| 15 |
-
out = _summarizer(
|
| 16 |
-
|
| 17 |
-
max_length=60, # كان 160
|
| 18 |
-
min_length=20, # كان 40
|
| 19 |
-
do_sample=False
|
| 20 |
-
)
|
| 21 |
-
return out[0]["summary_text"]
|
|
|
|
| 1 |
from transformers import pipeline
|
| 2 |
|
| 3 |
+
# Multilingual abstractive summarizer (mT5 fine-tuned on XLSum).
_summarizer = pipeline("text2text-generation", model="csebuetnlp/mT5_multilingual_XLSum")
|
| 8 |
|
| 9 |
+
def summarize_text(text: str, lang: str = "en") -> str:
    """Return a short abstractive summary of *text*.

    Args:
        text: input passage; under 80 chars returns a "too short" message
            instead of calling the model.
        lang: "en" or "fr" — only affects the too-short message; the
            summarization model itself is multilingual.

    Returns:
        Summary string (stripped), or a localized "too short" message.
    """
    text = (text or "").strip()
    if len(text) < 80:
        return "Texte trop court pour résumer." if lang == "fr" else "Text too short to summarize."

    # The XLSum-finetuned mT5 checkpoint takes the raw article text directly;
    # the "summarize: " task prefix is a convention of the original T5
    # checkpoints and would just be summarized as part of the input here.
    # truncation=True keeps over-length inputs within the model's max input
    # size instead of raising.
    out = _summarizer(text, max_length=80, do_sample=False, truncation=True)
    return out[0]["generated_text"].strip()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|