Spaces:

SuperSl6
/

Question_Summrization_Demo

Sleeping

App Files Files Community

SuperSl6 committed on May 22, 2025

Commit

3b513ce

verified ·

1 Parent(s): d225600

Create app.py

Browse files

Files changed (1) hide show

app.py +170 -0

app.py ADDED Viewed

	@@ -0,0 +1,170 @@

+# -*- coding: utf-8 -*-
+# app.py – Arabic Questions Summarization in Mental Healthcare with ALLaM
+# Based 1-to-1 on the original Colab notebook (no changes to instructions).
+import torch
+from transformers import AutoTokenizer, AutoModelForCausalLM
+import gradio as gr
+# ------------------------------------------------------------------
+# 1. Device setup
+# ------------------------------------------------------------------
# Prefer the GPU when PyTorch can see one; otherwise run on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using device: {device}")
# ------------------------------------------------------------------
# 2. Load ALLaM
# ------------------------------------------------------------------
model_id = "ALLaM-AI/ALLaM-7B-Instruct-preview"
tokenizer = AutoTokenizer.from_pretrained(model_id)
# The load options depend on the device chosen above:
#   GPU -> device_map="auto", float16 weights, load_in_8bit enabled
#   CPU -> no device map, float32 weights, load_in_8bit disabled
# NOTE(review): recent transformers releases may prefer passing a
# quantization config object instead of load_in_8bit -- confirm against the
# version this Space pins.
if device == "cuda":
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        device_map="auto",
        torch_dtype=torch.float16,
        load_in_8bit=True,
    )
else:
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        device_map=None,
        torch_dtype=torch.float32,
        load_in_8bit=False,
    )
+# ------------------------------------------------------------------
+# 3. Generation helpers  (verbatim logic from the notebook)
+# ------------------------------------------------------------------
def generate_chat_response(
    system_text: str,
    user_text: str,
    max_new_tokens: int = 40,
    temperature: float = 0.2,
):
    """Run a single chat turn through the loaded model and return its text.

    Args:
        system_text: Optional system message; it is left out of the
            conversation when it is empty or whitespace-only.
        user_text: The user message.
        max_new_tokens: Forwarded to ``model.generate`` as the cap on newly
            generated tokens.
        temperature: Accepted for interface compatibility but not used:
            generation is called with ``do_sample=False``.

    Returns:
        The decoded output with special tokens skipped, every "[/INST]"
        occurrence removed, and surrounding whitespace stripped.
    """
    conversation = [{"role": "user", "content": user_text}]
    if system_text.strip():
        # The system message, when present, goes ahead of the user message.
        conversation.insert(0, {"role": "system", "content": system_text})

    # Render the conversation to plain text first, then tokenize that text.
    rendered = tokenizer.apply_chat_template(conversation, tokenize=False)
    encoded = tokenizer(
        rendered,
        return_tensors="pt",
        return_token_type_ids=False,
    )
    # Move every input tensor onto the device the model lives on.
    model_inputs = {
        name: tensor.to(model.device) for name, tensor in encoded.items()
    }

    with torch.inference_mode():
        generated = model.generate(
            **model_inputs,
            max_new_tokens=max_new_tokens,
            do_sample=False,
            pad_token_id=tokenizer.eos_token_id,
        )

    # NOTE(review): the first returned sequence is decoded in full; for a
    # decoder-only model this presumably still contains the prompt text --
    # confirm before relying on the return value being the reply alone.
    decoded = tokenizer.decode(generated[0], skip_special_tokens=True)
    return decoded.replace("[/INST]", "").strip()
def generate_text(prompt: str, max_new_tokens: int = 60):
    """Generate text for *prompt* using an empty system message.

    Args:
        prompt: Text sent as the user message.
        max_new_tokens: Cap on newly generated tokens, passed through to
            ``generate_chat_response``.

    Returns:
        Whatever ``generate_chat_response`` returns for this prompt.
    """
    # The system message is deliberately empty, exactly as in the notebook.
    return generate_chat_response("", prompt, max_new_tokens=max_new_tokens)
+# ------------------------------------------------------------------
+# 4. Prompt construction (instructions unchanged)
+# ------------------------------------------------------------------
def prompt_short_question_cot_few_shots(question_text: str) -> str:
    """Build the few-shot, step-by-step prompt used to shorten a question.

    The prompt is made of a two-line task description, three worked examples
    (original question, numbered reasoning steps, short question), and a
    final segment that embeds *question_text* and ends with the
    short-question marker so the model's continuation is the answer.

    Args:
        question_text: The long user question; inserted verbatim.

    Returns:
        The assembled prompt with leading and trailing whitespace stripped.
    """
    examples = """
أمثلة على كيفية التفكير خطوة بخطوة ثم إعطاء السؤال المختصر:
السؤال الأصلي:
انا فيني اكتئاب وقلق ووصف لي دكتور citalopram استخدمتها ثلاث شهور ولا نفع احتاج وصفه Ativan
فكر خطوة بخطوة:
1. المستخدم يعاني من اكتئاب وقلق.
2. تناول دواء citalopram لثلاثة أشهر من دون تحسّن.
3. يسأل عن الحصول على وصفة Ativan كبديل أو إضافة للعلاج.
السؤال المختصر:
استفسار حول عدم فعالية citalopram والحاجة الى Ativan
السؤال الأصلي:
كنت اعاني من قلق وأخذت دواء سبرالكس لمدة 4 شهور وتوقفت عنه في 2014. سأذهب لتحليل بول للعمل، هل سيظهر أثر الدواء؟
فكر خطوة بخطوة:
1. المستخدم كان يعاني من قلق وتناول سبرالكس قبل عدة سنوات.
2. يخشى أن يظهر الدواء القديم في فحص البول المطلوب للعمل.
3. يريد معرفة إن كان ما زال موجوداً في جسمه.
السؤال المختصر:
سؤال حول بقاء أثر الدواء في تحليل البول بعد مدة؟
السؤال الأصلي:
أصبت بالاضطراب الوجداني منذ 2008، تكررت النوبات عدة مرات. أتناول تيجرتول وأرايبرزول وأولابكس. أرغب بالعلاج النفسي دون أدوية.
فكر خطوة بخطوة:
1. المستخدم لديه اضطراب وجداني منذ سنوات طويلة.
2. لديه نوبات متكررة أعوام 2008، 2009، 2013، 2017، و2018.
3. يأخذ عدة أدوية (تيجرتول وأرايبرزول وأولابكس).
4. يريد الآن علاجاً غير دوائي، ربما علاجاً نفسياً بديلاً.
السؤال المختصر:
استفسار حول علاج الاضطراب الوجداني بدون دواء
""".strip()
    # The second line below previously contained two U+FFFD replacement
    # characters where the first letter of the Arabic word for "adding" had
    # been corrupted; the word is restored so the model receives a clean
    # instruction. All other prompt text is unchanged.
    new_question_segment = f"""
الآن لديك سؤال جديد، فكر خطوة بخطوة بشكل مشابه ثم أعطني السؤال المختصر:
تأكد من عدم إضافة ملاحظات أو اضافة معلومة غير موجودة في السؤال
السؤال الأصلي:
{question_text}
السؤال المختصر:
"""
    prompt = f"""
أنت مساعد لغوي مختص بأسئلة الصحة النفسية.
هدفك هو إعادة كتابة الأسئلة المطوّلة في شكل مختصر ومباشر، يركّز على النقطة الأساسية.
{examples}
{new_question_segment}
""".strip()
    return prompt
def summarize_question_cot_few_shots(question_text: str) -> str:
    """Shorten *question_text* with the few-shot prompt and the model.

    Args:
        question_text: The long user question.

    Returns:
        The stripped text that follows the last short-question marker in the
        model output, or the whole stripped output when the marker is absent.
    """
    marker = "السؤال المختصر:"
    raw_output = generate_text(
        prompt_short_question_cot_few_shots(question_text),
        max_new_tokens=60,
    )
    # rpartition splits on the LAST marker; when the marker is missing the
    # third element is the entire string, so one expression covers both cases.
    _, _, tail = raw_output.rpartition(marker)
    return tail.strip()
+# ------------------------------------------------------------------
+# 5. Gradio interface
+# ------------------------------------------------------------------
def gradio_predict(question: str) -> str:
    """Gradio callback: turn the submitted question into its short form.

    Args:
        question: Text from the input box. May be empty, whitespace-only, or
            ``None`` when nothing was entered.

    Returns:
        The shortened question, or an empty string when there is no input.
    """
    # A blank submission has nothing to summarize; skip the model call rather
    # than spend an inference on it and show an invented summary.
    if question is None or not question.strip():
        return ""
    return summarize_question_cot_few_shots(question)
# One text box in (the long question), one text box out (the short question).
demo = gr.Interface(
    title="🧠 Arabic Mental-Health Question Summarizer (ALLaM-7B)",
    description="يعيد هذا التطبيق صياغة الأسئلة العربية الطويلة حول الصحة النفسية إلى سؤال مختصر ومباشر.",
    fn=gradio_predict,
    inputs=gr.Textbox(
        label="السؤال الأصلي",
        placeholder="اكتب سؤالك المطوّل هنا...",
        lines=7,
    ),
    outputs=gr.Textbox(label="السؤال المختصر"),
    # A single sample input offered in the UI.
    examples=[
        "كنت أعاني من قلق وأخذت دواء سبرالكس لمدة 4 شهور وتوقفت عنه عام 2014..."
    ],
)

# Launch the interface only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()