oddadmix committed on
Commit
dd624c6
·
verified ·
1 Parent(s): 5ce4580

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -19
app.py CHANGED
@@ -1,27 +1,43 @@
1
  import spaces
2
- from transformers import AutoModelForSeq2SeqLM,AutoTokenizer, pipeline
3
  import gradio as grad
4
# Bilingual MSA <-> Egyptian Arabic translation model served via the
# Transformers pipeline API.
mdl_name = "oddadmix/Masrawy-BiLingual-v1"
# Loads the model onto the GPU at import time.
# NOTE(review): assumes CUDA is available when the module is imported —
# on ZeroGPU Spaces the GPU is usually only attached inside @spaces.GPU
# functions; confirm this works in the target runtime.
pipe = pipeline("translation", model=mdl_name, device = 'cuda')
 
 
 
 
 
 
 
 
 
 
 
 
6
 
7
@spaces.GPU
def translate(text, direction):
    """Translate between MSA and Egyptian Arabic.

    A target-language tag is appended to the input so the model knows
    which way to translate: "<arz>" selects Egyptian Arabic output,
    "<ar>" selects Modern Standard Arabic output.
    """
    lang_tag = " <arz>" if direction == "MSA → Egyptian" else " <ar>"
    return pipe(text + lang_tag)[0]['translation_text']
 
 
 
 
 
 
 
 
19
 
20
# Build the Gradio UI: one input textbox, a direction selector, and the
# translated-text output, then launch the app.
direction_selector = grad.Radio(
    choices=["MSA → Egyptian", "Egyptian → MSA"],
    value="MSA → Egyptian",
    label="اتجاه الترجمة",
)
input_textbox = grad.Textbox(lines=5, placeholder="اكتب النص هنا بالعربية الفصحى أو باللهجة المصرية...", label="Input Text")
output_textbox = grad.Textbox(lines=5, label="النص المترجم")
demo = grad.Interface(
    translate,
    inputs=[input_textbox, direction_selector],
    outputs=output_textbox,
    title="Masrawy: MSA ↔ Egyptian Translator",
    description="اختر اتجاه الترجمة وأدخل النص بالعربية الفصحى أو باللهجة المصرية.",
)
demo.launch()
 
1
  import spaces
2
+ from transformers import AutoModelForCausalLM, AutoTokenizer
3
  import gradio as grad
4
+
5
+
6
# Causal LM used for Arabic summarization.
model_id = "oddadmix/arabic-summarization"

# NOTE(review): attn_implementation="flash_attention_2" requires the
# flash-attn package and a supported GPU; confirm it is installed in the
# Space, otherwise from_pretrained raises at startup.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",  # let Accelerate place the weights automatically
    torch_dtype="bfloat16",
    attn_implementation="flash_attention_2"
)
tokenizer = AutoTokenizer.from_pretrained(model_id)
15
+
16
+
17
+
18
 
19
@spaces.GPU
def summarize(text):
    """Summarize Arabic text with the chat-tuned causal LM.

    Parameters
    ----------
    text : str
        The Arabic text to summarize (taken from the Gradio textbox).

    Returns
    -------
    str
        The generated summary (the assistant turn of the chat).
    """
    # BUG FIX: the original referenced an undefined name `prompt`, which
    # raised NameError on every call — the function parameter is `text`.
    input_ids = tokenizer.apply_chat_template(
        [{"role": "user", "content": text}],
        add_generation_prompt=True,
        return_tensors="pt",
        tokenize=True,
    ).to(model.device)

    output = model.generate(
        input_ids,
        do_sample=True,
        temperature=0.3,
        min_p=0.15,
        repetition_penalty=1.05,
        max_new_tokens=512,
    )

    # Decode WITH special tokens so the "<|im_start|>assistant" marker
    # survives and the assistant turn can be split off; then strip the
    # end-of-turn token so it is not shown to the user.
    response = tokenizer.decode(output[0], skip_special_tokens=False)
    summary = response.split("<|im_start|>assistant")[1]
    return summary.replace("<|im_end|>", "").strip()
39
 
40
# BUG FIX: the title, labels, placeholder and description were left over
# from the previous MSA<->Egyptian translator app; this Space now performs
# Arabic summarization, so the user-facing text is updated to match.
input_textbox = grad.Textbox(lines=5, placeholder="اكتب النص المراد تلخيصه هنا...", label="Input Text")
output_textbox = grad.Textbox(lines=5, label="الملخص")
grad.Interface(
    summarize,
    inputs=[input_textbox],
    outputs=output_textbox,
    title="Arabic Summarization",
    description="أدخل النص بالعربية للحصول على ملخص.",
).launch()