# mcq-generator / app.py
# Source: Leen172's Hugging Face Space — "Update app.py" (commit 9114ad9, verified)
import gradio as gr
import io, json, csv, uuid, random, re
import regex as re2, yake
random.seed(42)
# ====== ุงู„ู…ุนุงู„ุฌุฉ ุงู„ุฃุณุงุณูŠุฉ ======
AR_STOP = set("ููŠ ุนู„ู‰ ู…ู† ุฅู„ู‰ ุนู† ู…ุน ู„ุฏู‰ ุฐู„ูƒ ู‡ุฐู‡ ู‡ุฐุง ุงู„ุฐูŠู† ุงู„ุชูŠ ุงู„ุฐูŠ ุงู„ู„ูˆุงุชูŠ ุงู„ู„ูˆุงุชูŠุง ุฃูˆ ุฃู… ุฅู† ุฃู† ูƒุงู† ุชูƒูˆู† ูƒุงู†ูˆุง ูƒุงู†ุช ูƒู†ุช ูƒู†ุง ูƒุงู†ุง ูƒุงู†ุชู ุซู… ู‚ุฏ ู„ู‚ุฏ ุฑุจู…ุง ุจู„ ู„ูƒู† ู„ูƒู†ู‘ูŽ ุฅู„ุง ุณูˆู‰ ุญุชู‰ ุญูŠุซ ูƒู…ุง ู„ู…ุง ู„ู…ุงู‘ ู„ู…ุงู‘ูŽ ู„ู…ุงู‹ ู…ุง ู…ุงุฐุง ู„ู…ุงุฐุง ู…ุชู‰ ุฃูŠู† ูƒูŠู ุฃูŠ ุฃูŠู‘ ุฃูŠูู‘ ู‡ู†ุงูƒ ู‡ู†ุง ู‡ู†ุงูƒูŽ ุชู„ูƒ ุฐู„ูƒู… ุฐู„ูƒู† ุฃูˆู„ุฆูƒ ู‡ุคู„ุงุก ู‡ู…ุง ู‡ู† ู‡ู… ุฃู†ุชู ุฃู†ุชูŽ ุฃู†ุชู…ุง ุฃู†ุชู† ุฃู†ุชู… ุฃู†ุง ู†ุญู† ู‡ูŠ ู‡ูˆ ู‡ู†ู‘ูŽ ู‡ู…ู‘ูŽ".split())
SENT_SPLIT = re2.compile(r"(?<=[\.!ุŸ\?])\s+")
def clean_text_basic(txt:str)->str:
txt = txt.replace('\r',' ').replace('\t',' ')
txt = re.sub(r"\u200f|\u200e|\ufeff"," ",txt)
txt = re.sub(r"\s+"," ",txt)
txt = re.sub(r"\s*([\.\!\?ุŸุŒ,:;ุ›])\s*", r"\1 ", txt)
return txt.strip()
def extract_text_pdfminer(data: bytes) -> str:
try:
import pdfminer.high_level
return pdfminer.high_level.extract_text(io.BytesIO(data)) or ""
except Exception:
return ""
def extract_text_pypdf(data: bytes) -> str:
try:
from pypdf import PdfReader
out = []
for p in PdfReader(io.BytesIO(data)).pages:
out.append(p.extract_text() or "")
return "\n".join(out)
except Exception:
return ""
def extract_text_from_pdf(data: bytes) -> str:
txt = extract_text_pdfminer(data)
if not txt or len(txt.strip())<10:
txt = extract_text_pypdf(data)
return clean_text_basic(txt)
def split_sentences(text:str):
return [s for s in [s.strip() for s in SENT_SPLIT.split(text) if s.strip()] if len(s)>=25]
def top_keywords_yake(text:str, max_k=120, lan='ar'):
kws=[kw for kw,_ in yake.KeywordExtractor(lan=lan, n=1, top=max_k).extract_keywords(text)]
out,seen=[],set()
for k in kws:
k=k.strip()
if not k or k in seen: continue
if lan=="ar" and k in AR_STOP: continue
if len(k)<2: continue
seen.add(k); out.append(k)
return out
def build_distractors(correct, pool, k=3):
cand=[w for w in pool if w!=correct and len(w)>1]
random.shuffle(cand)
out=[]
for w in cand:
if len(out)==k: break
w2=w.strip()
if w2 and w2!=correct.strip(): out.append(w2)
fillers=["โ€”","-","โ€”-"]
while len(out)<k: out.append(random.choice(fillers))
return out
def make_mcqs_from_text(text: str, n=8, lang='ar'):
text = clean_text_basic(text)
sents = split_sentences(text)
if not sents: raise ValueError("ุงู„ู†ุต ู‚ุตูŠุฑ ุฌุฏู‹ุง.")
keywords = top_keywords_yake(text, 120, lang)
if not keywords:
toks = re.findall(r"[\p{L}\p{N}_]+", text)
toks = [t for t in toks if not (lang=="ar" and t in AR_STOP)]
from collections import Counter
keywords=[w for w,_ in Counter(toks).most_common(80)]
sent_for_kw={}
for s in sents:
for kw in keywords:
if kw in s and kw not in sent_for_kw:
sent_for_kw[kw]=s
items=[]; used=set()
pool=[kw for kw in keywords if kw in sent_for_kw]
for kw in pool:
if len(items)>=n: break
s=sent_for_kw[kw]
if s in used: continue
blanked=s.replace(kw,"_____",1)
choices=build_distractors(kw,[x for x in keywords if x!=kw],3)+[kw]
random.shuffle(choices)
ans=choices.index(kw)
exp=f"ู…ู‚ุชุจุณ ู…ู† ุงู„ุฌู…ู„ุฉ: {s[:220]}" + ("..." if len(s)>220 else "")
items.append({
"id": str(uuid.uuid4())[:8],
"question": blanked,
"choices": choices,
"answer_index": ans,
"explanation": exp
})
used.add(s)
if not items: raise RuntimeError("ุชุนุฐุฑ ุงู„ุชูˆู„ูŠุฏ.")
return items
def render_cards(items):
html=[]
for i,it in enumerate(items,1):
li="".join(f"<li>{c}</li>" for c in it["choices"])
ans=["A","B","C","D"][it["answer_index"]]
html.append(f"""
<article class="card">
<header><span class="badge">ุณ {i}</span><h3>{it['question']}</h3></header>
<ol type="A" class="choices">{li}</ol>
<details><summary>ุงู„ุฅุฌุงุจุฉ</summary>
<div class="answer"><b>ุงู„ุฅุฌุงุจุฉ:</b> {ans}</div></details>
</article>""")
return "\n".join(html)
def to_files(items):
json_bytes = io.BytesIO(json.dumps(items, ensure_ascii=False, indent=2).encode("utf-8")); json_bytes.name="mcqs.json"
s=io.StringIO(); w=csv.writer(s)
w.writerow(["id","question","A","B","C","D","answer_index","explanation"])
for it in items:
ch=it["choices"]
w.writerow([it["id"], it["question"], *(ch+['']*(4-len(ch))), it["answer_index"], it["explanation"]])
csv_bytes=io.BytesIO(s.getvalue().encode("utf-8")); csv_bytes.name="mcqs.csv"
return json_bytes, csv_bytes
def pipeline(text, file, n, lang):
src = (text or "").strip()
if file is not None:
b = file.read()
name = file.name.lower()
if name.endswith(".pdf"):
src = extract_text_from_pdf(b)
elif name.endswith(".txt"):
src = clean_text_basic(b.decode("utf-8","ignore"))
else:
return "โš ๏ธ ุงุฑูุนูŠ PDF ุฃูˆ TXT ูู‚ุท.", None, None
if not src: return "โš ๏ธ ุฃุฏุฎู„ูŠ ู†ุตู‹ุง ุฃูˆ ู…ู„ูู‹ุง.", None, None
items = make_mcqs_from_text(src, n, lang)
html = render_cards(items)
j,c = to_files(items)
return html,j,c
# ====== ูˆุงุฌู‡ุฉ Gradio ======
theme = gr.themes.Soft()
try:
with open("styles.css","r",encoding="utf-8") as f:
css = f.read()
except FileNotFoundError:
css = ""
with gr.Blocks(theme=theme, css=css) as demo:
gr.HTML("<style>body{direction:rtl}</style>")
gr.Markdown("## ๐Ÿง  ู…ูˆู„ู‘ุฏ ุฃุณุฆู„ุฉ ุงุฎุชูŠุงุฑ ู…ู† ู…ุชุนุฏุฏ (PDF / TXT / ู†ุต)")
with gr.Row():
with gr.Column(scale=1):
t=gr.Textbox(label="ุงู„ู†ุต",lines=8,placeholder="ุฃู„ุตู‚ูŠ ุงู„ู†ุต ู‡ู†ุง ุฃูˆ ุงุฑูุนูŠ ู…ู„ู")
f=gr.File(label="ู…ู„ู PDF ุฃูˆ TXT",file_types=[".pdf",".txt"])
n=gr.Slider(1,50,value=10,step=1,label="ุนุฏุฏ ุงู„ุฃุณุฆู„ุฉ")
lang=gr.Dropdown(["ar","en"],value="ar",label="ุงู„ู„ุบุฉ")
b=gr.Button("ุชูˆู„ูŠุฏ")
j=gr.File(label="ุชุญู…ูŠู„ JSON")
c=gr.File(label="ุชุญู…ูŠู„ CSV")
with gr.Column(scale=2):
out=gr.HTML(label="ุงู„ู†ุชุงุฆุฌ")
b.click(pipeline,[t,f,n,lang],[out,j,c])
if __name__=="__main__":
demo.launch()