Leen172 committed on
Commit
677ac21
·
verified ·
1 Parent(s): ca0d594

Upload 3 files

Browse files
Files changed (3) hide show
  1. app.py +168 -0
  2. requirements.txt +5 -0
  3. styles.css +8 -0
app.py ADDED
@@ -0,0 +1,168 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import io, json, csv, uuid, random, re
3
+ import regex as re2, yake
4
+
5
# Fixed seed so distractor shuffling is reproducible across runs.
random.seed(42)

# ====== Core processing ======
# Arabic function/stop words filtered out of keyword candidates.
AR_STOP = set("ููŠ ุนู„ู‰ ู…ู† ุฅู„ู‰ ุนู† ู…ุน ู„ุฏู‰ ุฐู„ูƒ ู‡ุฐู‡ ู‡ุฐุง ุงู„ุฐูŠู† ุงู„ุชูŠ ุงู„ุฐูŠ ุงู„ู„ูˆุงุชูŠ ุงู„ู„ูˆุงุชูŠุง ุฃูˆ ุฃู… ุฅู† ุฃู† ูƒุงู† ุชูƒูˆู† ูƒุงู†ูˆุง ูƒุงู†ุช ูƒู†ุช ูƒู†ุง ูƒุงู†ุง ูƒุงู†ุชู ุซู… ู‚ุฏ ู„ู‚ุฏ ุฑุจู…ุง ุจู„ ู„ูƒู† ู„ูƒู†ู‘ูŽ ุฅู„ุง ุณูˆู‰ ุญุชู‰ ุญูŠุซ ูƒู…ุง ู„ู…ุง ู„ู…ุงู‘ ู„ู…ุงู‘ูŽ ู„ู…ุงู‹ ู…ุง ู…ุงุฐุง ู„ู…ุงุฐุง ู…ุชู‰ ุฃูŠู† ูƒูŠู ุฃูŠ ุฃูŠู‘ ุฃูŠูู‘ ู‡ู†ุงูƒ ู‡ู†ุง ู‡ู†ุงูƒูŽ ุชู„ูƒ ุฐู„ูƒู… ุฐู„ูƒู† ุฃูˆู„ุฆูƒ ู‡ุคู„ุงุก ู‡ู…ุง ู‡ู† ู‡ู… ุฃู†ุชู ุฃู†ุชูŽ ุฃู†ุชู…ุง ุฃู†ุชู† ุฃู†ุชู… ุฃู†ุง ู†ุญู† ู‡ูŠ ู‡ูˆ ู‡ู†ู‘ูŽ ู‡ู…ู‘ูŽ".split())
# Sentence boundary: split after ., !, ? or the Arabic question mark.
# Compiled with the third-party `regex` module (re2), same syntax as stdlib here.
SENT_SPLIT = re2.compile(r"(?<=[\.!ุŸ\?])\s+")
10
+
11
def clean_text_basic(txt: str) -> str:
    """Normalize whitespace and punctuation spacing in raw extracted text."""
    # Carriage returns and tabs become ordinary spaces.
    normalized = txt.replace('\r', ' ').replace('\t', ' ')
    # Drop invisible bidi/BOM control characters left over from PDF extraction.
    normalized = re.sub(r"\u200f|\u200e|\ufeff", " ", normalized)
    # Collapse any run of whitespace to a single space.
    normalized = re.sub(r"\s+", " ", normalized)
    # Exactly one space after punctuation, none before it.
    normalized = re.sub(r"\s*([\.\!\?ุŸุŒ,:;ุ›])\s*", r"\1 ", normalized)
    return normalized.strip()
17
+
18
def extract_text_pdfminer(data: bytes) -> str:
    """Extract text from PDF bytes via pdfminer; empty string on any failure."""
    # Import lazily so the app still loads when pdfminer is absent.
    try:
        import pdfminer.high_level
    except Exception:
        return ""
    try:
        extracted = pdfminer.high_level.extract_text(io.BytesIO(data))
        return extracted or ""
    except Exception:
        # Best-effort: the caller falls back to pypdf on "".
        return ""
24
+
25
def extract_text_pypdf(data: bytes) -> str:
    """Fallback PDF text extraction using pypdf; empty string on any failure."""
    try:
        from pypdf import PdfReader
        reader = PdfReader(io.BytesIO(data))
        # Pages with no extractable text contribute an empty string.
        pages = [page.extract_text() or "" for page in reader.pages]
        return "\n".join(pages)
    except Exception:
        # Best-effort, mirroring extract_text_pdfminer.
        return ""
34
+
35
def extract_text_from_pdf(data: bytes) -> str:
    """Extract and normalize PDF text, preferring pdfminer over pypdf."""
    primary = extract_text_pdfminer(data)
    # Under 10 meaningful characters counts as a failed extraction.
    if not primary or len(primary.strip()) < 10:
        primary = extract_text_pypdf(data)
    return clean_text_basic(primary)
40
+
41
def split_sentences(text: str):
    """Split *text* into sentences, keeping only those of >= 25 characters."""
    stripped = (chunk.strip() for chunk in SENT_SPLIT.split(text))
    # The length filter also discards empty fragments.
    return [sentence for sentence in stripped if len(sentence) >= 25]
43
+
44
def top_keywords_yake(text: str, max_k=120, lan='ar'):
    """Rank single-word keywords with YAKE, dropping stop words and duplicates.

    Returns at most *max_k* keywords in YAKE's ranking order.
    """
    extractor = yake.KeywordExtractor(lan=lan, n=1, top=max_k)
    ranked = [kw for kw, _ in extractor.extract_keywords(text)]
    unique, seen = [], set()
    for raw in ranked:
        word = raw.strip()
        if not word or word in seen:
            continue
        # Arabic stop words make useless blanks/distractors.
        if lan == "ar" and word in AR_STOP:
            continue
        if len(word) < 2:
            continue
        seen.add(word)
        unique.append(word)
    return unique
54
+
55
def build_distractors(correct, pool, k=3):
    """Pick up to *k* distinct wrong options for *correct* from *pool*.

    When the pool is too small the result is padded with dash placeholders.
    Fix: the original could emit duplicate options — duplicates from the pool
    were not filtered, and `random.choice(fillers)` could pick the same filler
    twice, so one question could show two identical answers.
    """
    target = correct.strip()
    candidates = [w for w in pool if w != correct and len(w) > 1]
    random.shuffle(candidates)
    out = []
    for w in candidates:
        if len(out) == k:
            break
        w2 = w.strip()
        # `w2 not in out` guarantees distinct options (bug fix).
        if w2 and w2 != target and w2 not in out:
            out.append(w2)
    fillers = ["โ€”", "-", "โ€”-"]
    # Pad with distinct fillers first (bug fix: no random duplicate fillers).
    for filler in fillers:
        if len(out) >= k:
            break
        if filler not in out:
            out.append(filler)
    # Degenerate case k > len(fillers) with an empty pool: repeat the dash.
    while len(out) < k:
        out.append(fillers[0])
    return out
66
+
67
def make_mcqs_from_text(text: str, n=8, lang='ar'):
    """Generate up to *n* fill-in-the-blank MCQ dicts from raw *text*.

    Each item carries: id, question (sentence with the keyword blanked out),
    choices (4 options), answer_index, explanation (source-sentence excerpt).
    Raises ValueError when no usable sentences exist, RuntimeError when no
    question could be assembled.
    """
    text = clean_text_basic(text)
    sents = split_sentences(text)
    if not sents: raise ValueError("ุงู„ู†ุต ู‚ุตูŠุฑ ุฌุฏู‹ุง.")
    keywords = top_keywords_yake(text, 120, lang)
    if not keywords:
        # Frequency-ranked fallback when YAKE yields nothing.
        # Fix: \p{L}/\p{N} property classes are only supported by the
        # third-party `regex` module (imported as re2); stdlib `re` raises
        # "bad escape \p" here, so this branch used to crash.
        toks = re2.findall(r"[\p{L}\p{N}_]+", text)
        toks = [t for t in toks if not (lang=="ar" and t in AR_STOP)]
        from collections import Counter
        keywords=[w for w,_ in Counter(toks).most_common(80)]
    # Map each keyword to the first sentence that contains it.
    sent_for_kw={}
    for s in sents:
        for kw in keywords:
            if kw in s and kw not in sent_for_kw:
                sent_for_kw[kw]=s
    items=[]; used=set()
    pool=[kw for kw in keywords if kw in sent_for_kw]
    for kw in pool:
        if len(items)>=n: break
        s=sent_for_kw[kw]
        if s in used: continue  # at most one question per sentence
        blanked=s.replace(kw,"_____",1)
        # Three wrong options plus the correct keyword, in random order.
        choices=build_distractors(kw,[x for x in keywords if x!=kw],3)+[kw]
        random.shuffle(choices)
        ans=choices.index(kw)
        exp=f"ู…ู‚ุชุจุณ ู…ู† ุงู„ุฌู…ู„ุฉ: {s[:220]}" + ("..." if len(s)>220 else "")
        items.append({
            "id": str(uuid.uuid4())[:8],
            "question": blanked,
            "choices": choices,
            "answer_index": ans,
            "explanation": exp
        })
        used.add(s)
    if not items: raise RuntimeError("ุชุนุฐุฑ ุงู„ุชูˆู„ูŠุฏ.")
    return items
103
+
104
def render_cards(items):
    """Render MCQ dicts as HTML cards with collapsible answer sections."""
    letters = ["A", "B", "C", "D"]
    cards = []
    for num, item in enumerate(items, 1):
        options = "".join(f"<li>{choice}</li>" for choice in item["choices"])
        letter = letters[item["answer_index"]]
        cards.append(f"""
<article class="card">
<header><span class="badge">ุณ {num}</span><h3>{item['question']}</h3></header>
<ol type="A" class="choices">{options}</ol>
<details><summary>ุงู„ุฅุฌุงุจุฉ</summary>
<div class="answer"><b>ุงู„ุฅุฌุงุจุฉ:</b> {letter}</div></details>
</article>""")
    return "\n".join(cards)
117
+
118
def to_files(items):
    """Serialize MCQ dicts into in-memory JSON and CSV download files."""
    payload = json.dumps(items, ensure_ascii=False, indent=2).encode("utf-8")
    json_bytes = io.BytesIO(payload)
    json_bytes.name = "mcqs.json"

    buffer = io.StringIO()
    writer = csv.writer(buffer)
    writer.writerow(["id","question","A","B","C","D","answer_index","explanation"])
    for item in items:
        choices = item["choices"]
        # Pad to exactly four option columns.
        padded = choices + [''] * (4 - len(choices))
        writer.writerow([item["id"], item["question"], *padded,
                         item["answer_index"], item["explanation"]])
    csv_bytes = io.BytesIO(buffer.getvalue().encode("utf-8"))
    csv_bytes.name = "mcqs.csv"
    return json_bytes, csv_bytes
127
+
128
def pipeline(text, file, n, lang):
    """Gradio handler: build MCQs from pasted text or an uploaded PDF/TXT.

    Returns (html, json_file, csv_file); on bad input returns a warning
    string and (None, None) for the two download slots.

    Fix: modern Gradio's gr.File (type="filepath", the default since v4)
    hands the handler a plain str path, so the original `file.read()`
    crashed with AttributeError. Accept a path string, a file-like object,
    or an object exposing only `.name`.
    """
    src = (text or "").strip()
    if file is not None:
        if isinstance(file, str):
            # gradio >= 4 default: a temp-file path string.
            name = file.lower()
            with open(file, "rb") as fh:
                b = fh.read()
        elif hasattr(file, "read"):
            # Legacy file-like upload object.
            name = file.name.lower()
            b = file.read()
        else:
            # Wrapper exposing only the path via .name.
            name = file.name.lower()
            with open(file.name, "rb") as fh:
                b = fh.read()
        if name.endswith(".pdf"):
            src = extract_text_from_pdf(b)
        elif name.endswith(".txt"):
            src = clean_text_basic(b.decode("utf-8", "ignore"))
        else:
            return "โš ๏ธ ุงุฑูุนูŠ PDF ุฃูˆ TXT ูู‚ุท.", None, None
    if not src:
        return "โš ๏ธ ุฃุฏุฎู„ูŠ ู†ุตู‹ุง ุฃูˆ ู…ู„ูู‹ุง.", None, None
    # Slider values arrive as floats; step=1 makes int() lossless.
    items = make_mcqs_from_text(src, int(n), lang)
    html = render_cards(items)
    j, c = to_files(items)
    return html, j, c
144
+
145
# ====== Gradio UI ======
# Soft indigo theme; corner radius set through the theme builder.
theme = gr.themes.Soft(primary_hue="indigo").set(radius_size="10px")

# Card styling shipped alongside the app (see styles.css).
with open("styles.css","r",encoding="utf-8") as f:
    css = f.read()

# NOTE(review): confirm `fill_body` exists in the pinned gradio version —
# gr.Blocks commonly exposes `fill_height`/`fill_width` instead.
with gr.Blocks(theme=theme, css=css, fill_body=True) as demo:
    # Force right-to-left layout for the Arabic interface.
    gr.HTML("<style>body{direction:rtl}</style>")
    gr.Markdown("## ๐Ÿง  ู…ูˆู„ู‘ุฏ ุฃุณุฆู„ุฉ ุงุฎุชูŠุงุฑ ู…ู† ู…ุชุนุฏุฏ (PDF / TXT / ู†ุต)")
    with gr.Row():
        with gr.Column(scale=1):
            # Inputs: free text, optional file, question count, language.
            t=gr.Textbox(label="ุงู„ู†ุต",lines=8,placeholder="ุฃู„ุตู‚ูŠ ุงู„ู†ุต ู‡ู†ุง ุฃูˆ ุงุฑูุนูŠ ู…ู„ู")
            f=gr.File(label="ู…ู„ู PDF ุฃูˆ TXT",file_types=[".pdf",".txt"])
            n=gr.Slider(1,50,value=10,step=1,label="ุนุฏุฏ ุงู„ุฃุณุฆู„ุฉ")
            lang=gr.Dropdown(["ar","en"],value="ar",label="ุงู„ู„ุบุฉ")
            b=gr.Button("ุชูˆู„ูŠุฏ")
            # Download slots populated by `pipeline`.
            j=gr.File(label="ุชุญู…ูŠู„ JSON")
            c=gr.File(label="ุชุญู…ูŠู„ CSV")
        with gr.Column(scale=2):
            # Rendered question cards.
            out=gr.HTML(label="ุงู„ู†ุชุงุฆุฌ")
    # Wire the button to the processing pipeline.
    b.click(pipeline,[t,f,n,lang],[out,j,c])

if __name__=="__main__":
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ gradio
2
+ pdfminer.six
3
+ pypdf
4
+ regex
5
+ yake
styles.css ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
/* Page base: light background; Cairo is a common Arabic-friendly font. */
body { background: #fafafa; font-family: 'Cairo', sans-serif; }
/* Container for one generated question. */
.card {
background:#fff; border:1px solid #eaeaea; border-radius:14px;
padding:14px; box-shadow: 0 4px 12px rgba(0,0,0,.05); margin-bottom:10px;
}
/* Small "question number" pill in each card header. */
.badge { background:#eef2ff; color:#3730a3; padding:3px 9px; border-radius:999px; font-size:12px; }
/* Answer-option list; logical padding keeps RTL layouts correct. */
.choices { padding-inline-start:20px; }
/* Revealed-answer box inside the <details> element. */
.answer { margin-top:8px; background:#f9fafb; padding:8px; border-radius:10px; border:1px dashed #d1d5db; }