| | import gradio as gr |
| | import subprocess |
| | import html |
| |
|
def clean_uri(raw: str) -> str:
    """Normalize a raw URI field.

    Surrounding whitespace is trimmed; one pair of enclosing angle brackets
    (``<...>``) is removed when both are present; everything from the first
    ``;http`` onward is dropped; finally trailing spaces, commas and
    semicolons are stripped.
    """
    cleaned = raw.strip()

    is_bracketed = cleaned.startswith("<") and cleaned.endswith(">")
    if is_bracketed:
        cleaned = cleaned[1:-1].strip()

    # partition() yields the whole string as the head when ";http" is absent,
    # so no separate membership test is needed.
    head, _sep, _rest = cleaned.partition(";http")
    return head.rstrip(" ,;")
| |
|
def parse_annif_output(text: str) -> str:
    """Render Annif suggestions as an HTML table with clickable URIs.

    The input is read as tab-separated text, one suggestion per line, with
    the fields ``uri``, ``label`` and ``score`` in that order. Blank lines,
    lines whose first non-blank character is ``#``, and lines with fewer
    than three fields are skipped.

    Args:
        text: Raw text to parse.

    Returns:
        An HTML ``<table>`` with one row per suggestion, or a short
        "no results" paragraph when no usable line is found.
    """
    no_results = "<p>لا توجد نتائج.</p>"

    lines = [ln for ln in text.strip().split("\n") if ln.strip()]
    if not lines:
        return no_results

    items = []
    for ln in lines:
        if ln.lstrip().startswith("#"):
            continue
        parts = ln.split("\t")
        if len(parts) < 3:
            continue
        raw_uri, raw_label, raw_score = parts[:3]

        # Escape the URI once and use it for both the href attribute and the
        # link text. html.escape() also escapes quotes, so a URI containing
        # `"` cannot terminate the attribute and inject markup.
        safe_uri = html.escape(clean_uri(raw_uri))
        label = html.escape(raw_label.strip().strip('"'))

        try:
            score_str = f"{float(raw_score):.3f}"
        except ValueError:
            # Not a number: show the raw field as-is, escaped.
            score_str = html.escape(raw_score)

        items.append(
            f'<tr>'
            f'<td><a href="{safe_uri}" target="_blank" rel="noopener noreferrer">{safe_uri}</a></td>'
            f'<td>{label}</td>'
            f'<td style="text-align:right">{score_str}</td>'
            f'</tr>'
        )

    if not items:
        return no_results

    table = (
        "<table style='width:100%; border-collapse:collapse'>"
        "<thead>"
        "<tr>"
        "<th style='text-align:right; padding:6px; border-bottom:1px solid #ddd'>URI</th>"
        "<th style='text-align:right; padding:6px; border-bottom:1px solid #ddd'>المصطلح</th>"
        "<th style='text-align:right; padding:6px; border-bottom:1px solid #ddd'>الدرجة</th>"
        "</tr>"
        "</thead>"
        "<tbody>"
        + "".join(items) +
        "</tbody>"
        "</table>"
    )
    return table
| |
|
def suggest_subjects(input_text: str, limit: int):
    """Run ``annif suggest`` on the given text and return the result as HTML.

    The text is sent on standard input to
    ``annif suggest ar-annif - --limit <limit>`` and the command's standard
    output is passed to ``parse_annif_output``.

    Args:
        input_text: Text to analyse; empty or whitespace-only input is
            rejected with a prompt message.
        limit: Maximum number of suggestions; converted with ``int()``.

    Returns:
        An HTML string: the rendered suggestions, a prompt asking for
        input, or a ``<pre>`` block describing the error.
    """
    if not (input_text and input_text.strip()):
        return "<p>من فضلك أدخل نصًا.</p>"

    try:
        # Built inside the try so a bad `limit` is reported like any other error.
        command = ["annif", "suggest", "ar-annif", "-", "--limit", str(int(limit))]
        completed = subprocess.run(
            command,
            input=input_text,
            capture_output=True,
            text=True,
            check=True,
        )
        # Parsing stays inside the try so its failures are reported too.
        return parse_annif_output(completed.stdout)
    except subprocess.CalledProcessError as failure:
        # Non-zero exit status: show both captured streams, escaped.
        err = html.escape(failure.stderr or "")
        out = html.escape(failure.stdout or "")
        return f"<pre>خطأ من annif suggest:\nSTDERR:\n{err}\n\nSTDOUT:\n{out}</pre>"
    except Exception as unexpected:
        return f"<pre>خطأ: {html.escape(str(unexpected))}</pre>"
| |
|
# Page layout: right-to-left container with an Arabic-friendly font stack.
with gr.Blocks(css="""
.gradio-container { direction: rtl; font-family: 'Noto Naskh Arabic', system-ui, sans-serif; }
""") as demo:
    gr.Markdown("## 🔎 استخراج رؤوس الموضوعات (Annif)")

    # Input text area.
    with gr.Row():
        text = gr.Textbox(
            label="النص",
            lines=6,
            placeholder="ألصق هنا نصًا عربيًا...",
        )

    # Result-count slider and the submit button.
    with gr.Row():
        limit = gr.Slider(minimum=1, maximum=20, value=5, step=1, label="عدد النتائج")
        btn = gr.Button("اقتراح الرؤوس", variant="primary")

    # HTML area that receives the output of suggest_subjects.
    out = gr.HTML(label="النتائج")

    btn.click(suggest_subjects, [text, limit], out)


if __name__ == "__main__":
    # Listen on all interfaces, port 7860.
    demo.launch(server_port=7860, server_name="0.0.0.0")
| |
|