Spaces:
Running
Running
Upload app.py
Browse files
app.py
ADDED
|
@@ -0,0 +1,247 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# app.py
|
| 2 |
+
# -*- coding: utf-8 -*-
|
| 3 |
+
"""
|
| 4 |
+
HF Space — Arabic Ticket Parser & Excel Exporter
|
| 5 |
+
------------------------------------------------
|
| 6 |
+
- لصق عدة تذاكر نصياً (من واتساب/تلغرام…)
|
| 7 |
+
- يجزّئها تلقائياً، يستخرج الحقول العربية المعروفة، ويصنّف نوع المشكلة بالكلمات المفتاحية.
|
| 8 |
+
- يعرض جدولاً قابلاً للتعديل ثم يصدّره Excel مباشرة.
|
| 9 |
+
|
| 10 |
+
المكتبات: gradio, pandas, openpyxl
|
| 11 |
+
"""
|
| 12 |
+
import re
|
| 13 |
+
import io
|
| 14 |
+
import json
|
| 15 |
+
import gradio as gr
|
| 16 |
+
import pandas as pd
|
| 17 |
+
from datetime import datetime
|
| 18 |
+
|
| 19 |
+
# ---------------------------- Utilities ----------------------------
|
| 20 |
+
# Translation table that maps Arabic-Indic digits (U+0660..U+0669) and the
# Extended Arabic-Indic digits used by Persian/Urdu keyboards (U+06F0..U+06F9)
# to ASCII 0-9, so the digit-based heuristics see one consistent digit set.
ARABIC_DIGITS = str.maketrans(
    "٠١٢٣٤٥٦٧٨٩" + "".join(map(chr, range(0x06F0, 0x06FA))),
    "0123456789" * 2,
)
|
| 21 |
+
|
| 22 |
+
# Canonical column name -> alternative spellings seen in pasted tickets.
# A trailing colon inside an alias is tolerated but never required: the
# pattern builder below strips it and always tries the canonical name too.
FIELD_ALIASES = {
    "نوع المشكلة": ["نوع المشكله", "نوع المشكله:", "نوع المشكلة:"],
    "وقت حدوث المشكلة": ["وقت حدوث المشكله", "وقت حدوث المشكله:", "وقت حدوث المشكلة:"],
    "اسم صاحب المشكلة": ["اسم صاحب المشكله", "اسم صاحب المشكله:", "اسم صاحب المشكلة:"],
    "رقم الهوية": ["رقم الهويه", "رقم الهوية:"],
    "رقم الجهاز": ["رقم الجهاز:"],
    "رقم الجوال": ["رقم الجوال:"],
    "المسح": ["المسح:", "اسم المسح"],
    "المنطقة": ["المنطقه", "المنطقة:"],
    "ملاحظات": ["ملاحظه", "ملاحظة", "ملاحظات", "ملاحظات:"],
}

# Separator between a label and its value: one or more ASCII or full-width
# colons followed by horizontal whitespace only, so an empty value can never
# swallow the following line.
LABEL_SEP = r"[:：]+[^\S\n]*"


def _compile_field_pattern(canonical, aliases):
    """Build the line-anchored ``label : value`` regex for one field.

    The canonical name and every alias are accepted as the label. Trailing
    colons are stripped from each spelling because ``LABEL_SEP`` already
    matches the colon; leaving them in would demand a doubled colon in the
    text. Group 1 of the compiled pattern is the value (rest of the line).
    """
    spellings = {name.strip().rstrip(":：").strip() for name in (canonical, *aliases)}
    spellings.discard("")
    # Longest spelling first so a short label cannot shadow a longer one.
    ordered = sorted(spellings, key=lambda name: (-len(name), name))
    alternation = "|".join(map(re.escape, ordered))
    return re.compile(
        rf"^[^\S\n]*(?:{alternation})[^\S\n]*{LABEL_SEP}(.+)",
        re.MULTILINE,
    )


# Precompiled patterns, one per canonical field, matching: label : value
FIELD_PATTERNS = {
    canonical: _compile_field_pattern(canonical, aliases)
    for canonical, aliases in FIELD_ALIASES.items()
}
|
| 41 |
+
|
| 42 |
+
# Recognized separators between tickets
|
| 43 |
+
# A separator is a newline, optional whitespace, then one or more of: another
# newline, a run of em dashes, 3+ hyphens, 3+ equals signs, or a run of red
# circle emoji, followed by optional whitespace and a closing newline.
TICKET_SEP = re.compile(r"\n\s*(?:\n|—+|-{3,}|={3,}|🔴+)+\s*\n")
|
| 44 |
+
|
| 45 |
+
# Default keyword -> category rules (editable from UI)
|
| 46 |
+
# Built-in classification rules as (category, keywords) pairs. Insertion order
# matters: classification returns the first category with a matching keyword.
DEFAULT_RULES = dict(
    [
        ("معلق شاشة سوداء", ["معلق شاشة سوداء", "شاشة سوداء"]),
        ("خارج نطاق المسح", ["خارج نطاق المسح"]),
        ("Invalid user data", ["invalid user data", "invalid user"]),
        ("رفض تسجيل الدخول", ["رفض تسجيل الدخول", "رفض الدخول"]),
        ("تعليق مستمر", ["تعليق مستمر", "يعلق", "تهنيق"]),
        ("مشكلة شبكة/نت", ["شبكة", "نت", "stc", "mobily"]),
        ("بطء/توقف", ["بطئ", "بطء", "وقوفه", "يتوقف"]),
        ("طلبات نظام", ["تحديث", "ترقية", "نسخة", "إصدار"]),
    ]
)
|
| 56 |
+
|
| 57 |
+
# Columns order
|
| 58 |
+
# Output column order: the computed category, then the extracted ticket
# fields, then the normalized raw ticket text.
COLUMNS = (
    ["التصنيف"]
    + ["نوع المشكلة", "وقت حدوث المشكلة", "اسم صاحب المشكلة"]
    + ["رقم الهوية", "رقم الجهاز", "رقم الجوال"]
    + ["المسح", "المنطقة", "ملاحظات"]
    + ["نص خام"]
)
|
| 71 |
+
|
| 72 |
+
|
| 73 |
+
def normalize_text(s: str) -> str:
    """Return a cleaned copy of *s* suitable for matching.

    Digits are translated through ``ARABIC_DIGITS``; the directional marks
    U+200E/U+200F, the isolate controls U+2066..U+2069 and the no-break space
    U+00A0 are each replaced by a plain space; the result is stripped.
    Anything that is not a ``str`` yields an empty string.
    """
    if isinstance(s, str):
        ascii_digits = s.translate(ARABIC_DIGITS)
        cleaned = re.sub(r"[\u200f\u200e\u2066\u2067\u2068\u2069\u00a0]", " ", ascii_digits)
        return cleaned.strip()
    return ""
|
| 79 |
+
|
| 80 |
+
|
| 81 |
+
def split_tickets(raw: str):
    """Split pasted text into a list of stripped, non-empty ticket strings.

    The text is normalized first. Explicit separators (``TICKET_SEP``) are
    tried; if they produce a single piece, the text is split on blank lines
    instead. Empty input yields an empty list.
    """
    cleaned = normalize_text(raw)
    if not cleaned:
        return []

    chunks = TICKET_SEP.split(cleaned)
    if len(chunks) == 1:
        # No explicit separator found: fall back to blank-line splitting.
        chunks = re.split(r"\n\s*\n+", cleaned)

    stripped = (chunk.strip() for chunk in chunks)
    return [chunk for chunk in stripped if chunk]
|
| 89 |
+
|
| 90 |
+
|
| 91 |
+
def extract_fields(ticket_text: str) -> dict:
    """Extract the known labelled fields from one ticket.

    Args:
        ticket_text: Raw text of a single ticket.

    Returns:
        A dict with one key per entry of ``FIELD_PATTERNS`` (empty string when
        the label is absent) plus ``"نص خام"`` holding the normalized text.
        When the mobile or ID label is missing, the value is taken from the
        first standalone 10-digit number starting with ``05`` (mobile) or
        ``1`` (ID) found anywhere in the text.
    """
    text = normalize_text(ticket_text)
    data = dict.fromkeys(FIELD_PATTERNS, "")

    for fname, pat in FIELD_PATTERNS.items():
        m = pat.search(text)
        if m:
            data[fname] = normalize_text(m.group(1))

    # Fallback heuristics. The digit look-arounds keep the match from being a
    # slice of a longer number (e.g. a 15-digit device number), which would
    # otherwise be reported as a phone number or an ID.
    if not data["رقم الجوال"]:
        m = re.search(r"(?<!\d)05\d{8}(?!\d)", text)
        if m:
            data["رقم الجوال"] = m.group(0)
    if not data["رقم الهوية"]:
        m = re.search(r"(?<!\d)1\d{9}(?!\d)", text)
        if m:
            data["رقم الهوية"] = m.group(0)

    data["نص خام"] = text
    return data
|
| 112 |
+
|
| 113 |
+
|
| 114 |
+
def compile_rules(user_rules_text: str):
    """Merge user-supplied classification rules into the defaults.

    Args:
        user_rules_text: Either a JSON object ``{label: [keywords...]}`` or
            lines of the form ``keyword => label``. Empty or ``None`` means
            "defaults only".

    Returns:
        A new ``{label: [keywords]}`` dict. A JSON entry replaces the keyword
        list of its label; a ``keyword => label`` line appends to it.
        ``DEFAULT_RULES`` itself is never modified.
    """
    # Copy the keyword lists too: a shallow dict copy would let the append
    # below leak user keywords into DEFAULT_RULES for every later call.
    rules = {label: list(keywords) for label, keywords in DEFAULT_RULES.items()}
    t = (user_rules_text or "").strip()
    if not t:
        return rules

    # JSON first; text that is not valid JSON falls through to the line form.
    try:
        parsed = json.loads(t)
    except ValueError:  # json.JSONDecodeError is a ValueError subclass
        parsed = None

    if isinstance(parsed, dict):
        for label, kws in parsed.items():
            if not isinstance(kws, list):
                kws = [str(kws)]
            rules[label] = [normalize_text(k) for k in kws]
        return rules

    # Fallback line-based form: keyword => label
    for line in t.splitlines():
        if "=>" not in line:
            continue
        kw, label = (normalize_text(part) for part in line.split("=>", 1))
        if kw and label:
            rules.setdefault(label, []).append(kw)
    return rules
|
| 139 |
+
|
| 140 |
+
|
| 141 |
+
def classify_ticket(text: str, rules: dict) -> str:
    """Return the first label in *rules* with a keyword found in *text*.

    Matching is a case-insensitive substring test against the normalized
    text; empty keywords are ignored. Labels are tried in the dict's
    iteration order. Returns ``"غير مصنّف"`` when nothing matches.
    """
    haystack = normalize_text(text).lower()
    for label, keywords in rules.items():
        if any(kw and kw.lower() in haystack for kw in keywords):
            return label
    return "غير مصنّف"
|
| 148 |
+
|
| 149 |
+
|
| 150 |
+
def parse_and_classify(raw_text: str, user_rules_text: str):
    """Parse pasted tickets into a DataFrame with the ``COLUMNS`` layout.

    Each ticket is split out, its fields extracted, and a category computed
    from the ticket text plus its problem-type field. Returns an empty frame
    with the ``COLUMNS`` headers when no ticket is found.
    """
    tickets = split_tickets(raw_text)
    rules = compile_rules(user_rules_text)

    def build_row(ticket):
        fields = extract_fields(ticket)
        classified_text = ticket + "\n" + fields.get("نوع المشكلة", "")
        return {"التصنيف": classify_ticket(classified_text, rules), **fields}

    records = [build_row(ticket) for ticket in tickets]
    if not records:
        return pd.DataFrame(columns=COLUMNS)

    frame = pd.DataFrame(records)
    # Guarantee every expected column exists before reordering.
    absent = [column for column in COLUMNS if column not in frame.columns]
    for column in absent:
        frame[column] = ""
    return frame[COLUMNS]
|
| 173 |
+
|
| 174 |
+
|
| 175 |
+
def export_excel(df: pd.DataFrame, filename_prefix: str = "tickets"):
    """Serialize *df* to an in-memory ``.xlsx`` workbook.

    The frame is written without its index to a sheet named ``"التذاكر"``
    using the openpyxl engine.

    Returns:
        ``(filename, buffer)`` where *filename* is
        ``"<filename_prefix>_<YYYYmmdd_HHMMSS>.xlsx"`` (local time) and
        *buffer* is a ``BytesIO`` rewound to position 0.
    """
    stream = io.BytesIO()
    with pd.ExcelWriter(stream, engine="openpyxl") as xlsx:
        df.to_excel(xlsx, index=False, sheet_name="التذاكر")
    stream.seek(0)

    filename = f"{filename_prefix}_{datetime.now():%Y%m%d_%H%M%S}.xlsx"
    return filename, stream
|
| 182 |
+
|
| 183 |
+
|
| 184 |
+
# ---------------------------- UI ----------------------------
|
| 185 |
+
# Gradio UI: paste tickets -> editable table -> downloadable Excel file.
with gr.Blocks(title="Arabic Ticket Parser — ISIC Helper", theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        """
# معالج التذاكر — لصق ثم تصدير Excel
الصق التذاكر (حتى 20 أو أكثر) في المربع أدناه. سيتعرف التطبيق على الحقول الشائعة ويصنّفها بالكلمات المفتاحية.

**تلميح**: افصل بين التذاكر بسطر فارغ أو فواصل مثل `---` أو `🔴🔴🔴`.
"""
    )
    with gr.Row():
        raw = gr.Textbox(label="الصق التذاكر هنا", lines=18, placeholder="الصق نص التذاكر العربية كما هو…")
    with gr.Accordion("قواعد التصنيف (اختياري)", open=False):
        rules_tb = gr.Textbox(
            label="أضف كلمات مفتاحية ومقابلها التصنيف — صيغة JSON {تصنيف: [كلمات…]} أو أسطر من الشكل (كلمة => تصنيف)",
            lines=6,
            placeholder='مثال JSON: {"انقطاع": ["انقطاع", "ما يفتح"], "صلاحيات": ["صلاحية", "غير مخول"]}'
        )
    with gr.Row():
        parse_btn = gr.Button("تحليل التذاكر")
        clear_btn = gr.Button("مسح")
    df_out = gr.Dataframe(headers=COLUMNS, row_count=(1, "dynamic"), interactive=True, label="النتيجة")

    with gr.Row():
        fname = gr.Textbox(label="اسم الملف عند التصدير", value="tickets", scale=1)
        export_btn = gr.Button("تصدير Excel", variant="primary", scale=1)
    file_out = gr.File(label="تحميل ملف الإكسل بعد التصدير")

    # Demo ticket, used both as the clickable example and as the fallback
    # input when the user parses or exports with nothing entered.
    sample = (
        """
🔴🔴🔴
نوع المشكلة : كل عينة ادخل عليها عشان انتقل للقسم الثاني لازم اسجل خروج وارجع مرة أخرى تعليق مستمر رغم أن في شريحة نت خاصة وقوفه وفي بطئ ملاحظات على الجهاز
وقت حدوث المشكلة: 21/8/2025
اسم صاحب المشكلة : منيرة الشراري
رقم الهوية: 1037289194
رقم الجهاز: 868190043822887
رقم الجوال: 0542244234
اسم المسح: الطاقة المنزلية
المنطقة: الجوف
""".strip()
    )

    def on_parse(raw_text, rules_text):
        """Parse the pasted text (or the sample when empty) into the table."""
        df = parse_and_classify(raw_text or sample, rules_text)
        return df

    def on_export(df, prefix):
        """Write the table to an .xlsx file on disk and offer it for download.

        gr.File is given a filesystem path here, so the in-memory workbook
        from export_excel is first saved into a fresh temporary directory.
        """
        import os
        import tempfile

        if df is None or (hasattr(df, "empty") and df.empty):
            df = parse_and_classify(sample, "")
        name, buff = export_excel(df, prefix or "tickets")
        # Keep only the final path component so a prefix containing slashes
        # cannot place the file outside the temporary directory.
        safe_name = os.path.basename(name.replace("\\", "/")) or "tickets.xlsx"
        path = os.path.join(tempfile.mkdtemp(prefix="tickets_"), safe_name)
        with open(path, "wb") as fh:
            fh.write(buff.getvalue())
        return gr.update(value=path, visible=True)

    parse_btn.click(on_parse, inputs=[raw, rules_tb], outputs=[df_out])
    export_btn.click(on_export, inputs=[df_out, fname], outputs=[file_out])
    clear_btn.click(lambda: ("", ""), None, [raw, rules_tb])

    gr.Examples(
        examples=[[sample]],
        inputs=[raw],
        label="مثال سريع — اضغط للإدراج"
    )

if __name__ == "__main__":
    demo.launch()
|