PooryaPiroozfar commited on
Commit
a1312ce
·
verified ·
1 Parent(s): a6a5bfb

Upload 5 files

Browse files
Files changed (5) hide show
  1. Dockerfile +22 -0
  2. app.py +567 -0
  3. final_frames.xlsx +0 -0
  4. frame_triples2.xlsx +0 -0
  5. requirements.txt +11 -0
Dockerfile ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.10-slim

ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1

WORKDIR /app

# System packages needed to build some Python wheels.
RUN apt-get update && apt-get install -y \
    git \
    build-essential \
    && rm -rf /var/lib/apt/lists/*

# Install Python dependencies first (best layer caching).
COPY requirements.txt .
RUN pip install --upgrade pip \
    && pip install --no-cache-dir -r requirements.txt

# Bug fix: these downloads need nltk/stanza, which are only available AFTER
# `pip install -r requirements.txt` — running them earlier (as the original
# did) fails the build with ModuleNotFoundError.
RUN python -m nltk.downloader punkt
RUN python -c "import stanza; stanza.download('fa')"

COPY . .

EXPOSE 7860

CMD ["python", "app.py"]
app.py ADDED
@@ -0,0 +1,567 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """Pipeline_LLM&Models.ipynb
3
+
4
+ Automatically generated by Colab.
5
+
6
+ Original file is located at
7
+ https://colab.research.google.com/drive/1JImuJ0lMNHJ2zkt1iSWnhjn204ZgpPMM
8
+
9
+ # All
10
+
11
+ ## import
12
+ """
13
+ import nltk
14
+ import torch
15
+ import torch.nn as nn
16
+ import torch.nn.functional as F
17
+ from torch.utils.data import Dataset, DataLoader
18
+ import numpy as np
19
+ import os
20
+ import torch
21
+ import numpy as np
22
+ from transformers import AutoTokenizer, AutoModel, AutoModelForTokenClassification
23
+ import json
24
+ from openai import OpenAI
25
+ import pandas as pd
26
+ from huggingface_hub import snapshot_download
27
+ import stanza
28
+ import re
29
+ import json
30
+ import gradio as gr
31
+
32
+
33
+ API_KEY = os.getenv("DEEPSEEK_API_KEY")
34
+ BASE_URL = "https://api.deepseek.com"
35
+
36
+ client = OpenAI(
37
+ api_key=API_KEY,
38
+ base_url=BASE_URL
39
+ )
40
+ FINAL_FRAMES_PATH = "final_frames.xlsx"
41
+ TRIPLES_PATH = "frame_triples2.xlsx"
42
+
43
+ FRAME_DET_REPO = "PooryaPiroozfar/frame-detection-parsbert"
44
+ FE_REPO = "PooryaPiroozfar/srl-frame-elements-parsbert"
45
+ FRAME_DET_DIR = "models/frame_detection"
46
+ FE_BASE_DIR = "models/frame_elements"
47
+
48
+ # -------------------------
49
+ # دانلود مدل‌ها (یک‌بار)
50
+ # -------------------------
51
+ if not os.path.exists(FRAME_DET_DIR):
52
+ snapshot_download(repo_id=FRAME_DET_REPO, local_dir=FRAME_DET_DIR)
53
+
54
+ if not os.path.exists(FE_BASE_DIR):
55
+ snapshot_download(repo_id=FE_REPO, local_dir=FE_BASE_DIR)
56
+
57
+
58
+ frames_df = pd.read_excel(FINAL_FRAMES_PATH)
59
+ triples_df = pd.read_excel(TRIPLES_PATH)
60
+
61
+ from nltk.tokenize import sent_tokenize
62
+
63
def split_sentences(text):
    """Split raw input text into sentences with NLTK's punkt tokenizer."""
    return sent_tokenize(text)
65
+
66
+ """## Models"""
67
+
68
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
69
+
70
+ embedding_dim = 768
71
+
72
+ # save_dir = '/content/drive/MyDrive/SRLFrameDetection'
73
+
74
+
75
+ device = "cuda" if torch.cuda.is_available() else "cpu"
76
+
77
+ # لیست فریم‌ها
78
+ frame_names = [
79
+ "Activity_finish","Activity_start","Aging","Attaching","Attempt",
80
+ "Becoming","Being_born","Borrowing","Causation","Chatting",
81
+ "Choosing","Closure","Clothing","Cutting","Damaging","Desiring","Discussion",
82
+ "Emphasizing","Food","Installing","Locating","Memory","Morality_evaluation",
83
+ "Motion","Offering","Practice","Project","Publishing","Religious_belief",
84
+ "Removing","Request","Residence","Sharing","Taking","Telling","Travel",
85
+ "Using","Visiting","Waiting","Work"
86
+ ]
87
+ # -------------------------
88
+ # Encoder (ParsBERT)
89
+ # -------------------------
90
+ encoder_name = "HooshvareLab/bert-base-parsbert-uncased"
91
+ sent_tokenizer = AutoTokenizer.from_pretrained(encoder_name)
92
+ sent_encoder = AutoModel.from_pretrained(encoder_name).to(device)
93
+ sent_encoder.eval()
94
+
95
def get_embedding(text):
    """Return a 768-d mean-pooled ParsBERT embedding for *text*.

    Pooling averages only over real (attention-masked) tokens, so padding
    does not dilute the sentence vector.
    """
    encoded = sent_tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        padding=True,
        max_length=128,
    ).to(device)

    with torch.no_grad():
        hidden = sent_encoder(**encoded).last_hidden_state

    # Mask-aware mean pooling: zero out padding positions, then divide by
    # the (clamped) count of real tokens.
    attn = encoded["attention_mask"].unsqueeze(-1).float()
    pooled = (hidden * attn).sum(dim=1) / attn.sum(dim=1).clamp(min=1e-9)
    return pooled.squeeze(0)
113
+
114
# -------------------------
# Frame-detection model
# -------------------------
class FrameSimilarityModel(nn.Module):
    """Cosine-similarity scorer between a sentence embedding and one learned
    embedding per frame.

    ``forward(sent_emb)`` returns a ``(batch, num_frames)`` similarity matrix.
    """

    def __init__(self, emb_dim, num_frames, frame_emb_init):
        super().__init__()
        # Projection applied to the sentence embedding before scoring.
        self.proj = nn.Linear(emb_dim, emb_dim)
        # Learnable per-frame embeddings, initialised from pre-trained vectors.
        self.frame_embeddings = nn.Parameter(
            torch.tensor(frame_emb_init, dtype=torch.float32)
        )

    def forward(self, sent_emb):
        projected = F.normalize(self.proj(sent_emb), dim=-1)
        frame_dirs = F.normalize(self.frame_embeddings, dim=-1)
        # Dot product of unit vectors == cosine similarity.
        return projected @ frame_dirs.t()
129
+
130
+ frame_embs = np.load(os.path.join(FRAME_DET_DIR, "trained_frame_embeddings.npy"))
131
+
132
+ frame_model = FrameSimilarityModel(
133
+ emb_dim=768,
134
+ num_frames=frame_embs.shape[0],
135
+ frame_emb_init=frame_embs
136
+ ).to(device)
137
+
138
+ state_dict = torch.load(
139
+ os.path.join(FRAME_DET_DIR, "best_frame_margin_model.pt"),
140
+ map_location="cpu"
141
+ )
142
+ frame_model.load_state_dict(state_dict)
143
+ frame_model.eval()
144
+ THRESHOLD = 0.1 # می‌توانید تنظیم کنید
145
+
146
def predict_frame(sentence):
    """Score *sentence* against every frame embedding.

    Returns ``(frame_name, similarity)``; ``frame_name`` is None when the
    best similarity falls below THRESHOLD.
    """
    with torch.no_grad():
        scores = frame_model(get_embedding(sentence).unsqueeze(0))
    best = torch.max(scores, dim=1)

    similarity = best.values.item()
    if similarity < THRESHOLD:
        return None, similarity
    return frame_names[best.indices.item()], similarity
156
+
157
# -------------------------
# Frame Elements
# -------------------------
# Per-frame tokenizer/model/label-map cache: avoids reloading the model from
# disk for every sentence (the original reloaded on each call).
_FE_CACHE = {}


def _load_fe_resources(frame_dir):
    """Load (and memoize) tokenizer, token-classification model and id2label
    map for one frame directory."""
    if frame_dir not in _FE_CACHE:
        with open(os.path.join(frame_dir, "label2id.json"), encoding="utf-8") as f:
            label2id = json.load(f)
        id2label = {int(v): k for k, v in label2id.items()}

        tokenizer = AutoTokenizer.from_pretrained(frame_dir)
        model = AutoModelForTokenClassification.from_pretrained(
            frame_dir,
            num_labels=len(label2id),
            id2label=id2label,
            label2id=label2id,
        ).to(device)
        model.eval()
        _FE_CACHE[frame_dir] = (tokenizer, model, id2label)
    return _FE_CACHE[frame_dir]


def predict_frame_elements(sentence, frame_name):
    """Tag *sentence* with the frame elements of *frame_name*.

    Returns a list of ``(token, label)`` pairs for non-"O" predictions, or
    ``[]`` when no fine-tuned model exists for the frame.
    """
    frame_dir = os.path.join(FE_BASE_DIR, frame_name)
    if not os.path.exists(frame_dir):
        return []

    tokenizer, model, id2label = _load_fe_resources(frame_dir)

    # Bug fix: inputs must be on the same device as the model — the original
    # left them on CPU while the model was moved to `device`, which raises on
    # GPU machines.
    inputs = tokenizer(
        sentence, return_tensors="pt", truncation=True, max_length=128
    ).to(device)

    with torch.no_grad():
        logits = model(**inputs).logits

    # Bug fix: `.cpu()` before `.numpy()` — numpy conversion of a CUDA
    # tensor raises.
    preds = torch.argmax(logits, dim=-1).squeeze(0).cpu().numpy()
    tokens = tokenizer.convert_ids_to_tokens(
        inputs["input_ids"].squeeze(0).tolist()
    )

    special_tokens = {"[CLS]", "[SEP]", "[PAD]"}
    elements = []
    for tok, lab_id in zip(tokens, preds):
        if tok in special_tokens:
            continue
        label = id2label[int(lab_id)]
        if label != "O":
            elements.append((tok, label))

    return elements
195
+
196
+
197
CONDITIONAL_MARKERS = ["اگر", "در صورتی که", "چنانچه", "به شرطی که"]

def split_conditional(sentence):
    """Split a Persian conditional sentence at its first comma.

    Returns ``(is_conditional, condition, consequence)``. For sentences with
    no comma, or whose first clause does not start with a conditional marker,
    the result is ``(False, None, stripped_sentence)``.
    """
    sentence = sentence.strip()

    # Prefer the Persian comma; fall back to the Latin one.
    for comma in ("،", ","):
        if comma not in sentence:
            continue
        first, second = (part.strip() for part in sentence.split(comma, 1))
        if any(first.startswith(marker) for marker in CONDITIONAL_MARKERS):
            return True, first, second
        return False, None, sentence

    return False, None, sentence
217
+
218
+ """## POS"""
219
+
220
+ nlp_fa = stanza.Pipeline(
221
+ lang='fa',
222
+ processors='tokenize,pos,lemma,depparse',
223
+ use_gpu=False
224
+ )
225
+
226
def sentence_has_subject_stanza(sentence):
    """Return True iff the Stanza dependency parse contains an explicit
    nominal subject (``nsubj``) anywhere in *sentence*."""
    parsed = nlp_fa(sentence)
    return any(
        word.deprel == "nsubj"
        for sent in parsed.sentences
        for word in sent.words
    )
235
+
236
def extract_subject_from_condition(cond_srl, triples_df):
    """Find a subject phrase inside the condition clause's SRL result.

    Looks up which frame elements act as "Subject" for the condition's frame
    (per *triples_df*) and returns the first non-empty value among them, or
    None when no subject FE is filled.
    """
    frame_rows = triples_df[triples_df["Frame"] == cond_srl["frame"]]
    filled_fes = cond_srl["frame_elements"]

    for candidate in frame_rows["Subject"].unique():
        value = filled_fes.get(candidate)
        if value:
            return value

    return None
248
+
249
+ """## build_srl_prompt"""
250
+
251
def build_srl_prompt(sentence):
    """Build the SRL prompt sent to the LLM for one Persian sentence.

    Embeds the full frame inventory (frames, FEs, lexical units) so the
    model predicts only known frames, and demands strict-JSON output.
    """
    frame_schema = frames_df[
        ['Frame', 'FE_list', 'lexical_units_fa', 'lexical_units_en']
    ].to_string(index=False)

    return f"""
You are an expert in Persian Semantic Role Labeling.

Frames definition (from FrameNet-style resource):
{frame_schema}

Task:
For the Persian sentence below:
1. Predict the most appropriate Frame.
2. Extract Frame Elements (FE) as spans of text.
3. Return output strictly in JSON.

Sentence:
"{sentence}"

Output format:
{{
"frame": "...",
"frame_elements": {{
"Agent": "...",
"Theme": "...",
"Time": "...",
...
}}
}}
"""
278
+
279
def deepseek_srl(sentence):
    """Run SRL for one sentence via the DeepSeek chat API.

    Returns the raw model reply (expected, but not guaranteed, to be JSON —
    parse it with ``safe_json_loads``). Temperature 0 for determinism.
    """
    messages = [
        {"role": "system", "content": "You perform Persian SRL."},
        {"role": "user", "content": build_srl_prompt(sentence)},
    ]

    response = client.chat.completions.create(
        model="deepseek-chat",
        messages=messages,
        temperature=0,
    )
    return response.choices[0].message.content
292
+
293
+ """## extract_triples"""
294
+
295
def extract_triples(frame, frame_elements):
    """Instantiate the (subject, relation, object) triples defined for
    *frame* in the global ``triples_df``.

    A triple is emitted only when both its Subject-FE and Object-FE were
    actually filled in *frame_elements*.
    """
    matched = []
    for _, row in triples_df[triples_df["Frame"] == frame].iterrows():
        subj_fe = row["Subject"]
        obj_fe = row["Object"]
        if subj_fe not in frame_elements or obj_fe not in frame_elements:
            continue
        matched.append({
            "subject": frame_elements[subj_fe],
            "relation": row["Relation"],
            "object": frame_elements[obj_fe],
            "subject_fe": subj_fe,
            "object_fe": obj_fe,
        })
    return matched
312
+
313
+ """## extract_spin_rule"""
314
+
315
def extract_spin_rule_v2(condition_result, consequence_result):
    """Ask the LLM to turn condition/consequence SRL results into a SPIN
    rule (Turtle) plus Persian explanations (natural + formal logic).

    Returns the raw model reply; callers parse it with ``safe_json_loads``.
    """
    rule_prompt = f"""
You are an expert in Semantic Web, SPIN rules, and formal logic.

Condition SRL result:
{condition_result}

Consequence SRL result:
{consequence_result}

Task:
1. Generate a SPIN rule in Turtle syntax that represents:
IF condition holds THEN consequence holds.

2. Explain this rule in clear natural Persian.

3. Express the rule in formal Persian logical form using universal quantification.
Use structure like:
"برای هر x، اگر ... آنگاه ..."

Return output strictly in JSON format:

{{
"spin_turtle": "...",
"persian_explanation": "...",
"formal_logic_fa": "..."
}}
"""

    response = client.chat.completions.create(
        model="deepseek-chat",
        messages=[
            {"role": "system", "content": "You generate SPIN rules and formal Persian logic."},
            {"role": "user", "content": rule_prompt},
        ],
        temperature=0,
    )
    return response.choices[0].message.content
355
+
356
+ """## حذف None"""
357
+
358
def clean_frame_elements(frame_elements):
    """Drop frame elements whose value is missing.

    "Missing" means None, the empty string, or the literal string "None"
    (LLMs sometimes emit the latter for unfilled slots).
    """
    cleaned = {}
    for fe, value in frame_elements.items():
        if value is None or value == "" or value == "None":
            continue
        cleaned[fe] = value
    return cleaned
364
+
365
+ # def extract_triples_safe(frame, frame_elements):
366
+ # rows = triples_df[triples_df["Frame"] == frame]
367
+ # triples = []
368
+
369
+ # for _, r in rows.iterrows():
370
+ # subj = r["Subject"]
371
+ # obj = r["Object"]
372
+
373
+ # if subj in frame_elements and obj in frame_elements:
374
+ # s_val = frame_elements[subj]
375
+ # o_val = frame_elements[obj]
376
+
377
+ # if s_val and o_val:
378
+ # triples.append({
379
+ # "subject": s_val,
380
+ # "relation": r["Relation"],
381
+ # "object": o_val,
382
+ # "subject_fe": subj,
383
+ # "object_fe": obj
384
+ # })
385
+ # return triples
386
+
387
+ """## analyze_text"""
388
+
389
+
390
def safe_json_loads(text):
    """Best-effort JSON extraction from an LLM reply.

    Strips Markdown code fences, pulls out the first ``{...}`` span, and
    parses it. Returns None — never raises — for empty, fence-only, or
    malformed input, so callers can fall back gracefully.
    """
    if not text:
        return None

    # Remove Markdown code fences (```json ... ```).
    text = text.strip()
    text = re.sub(r"^```json", "", text)
    text = re.sub(r"^```", "", text)
    text = re.sub(r"```$", "", text)

    # Extract the first { ... } span (greedy, so nested braces survive).
    match = re.search(r"\{.*\}", text, re.DOTALL)
    if not match:
        return None

    # Bug fix: the original let json.JSONDecodeError propagate, so a
    # malformed model reply crashed the pipeline despite the "safe" name.
    try:
        return json.loads(match.group(0))
    except json.JSONDecodeError:
        return None
407
+
408
# Frames whose fine-tuned FE models are unreliable — always delegate these
# to the LLM even when the trained frame detector is confident.
SPECIAL_DEEPSEEK_FRAMES = [
    "Attempt","Becoming","Being_born","Causation","Chatting","Closure",
    "Clothing","Desiring","Discussion","Emphasizing","Food","Memory",
    "Morality_evaluation","Motion","Offering","Practice","Project",
    "Religious_belief","Removing","Request","Sharing","Telling",
    "Visiting","Work","Waiting"
]


def _srl_for_sentence(sentence):
    """Run SRL for one clause: trained models first, LLM fallback.

    Delegates to DeepSeek when the trained detector is unsure (no frame, or
    similarity < 0.4) or the frame is on the LLM-only list. Always returns a
    dict with "frame", "frame_elements", "frame_method", "fe_method" and
    "frame_similarity" keys.
    """
    frame_name, sim = predict_frame(sentence)
    use_llm = (
        frame_name is None
        or sim < 0.4
        or frame_name in SPECIAL_DEEPSEEK_FRAMES
    )

    if use_llm:
        srl = safe_json_loads(deepseek_srl(sentence))
        # Bug fix: the original assumed the LLM always returned valid JSON
        # and crashed with TypeError on None / KeyError on partial replies.
        if not isinstance(srl, dict):
            srl = {}
        srl.setdefault("frame", frame_name)
        srl.setdefault("frame_elements", {})
        srl["frame_method"] = "LLM"
        srl["fe_method"] = "LLM"
    else:
        elements = predict_frame_elements(sentence, frame_name)
        srl = {
            "frame": frame_name,
            "frame_elements": {label: token for token, label in elements},
            "frame_method": "trained_model",
            "fe_method": "trained_model",
        }

    srl["frame_elements"] = clean_frame_elements(srl["frame_elements"])
    srl["frame_similarity"] = sim
    return srl


def analyze_text_v5(text):
    """Full pipeline: sentence split -> conditional detection -> SRL ->
    triple extraction -> (for conditionals) SPIN rule generation.

    Returns a list of result dicts, one per input sentence: "simple"
    entries carry srl + triples; "conditional" entries carry per-clause
    srl/triples plus the generated SPIN rule and its explanations.
    """
    results = []

    for sent in split_sentences(text):
        is_cond, cond, cons = split_conditional(sent)

        if not is_cond:
            # ---------- simple (non-conditional) sentence ----------
            srl = _srl_for_sentence(sent)
            results.append({
                "type": "simple",
                "sentence": sent,
                "srl": srl,
                "frame_similarity": srl["frame_similarity"],
                "triples": extract_triples(srl["frame"], srl["frame_elements"]),
            })
            continue

        # ---------- conditional: analyse the condition clause ----------
        cond_srl = _srl_for_sentence(cond)

        # If the consequence has no explicit subject, borrow the subject
        # phrase found in the condition clause (Persian often drops it).
        if not sentence_has_subject_stanza(cons):
            subject = extract_subject_from_condition(cond_srl, triples_df)
            if subject:
                cons = subject + " " + cons

        # ---------- analyse the consequence clause ----------
        cons_srl = _srl_for_sentence(cons)

        cond_triples = extract_triples(cond_srl["frame"], cond_srl["frame_elements"])
        cons_triples = extract_triples(cons_srl["frame"], cons_srl["frame_elements"])

        # ---------- SPIN rule generation ----------
        # Bug fix: tolerate unparseable or partial LLM JSON via `or {}` and
        # .get() — the original indexed spin_output["..."] directly.
        spin_output = safe_json_loads(extract_spin_rule_v2(cond_srl, cons_srl)) or {}

        results.append({
            "type": "conditional",
            "condition": {
                "sentence": cond,
                "srl": cond_srl,
                "triples": cond_triples,
            },
            "consequence": {
                "sentence": cons,
                "srl": cons_srl,
                "triples": cons_triples,
            },
            "spin_rule": spin_output.get("spin_turtle"),
            "spin_explanation_fa": spin_output.get("persian_explanation"),
            "formal_logic_fa": spin_output.get("formal_logic_fa"),
        })

    return results
550
# -------------------------
# Gradio UI
# -------------------------
def ui(sentence):
    """Gradio entry point: run the full analysis pipeline on one input."""
    return analyze_text_v5(sentence)


# Single-textbox interface; JSON output mirrors analyze_text_v5's result list.
demo = gr.Interface(
    fn=ui,
    inputs=gr.Textbox(
        label="جمله فارسی",
        placeholder="مثال: اگر علی با قطار به مشهد سفر کند، با استاندار مشهد گپ می زند."
    ),
    outputs=gr.JSON(label="خروجی"),
    title="Persian_Semantic_Information_Extraction",
)

if __name__ == "__main__":
    # 0.0.0.0:7860 — matches the port EXPOSEd by the Dockerfile.
    demo.launch(server_name="0.0.0.0", server_port=7860)
final_frames.xlsx ADDED
Binary file (18.8 kB). View file
 
frame_triples2.xlsx ADDED
Binary file (20.8 kB). View file
 
requirements.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
gradio
huggingface_hub
nltk
numpy
openai
openpyxl
pandas
sentencepiece
stanza
torch
tqdm
transformers