Shirjannn committed on
Commit
820d22d
·
verified ·
1 Parent(s): dbc7378

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +59 -21
app.py CHANGED
@@ -1,35 +1,73 @@
1
- import json
2
  import gradio as gr
3
  import numpy as np
 
 
4
  from sklearn.metrics.pairwise import cosine_similarity
5
  from sentence_transformers import SentenceTransformer
 
 
6
 
7
  # مدل زبانی
8
# Multilingual sentence-embedding model shared by the whole app.
model = SentenceTransformer('paraphrase-multilingual-MiniLM-L12-v2')

# Load the dataset from the JSON file.
with open("school_data.json", "r", encoding="utf-8") as f:
    school_data_dynamic = json.load(f)

# Encode each school's texts once, up front, so a query only has to
# embed the user's sentence.
embeddings = {
    school: model.encode(texts)
    for school, texts in school_data_dynamic.items()
}
18
 
19
- # تابع جستجو
20
def find_school(user_input):
    """Report the school whose stored texts best match *user_input*.

    The input is embedded with the module-level ``model`` and compared, by
    cosine similarity, against every school's entry in ``embeddings``. A
    school's score is the highest similarity among its texts.

    Returns:
        A string naming the best school and its score to two decimals.
    """
    query_vec = model.encode([user_input])

    # Best similarity per school, in the dictionary's iteration order.
    scores = {
        name: cosine_similarity(query_vec, matrix).max()
        for name, matrix in embeddings.items()
    }

    best_school, best_score = None, -1
    for name, score in scores.items():
        # Strict comparison: on a tie the earlier school is kept.
        if score > best_score:
            best_school, best_score = name, score

    return f"Closest school: {best_school} (score: {best_score:.2f})"
 
 
 
 
 
 
 
 
 
 
30
 
31
  # رابط Gradio
32
# Two-line text input in, plain text out, answered by find_school.
query_box = gr.Textbox(lines=2, placeholder="Enter a sentence...")
iface = gr.Interface(fn=find_school, inputs=query_box, outputs="text")
iface.launch()
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
  import numpy as np
3
+ import pandas as pd
4
+ import zipfile, os
5
  from sklearn.metrics.pairwise import cosine_similarity
6
  from sentence_transformers import SentenceTransformer
7
+ from sklearn.decomposition import PCA
8
+ import plotly.graph_objects as go
9
 
10
  # مدل زبانی
11
# Multilingual sentence-embedding model shared by the whole app.
model = SentenceTransformer('paraphrase-multilingual-MiniLM-L12-v2')

# Location of the zipped dataset and the folder it is unpacked into.
zip_path = "school_data.zip"
extract_folder = "school_data"

# Extract the ZIP only if the target folder does not exist yet.
if not os.path.exists(extract_folder):
    with zipfile.ZipFile(zip_path, "r") as zip_ref:
        zip_ref.extractall(extract_folder)

# Load every CSV in the extracted folder into one key -> text mapping.
# Files are visited in sorted order so that, when two files share a key,
# the surviving value does not depend on the OS directory ordering.
school_data_dynamic = {}
for file in sorted(os.listdir(extract_folder)):
    if not file.endswith(".csv"):
        continue
    df = pd.read_csv(os.path.join(extract_folder, file))
    # Assumption: first column = key, second column = text.
    if df.shape[1] < 2:
        continue
    # Select the columns by position with .iloc. Indexing a row with
    # `row[0]` relies on a positional fallback that pandas has deprecated
    # for label-indexed Series.
    for key, value in zip(df.iloc[:, 0], df.iloc[:, 1]):
        school_data_dynamic[str(key)] = str(value)

# Convert the texts to embeddings; `keys` and `texts` stay index-aligned
# with the rows of `embeddings`.
texts = list(school_data_dynamic.values())
keys = list(school_data_dynamic.keys())
embeddings = model.encode(texts, convert_to_tensor=False)

# Reduce to two dimensions for plotting. PCA cannot fit two components
# on fewer than two samples, so fall back to placing the (at most one)
# point at the origin instead of failing at start-up.
pca = PCA(n_components=2)
if len(texts) >= 2:
    reduced_embeddings = pca.fit_transform(embeddings)
else:
    reduced_embeddings = np.zeros((len(texts), 2))
42
 
43
+ # تابع جستجو
44
def search(query):
    """Return the dataset entry semantically closest to *query*.

    The query is embedded with the module-level ``model`` and compared by
    cosine similarity against the precomputed ``embeddings``; the entry
    with the highest similarity is reported.

    Args:
        query: Free-text search string.

    Returns:
        ``"Closest match: <key> → <text>"`` for the best entry, or a short
        hint when the query is blank or no data is available.
    """
    # A blank query has nothing meaningful to embed; return a hint rather
    # than an arbitrary "closest" entry.
    if not query or not query.strip():
        return "Please enter a query."
    # With no entries there is nothing to compare against.
    if not keys:
        return "No data loaded."

    query_emb = model.encode([query], convert_to_tensor=False)
    sims = cosine_similarity([query_emb[0]], embeddings)[0]
    top_idx = int(np.argmax(sims))
    return f"Closest match: {keys[top_idx]} → {texts[top_idx]}"
49
 
50
+ # تابع برای رسم گراف
51
def plot_embeddings():
    """Build a labelled scatter plot from the module-level ``reduced_embeddings``.

    Column 0 is used as the x coordinate and column 1 as the y coordinate;
    each point is labelled with the matching entry of ``keys``.

    Returns:
        A Plotly figure containing a single scatter trace.
    """
    xs = reduced_embeddings[:, 0]
    ys = reduced_embeddings[:, 1]
    scatter = go.Scatter(
        x=xs,
        y=ys,
        mode="markers+text",
        text=keys,
        textposition="top center",
    )
    figure = go.Figure()
    figure.add_trace(scatter)
    return figure
61
 
62
  # رابط Gradio
63
# Components are created in display order: title, query box, result box,
# search button, then the embedding plot.
with gr.Blocks() as demo:
    gr.Markdown("# 🔍 Semantic Search in School Data")
    query_box = gr.Textbox(label="Enter your query")
    result_box = gr.Textbox(label="Best Match")
    search_button = gr.Button("Search")
    embedding_plot = gr.Plot(label="Embedding Visualization")

    # Clicking the button runs the semantic search on the query text.
    search_button.click(
        fn=search,
        inputs=query_box,
        outputs=result_box,
    )
    # The plot is drawn once, when the page loads.
    demo.load(
        fn=plot_embeddings,
        inputs=None,
        outputs=embedding_plot,
    )

demo.launch()