Update app.py
Browse files
app.py
CHANGED
|
@@ -1,35 +1,73 @@
|
|
| 1 |
-
import json
|
| 2 |
import gradio as gr
|
| 3 |
import numpy as np
|
|
|
|
|
|
|
| 4 |
from sklearn.metrics.pairwise import cosine_similarity
|
| 5 |
from sentence_transformers import SentenceTransformer
|
|
|
|
|
|
|
| 6 |
|
| 7 |
# مدل زبانی
|
| 8 |
model = SentenceTransformer('paraphrase-multilingual-MiniLM-L12-v2')
|
| 9 |
|
| 10 |
-
#
|
| 11 |
-
|
| 12 |
-
|
| 13 |
|
| 14 |
-
#
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
|
| 19 |
-
# تا
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
|
|
|
|
|
|
| 28 |
|
| 29 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
|
| 31 |
# رابط Gradio
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
import numpy as np
|
| 3 |
+
import pandas as pd
|
| 4 |
+
import zipfile, os
|
| 5 |
from sklearn.metrics.pairwise import cosine_similarity
|
| 6 |
from sentence_transformers import SentenceTransformer
|
| 7 |
+
from sklearn.decomposition import PCA
|
| 8 |
+
import plotly.graph_objects as go
|
| 9 |
|
| 10 |
# مدل زبانی
|
| 11 |
# Sentence-embedding model created once at module import; the same instance
# encodes both the stored texts and every incoming query.
# NOTE(review): presumably the named checkpoint is downloaded on first use —
# confirm it is cached/available in the deployment environment.
model = SentenceTransformer('paraphrase-multilingual-MiniLM-L12-v2')
|
| 12 |
|
| 13 |
+
# Location of the ZIP archive and of the folder it gets unpacked into
zip_path = "school_data.zip"
extract_folder = "school_data"

# Unpack the archive only when the target folder does not exist yet, so a
# restart does not extract the same files again
if not os.path.exists(extract_folder):
    with zipfile.ZipFile(zip_path, "r") as archive:
        archive.extractall(extract_folder)
|
| 21 |
|
| 22 |
+
# Load every CSV in the extracted folder into one {key: text} mapping.
# Assumption: first column = key, second column = text.
school_data_dynamic = {}
# Sorted so that, when the same key appears in several files, the entry that
# wins is deterministic (os.listdir order is arbitrary).
for csv_name in sorted(os.listdir(extract_folder)):
    if not csv_name.endswith(".csv"):
        continue
    df = pd.read_csv(os.path.join(extract_folder, csv_name))
    # Files with fewer than two columns cannot provide a key/text pair
    if df.shape[1] < 2:
        continue
    # Read the two columns positionally with .iloc: integer keys on a
    # label-indexed row (row[0]) are deprecated in pandas, and iterating the
    # columns directly avoids the dtype upcasting that iterrows() applies to
    # each row (e.g. an integer key turning into "1.0").
    for key, value in zip(df.iloc[:, 0], df.iloc[:, 1]):
        school_data_dynamic[str(key)] = str(value)
|
| 33 |
+
|
| 34 |
+
# Turn the stored texts into embeddings; keys and texts stay index-aligned
# because both are taken from the same dict in the same order
keys = list(school_data_dynamic)
texts = list(school_data_dynamic.values())
embeddings = model.encode(texts, convert_to_tensor=False)

# Reduce the embeddings to 2-D for plotting. PCA cannot fit two components
# on fewer than two samples, so a tiny or empty corpus falls back to an
# all-zero layout instead of crashing the app at startup.
pca = PCA(n_components=2)
if len(texts) >= 2:
    reduced_embeddings = pca.fit_transform(embeddings)
else:
    reduced_embeddings = np.zeros((len(texts), 2))
|
| 42 |
|
| 43 |
+
# Search function
def search(query):
    """Return the stored entry whose embedding is closest to *query*.

    The query is encoded with the module-level ``model``, compared against
    every row of ``embeddings`` by cosine similarity, and the entry with the
    highest score is reported.

    Args:
        query: Free-text query string (``None`` or blank is tolerated).

    Returns:
        A message string: the closest key/text pair, or a short notice when
        the query is blank or there is no data to search.
    """
    # A blank query would still be encoded and matched to some arbitrary
    # entry, so reject it up front
    if not query or not query.strip():
        return "Please enter a query."
    # With no stored entries the similarity computation has nothing to
    # compare against and would raise
    if not keys:
        return "No data available to search."

    query_emb = model.encode([query], convert_to_tensor=False)
    sims = cosine_similarity([query_emb[0]], embeddings)[0]
    top_idx = int(np.argmax(sims))
    return f"Closest match: {keys[top_idx]} → {texts[top_idx]}"
|
| 49 |
|
| 50 |
+
# Plotting function
def plot_embeddings():
    """Build a scatter figure of the 2-D reduced embeddings, labelled by key."""
    x_coords = reduced_embeddings[:, 0]
    y_coords = reduced_embeddings[:, 1]
    # One marker per stored entry, with its key drawn above the point
    labelled_points = go.Scatter(
        x=x_coords,
        y=y_coords,
        mode="markers+text",
        text=keys,
        textposition="top center",
    )
    figure = go.Figure()
    figure.add_trace(labelled_points)
    return figure
|
| 61 |
|
| 62 |
# Gradio interface
with gr.Blocks() as demo:
    # Components are created in display order: title, query, result, button, plot
    gr.Markdown("# 🔍 Semantic Search in School Data")
    query_box = gr.Textbox(label="Enter your query")
    result_box = gr.Textbox(label="Best Match")
    search_btn = gr.Button("Search")
    embedding_plot = gr.Plot(label="Embedding Visualization")

    # Draw the embedding plot when the page loads
    demo.load(fn=plot_embeddings, inputs=None, outputs=embedding_plot)
    # Run the search when the button is clicked
    search_btn.click(fn=search, inputs=query_box, outputs=result_box)

demo.launch()
|