mo-456 committed on
Commit
ea38749
·
verified ·
1 Parent(s): 0fd5ec6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -33
app.py CHANGED
@@ -3,46 +3,50 @@ import gradio as gr
3
  from sentence_transformers import SentenceTransformer, util
4
  import torch
5
 
6
- # Load Arabic sentence transformer model
7
  model = SentenceTransformer("CAMeL-Lab/bert-base-arabic-camelbert-ca")
8
 
9
- # Load and preprocess knowledge base
10
- def load_knowledge(file_path):
11
- with open(file_path, "r", encoding="utf-8") as f:
12
- content = f.read()
13
- passages = [p.strip() for p in content.split("\n\n") if p.strip()]
14
- embeddings = model.encode(passages, convert_to_tensor=True)
15
- return passages, embeddings
16
-
17
- passages, passage_embeddings = load_knowledge("knowledge.txt")
 
 
 
 
 
 
 
 
 
 
 
18
 
19
  # Search function
20
- def answer_question_arabic(query):
21
- query_embedding = model.encode(query, convert_to_tensor=True)
22
- scores = util.cos_sim(query_embedding, passage_embeddings)[0]
23
- top_idx = torch.argmax(scores).item()
24
- best_score = scores[top_idx].item()
25
-
26
- if best_score < 0.4:
27
- return "عذرًا، لم أتمكن من العثور على إجابة مناسبة في قاعدة المعرفة."
28
- return passages[top_idx]
29
 
30
  # Gradio UI
31
- demo = gr.Interface(
32
- fn=answer_question_arabic,
33
- inputs=gr.Textbox(label="اكتب سؤالك هنا", placeholder="ما هي أهداف التنمية المستدامة؟"),
34
- outputs=gr.Textbox(label="إجابة"),
35
- title="روبوت المعرفة - التنمية المستدامة",
36
- description="أجب عن الأسئلة باللغة العربية بناءً على قاعدة معرفية من وزارة المالية حول التنمية المستدامة والموازنة التشاركية."
37
- )
38
-
39
- # FastAPI app
40
  app = FastAPI()
41
-
42
  @app.get("/")
43
  def read_root():
44
- return {"message": "مرحبا بك! انتقل إلى /gradio لبدء التفاعل."}
45
 
46
- @app.get("/gradio")
47
- def launch_gradio():
48
- return gr.mount_gradio_app(app, demo, path="/gradio")
 
3
  from sentence_transformers import SentenceTransformer, util
4
  import torch
5
 
6
+ # Load model (auto-downloads if not cached)
7
  model = SentenceTransformer("CAMeL-Lab/bert-base-arabic-camelbert-ca")
8
 
9
+ # Load knowledge file and generate embeddings
10
+ with open("knowledge.txt", "r", encoding="utf-8") as f:
11
+ knowledge_text = f.read()
12
+
13
+ # Split the text into chunks
14
def split_text(text, chunk_size=400):
    """Split *text* into chunks of roughly ``chunk_size`` characters.

    The text is cut only at Arabic commas ("،"). Consecutive segments are
    packed greedily into a chunk as long as the chunk stays within
    ``chunk_size`` characters; a single segment longer than ``chunk_size``
    is kept whole as its own chunk rather than being cut mid-clause.

    Args:
        text: The full knowledge-base text.
        chunk_size: Soft upper bound on the length of each chunk.

    Returns:
        A list of stripped, non-empty chunks in document order. Commas that
        were present in ``text`` are preserved; none are invented. Empty or
        separator-only input yields an empty list.
    """
    separator = "،"
    segments = text.split(separator)

    # Re-attach the comma only to segments that were actually followed by
    # one, so the final chunk does not gain a comma the text never had.
    pieces = [segment + separator for segment in segments[:-1]]
    pieces.append(segments[-1])

    groups = [[]]
    current_len = 0
    for piece in pieces:
        # Start a new chunk only when the current one already holds
        # something; otherwise an oversized first piece would leave an
        # empty chunk behind.
        if groups[-1] and current_len + len(piece) > chunk_size:
            groups.append([])
            current_len = 0
        groups[-1].append(piece)
        current_len += len(piece)

    candidates = ("".join(group).strip() for group in groups)
    # Drop chunks that carry no real content (only whitespace/commas);
    # embedding them would let a lone comma be returned as an "answer".
    return [
        chunk
        for chunk in candidates
        if any(ch != separator and not ch.isspace() for ch in chunk)
    ]
26
+
27
+ chunks = split_text(knowledge_text)
28
+ corpus_embeddings = model.encode(chunks, convert_to_tensor=True)
29
 
30
  # Search function
31
def answer_question(question):
    """Return the knowledge chunk most similar to *question*.

    The question is embedded with the module-level ``model`` and compared
    against ``corpus_embeddings`` by cosine similarity. The chunk with the
    highest score is always returned; no minimum-score cutoff is applied.
    """
    query_vector = model.encode(question, convert_to_tensor=True)
    # cos_sim returns one row per query; there is a single query here.
    similarity = util.cos_sim(query_vector, corpus_embeddings)[0]
    winner = torch.argmax(similarity).item()
    return chunks[winner]
 
 
 
 
36
 
37
  # Gradio UI
38
+ with gr.Blocks() as demo:
39
+ gr.Markdown("### 🤖 اسأل عن التنمية المستدامة أو الموازنة التشاركية")
40
+ with gr.Row():
41
+ inp = gr.Textbox(label="اكتب سؤالك هنا", placeholder="مثال: ما هي أهداف التنمية المستدامة؟")
42
+ out = gr.Textbox(label="الإجابة")
43
+ btn = gr.Button("إجابة")
44
+ btn.click(fn=answer_question, inputs=inp, outputs=out)
45
+
46
+ # FastAPI + Gradio mount
47
  app = FastAPI()
 
48
@app.get("/")
def read_root():
    """Return a short JSON status message for GET requests to the root path."""
    # NOTE(review): this route is registered on "/" before the Gradio app is
    # mounted on the same path further down; it may take precedence over the
    # Gradio UI for GET / -- confirm which one should serve the root URL.
    status = {"message": "Arabic Q&A Chatbot running."}
    return status
51
 
52
+ app = gr.mount_gradio_app(app, demo, path="/")