mo-456 committed on
Commit
aa409b7
·
verified ·
1 Parent(s): 397fe86

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +54 -24
app.py CHANGED
@@ -1,18 +1,38 @@
1
from fastapi import FastAPI
import gradio as gr
from sentence_transformers import SentenceTransformer, util
import torch

# Arabic CAMeLBERT sentence encoder; downloaded on first run, cached after.
model = SentenceTransformer("CAMeL-Lab/bert-base-arabic-camelbert-ca")

# Read the whole knowledge base into memory for chunking and embedding.
with open("knowledge.txt", "r", encoding="utf-8") as f:
    knowledge_text = f.read()
 
 
 
 
12
 
13
- # Split the text into chunks
14
- def split_text(text, chunk_size=400):
15
- sentences = text.split("،") # Split on Arabic comma
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  chunks, chunk = [], ""
17
  for sentence in sentences:
18
  if len(chunk) + len(sentence) < chunk_size:
@@ -24,29 +44,39 @@ def split_text(text, chunk_size=400):
24
  chunks.append(chunk.strip())
25
  return chunks
26
 
27
# Chunk the knowledge base and pre-compute one embedding per chunk so each
# query needs only a single encode + similarity pass.
chunks = split_text(knowledge_text)
corpus_embeddings = model.encode(chunks, convert_to_tensor=True)
 
 
 
 
 
 
29
 
30
def answer_question(question):
    """Return the knowledge chunk most similar to *question*.

    Encodes the question with the shared sentence-transformer model and
    selects the pre-embedded chunk with the highest cosine similarity.
    """
    q_emb = model.encode(question, convert_to_tensor=True)
    similarities = util.cos_sim(q_emb, corpus_embeddings)[0]
    top = int(torch.argmax(similarities))
    return chunks[top]
 
 
 
 
 
 
 
 
36
 
37
# Assemble the question/answer interface: one input box, one output box,
# and a button wired to the retrieval function.
with gr.Blocks() as demo:
    gr.Markdown("### 🤖 اسأل عن التنمية المستدامة أو الموازنة التشاركية")
    with gr.Row():
        inp = gr.Textbox(
            label="اكتب سؤالك هنا",
            placeholder="مثال: ما هي أهداف التنمية المستدامة؟",
        )
        out = gr.Textbox(label="الإجابة")
    btn = gr.Button("إجابة")
    btn.click(answer_question, inputs=inp, outputs=out)
45
 
46
# FastAPI + Gradio mount
app = FastAPI()

@app.get("/")
def read_root():
    # Plain JSON liveness message for the bare API.
    return {"message": "Arabic Q&A Chatbot running."}

# NOTE(review): mounting the Gradio app at path="/" takes over the root
# route, so the read_root handler above appears unreachable — confirm
# whether the JSON health response is still wanted, or move one of them.
app = gr.mount_gradio_app(app, demo, path="/")
 
 
1
import logging

import gradio as gr
import torch
from sentence_transformers import SentenceTransformer, util

# Configure module-wide logging so startup progress and per-request
# activity show up in the container logs.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
9
 
10
# Load the Arabic CAMeLBERT sentence encoder once at startup; any failure
# (network, disk, corrupt cache) is logged and aborts the app early.
logger.info("Loading model...")
try:
    model = SentenceTransformer("CAMeL-Lab/bert-base-arabic-camelbert-ca")
except Exception as e:
    logger.error(f"Failed to load model: {e}")
    raise
17
 
18
# Load the knowledge base; fail fast at startup if it is missing,
# unreadable, or empty, since nothing can be answered without it.
logger.info("Loading knowledge file...")
try:
    with open("knowledge.txt", "r", encoding="utf-8") as f:
        knowledge_text = f.read()
except FileNotFoundError:
    logger.error("knowledge.txt not found")
    raise
except Exception as e:
    logger.error(f"Error reading knowledge.txt: {e}")
    raise

# Validate outside the read block: previously the empty-file ValueError
# was caught by the generic handler above and logged with the misleading
# "Error reading knowledge.txt" message.
if not knowledge_text.strip():
    logger.error("knowledge.txt is empty")
    raise ValueError("knowledge.txt is empty")
31
+
32
+ # Split text into chunks
33
+ def split_text(text, chunk_size=200):
34
+ logger.info("Splitting text into chunks...")
35
+ sentences = text.split("،")
36
  chunks, chunk = [], ""
37
  for sentence in sentences:
38
  if len(chunk) + len(sentence) < chunk_size:
 
44
  chunks.append(chunk.strip())
45
  return chunks
46
 
47
# Chunk the knowledge base and pre-compute one embedding per chunk so each
# query needs only a single encode + cosine-similarity pass.
logger.info("Generating embeddings...")
try:
    chunks = split_text(knowledge_text)
    # convert_to_tensor=True returns a torch tensor, as util.cos_sim expects;
    # chunks are encoded in batches of 16.
    corpus_embeddings = model.encode(chunks, convert_to_tensor=True, batch_size=16)
except Exception as e:
    logger.error(f"Error generating embeddings: {e}")
    raise
55
 
56
def answer_question(question, min_score=0.3):
    """Return the best-matching knowledge chunk for an Arabic question.

    Parameters:
        question (str): the user's question; blank input returns a prompt
            asking for a question.
        min_score (float): minimum cosine similarity for a chunk to count
            as an answer (default 0.3, matching the previous hard-coded
            threshold); below it an Arabic "no answer found" message is
            returned.

    Returns:
        str: the selected chunk, or a user-facing Arabic fallback/error
        message. Never raises — this is the UI boundary handler.
    """
    try:
        if not question.strip():
            return "الرجاء إدخال سؤال."
        question_embedding = model.encode(question, convert_to_tensor=True)
        scores = util.cos_sim(question_embedding, corpus_embeddings)[0]
        best_idx = torch.argmax(scores).item()
        score = scores[best_idx].item()
        # Lazy %-style args: no string formatting when INFO is disabled.
        logger.info("Question: %s, Best chunk: %s..., Score: %.4f",
                    question, chunks[best_idx][:50], score)
        if score > min_score:
            return chunks[best_idx]
        return "عذرًا، لم أجد إجابة مناسبة. حاول صياغة السؤال بطريقة أخرى."
    except Exception as e:
        # Swallow everything so the Gradio textbox always gets a message.
        logger.error("Error answering question: %s", e)
        return "حدث خطأ أثناء معالجة السؤال. حاول مرة أخرى."
70
 
71
# Build the question/answer interface; both text boxes render
# right-to-left for Arabic.
logger.info("Setting up Gradio UI...")
with gr.Blocks() as demo:
    gr.Markdown("### 🤖 اسأل عن التنمية المستدامة أو الموازنة التشاركية")
    with gr.Row():
        inp = gr.Textbox(
            label="اكتب سؤالك هنا",
            placeholder="مثال: ما هي أهداف التنمية المستدامة؟",
            dir="rtl",
        )
        out = gr.Textbox(label="الإجابة", dir="rtl")
    btn = gr.Button("إجابة")
    btn.click(answer_question, inputs=inp, outputs=out)

# Serve on all interfaces at the port Hugging Face Spaces expects.
demo.launch(server_name="0.0.0.0", server_port=7860)