mo-456 committed on
Commit
989f90a
·
verified ·
1 Parent(s): 222dde1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +156 -65
app.py CHANGED
@@ -5,6 +5,7 @@ import logging
5
  from typing import List
6
  import re
7
  import numpy as np
 
8
 
9
  # Configure advanced logging
10
  logging.basicConfig(
@@ -13,29 +14,65 @@ logging.basicConfig(
13
  )
14
  logger = logging.getLogger(__name__)
15
 
16
- # Load model with enhanced settings
17
- model = SentenceTransformer(
18
- "CAMeL-Lab/bert-base-arabic-camelbert-ca",
19
- device="cuda" if torch.cuda.is_available() else "cpu"
20
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
- # Advanced knowledge loader with semantic organization
23
  def load_knowledge():
24
- with open("knowledge.txt", "r", encoding="utf-8") as f:
 
 
 
 
 
 
 
 
 
 
25
  sections = {}
26
  current_section = ""
27
 
28
- for line in f:
29
- line = line.strip()
30
- if line.startswith("## "):
31
- current_section = line[3:]
32
- sections[current_section] = []
33
- elif line and current_section:
34
- sections[current_section].append(line)
35
-
36
- # Create semantic chunks
37
  chunks = []
38
  chunk_ids = []
 
39
  for section, content in sections.items():
40
  section_text = " ".join(content)
41
  sentences = re.split(r'[\.\n]', section_text)
@@ -58,65 +95,87 @@ def load_knowledge():
58
  chunk_ids.append(section)
59
 
60
  return chunks, chunk_ids
 
 
 
 
61
 
62
- knowledge_chunks, chunk_categories = load_knowledge()
63
- knowledge_embeddings = model.encode(knowledge_chunks, convert_to_tensor=True)
64
-
65
- # Advanced Arabic response generator
66
  def generate_arabic_response(question, top_chunks):
67
- response = "المساعد الآلي لوحدة الشفافية\n\n"
68
-
69
- # Analyze question type
70
- question_type = ام" # default
71
- q_words = question.split()
72
-
73
- if any(w in ["كيف", "طريقة", "خطوات"] for w in q_words):
74
- question_type = "إجرائي"
75
- elif any(w in ["ما هي", "ما هو", "تعريف"] for w in q_words):
76
- question_type = "تعريفي"
77
- elif any(w in ["لماذا", "سبب", "أسباب"] for w in q_words):
78
- question_type = "تفسيري"
79
-
80
- # Generate context-aware response
81
- if question_type == "تعريفي":
82
- response += "بناءً على سؤالك عن المفاهيم الأساسية:\n\n"
83
- elif question_type == "إجرائي":
84
- response += "لتنفيذ ما تبحث عنه، إليك الخطوات العملية:\n\n"
85
- else:
86
- response += "إليك الإجابة الشاملة على سؤالك:\n\n"
87
-
88
- # Build comprehensive answer
89
- used_sections = set()
90
- for chunk, score in top_chunks:
91
- section = chunk.split(":")[0]
92
- if section not in used_sections and score > 0.35:
93
- response += f"• {chunk}\n\n"
94
- used_sections.add(section)
95
-
96
- # Add intelligent follow-up
97
- if len(used_sections) > 1:
98
- response += "\nهذه المعلومات مترابطة حيث أن "
99
- response += " و".join(list(used_sections)[:3]) + " جوانب متكاملة."
100
-
101
- return response
 
 
 
 
 
 
 
 
 
102
 
103
  def answer_question(question):
 
104
  try:
105
  if not question.strip():
106
  return "الرجاء إدخال سؤال واضح ومحدد"
107
 
 
 
 
 
108
  # Arabic question preprocessing
109
  question = re.sub(r'[؟\?]', '', question).strip()
 
 
 
110
  question_embedding = model.encode(question, convert_to_tensor=True)
 
111
 
112
- # Semantic search with diversity
113
  cos_scores = util.cos_sim(question_embedding, knowledge_embeddings)[0]
114
  top_k = min(5, len(knowledge_chunks))
115
-
116
- # Get diverse results from different sections
117
  top_indices = torch.topk(cos_scores, k=top_k).indices.tolist()
118
- top_chunks = [(knowledge_chunks[idx], cos_scores[idx].item())
119
- for idx in top_indices if cos_scores[idx] > 0.3]
 
 
 
 
 
 
120
 
121
  if not top_chunks:
122
  return "لم أجد إجابة دقيقة، لكن يمكنك:\n- صياغة السؤال بطريقة أخرى\n- الرجوع للوثائق الرسمية"
@@ -124,9 +183,15 @@ def answer_question(question):
124
  return generate_arabic_response(question, top_chunks)
125
 
126
  except Exception as e:
127
- logger.error(f"Error: {str(e)}")
128
  return "حدث خطأ تقني، يرجى المحاولة لاحقاً"
129
 
 
 
 
 
 
 
130
  # Modern Arabic UI
131
  css = """
132
  .arabic-ui {
@@ -139,6 +204,12 @@ css = """
139
  color: white;
140
  padding: 20px;
141
  border-radius: 8px;
 
 
 
 
 
 
142
  }
143
  """
144
 
@@ -151,8 +222,16 @@ with gr.Blocks(css=css) as demo:
151
  </div>
152
  """)
153
 
154
- question = gr.Textbox(label="اكتب سؤالك هنا", placeholder="مثال: ما هي مراحل الموازنة التشاركية؟")
155
- answer = gr.Textbox(label="الإجابة", interactive=False)
 
 
 
 
 
 
 
 
156
 
157
  gr.Examples(
158
  examples=[
@@ -160,10 +239,22 @@ with gr.Blocks(css=css) as demo:
160
  ["كيف يمكن المشاركة في الموازنة التشاركية؟"],
161
  ["ما دور ديوان المحاسبة في تحقيق الشفافية؟"]
162
  ],
163
- inputs=question
 
164
  )
165
 
166
  submit = gr.Button("الحصول على إجابة ذكية")
167
  submit.click(answer_question, inputs=question, outputs=answer)
 
 
 
 
 
 
168
 
169
- demo.launch(server_name="0.0.0.0", server_port=7860)
 
 
 
 
 
 
5
  from typing import List
6
  import re
7
  import numpy as np
8
+ import os
9
 
10
  # Configure advanced logging
11
  logging.basicConfig(
 
14
  )
15
  logger = logging.getLogger(__name__)
16
 
17
# Module-level state, populated by initialize_components().
model = None
knowledge_chunks = []
knowledge_embeddings = None

def initialize_components():
    """Initialize the sentence-transformer model and the knowledge base.

    Populates the module globals ``model``, ``knowledge_chunks`` and
    ``knowledge_embeddings``.

    Raises:
        RuntimeError: if either the model or the knowledge base fails to
            load; the original exception is attached as ``__cause__``.
    """
    global model, knowledge_chunks, knowledge_embeddings

    # Model loading — prefer GPU when available, fall back to CPU.
    try:
        model = SentenceTransformer(
            "CAMeL-Lab/bert-base-arabic-camelbert-ca",
            device="cuda" if torch.cuda.is_available() else "cpu"
        )
        logger.info(f"Model loaded on device: {model.device}")
    except Exception as e:
        logger.error(f"Model loading failed: {str(e)}")
        # Chain the cause so the underlying error survives in tracebacks.
        raise RuntimeError("Failed to initialize the AI model") from e

    # Knowledge base loading + embedding of every chunk up front.
    try:
        knowledge_chunks, _ = load_knowledge()
        if not knowledge_chunks:
            raise ValueError("No knowledge chunks loaded - check knowledge.txt")

        knowledge_embeddings = model.encode(knowledge_chunks, convert_to_tensor=True)
        logger.info(f"Successfully loaded {len(knowledge_chunks)} knowledge chunks")
    except Exception as e:
        logger.error(f"Knowledge base loading failed: {str(e)}")
        raise RuntimeError("Failed to initialize knowledge base") from e
48
 
 
49
  def load_knowledge():
50
+ """Load and process knowledge file with validation"""
51
+ try:
52
+ if not os.path.exists("knowledge.txt"):
53
+ raise FileNotFoundError("knowledge.txt file not found")
54
+
55
+ with open("knowledge.txt", "r", encoding="utf-8") as f:
56
+ content = f.read().strip()
57
+ if not content:
58
+ raise ValueError("knowledge.txt is empty")
59
+
60
+ # Process knowledge file
61
  sections = {}
62
  current_section = ""
63
 
64
+ with open("knowledge.txt", "r", encoding="utf-8") as f:
65
+ for line in f:
66
+ line = line.strip()
67
+ if line.startswith("## "):
68
+ current_section = line[3:]
69
+ sections[current_section] = []
70
+ elif line and current_section:
71
+ sections[current_section].append(line)
72
+
73
  chunks = []
74
  chunk_ids = []
75
+
76
  for section, content in sections.items():
77
  section_text = " ".join(content)
78
  sentences = re.split(r'[\.\n]', section_text)
 
95
  chunk_ids.append(section)
96
 
97
  return chunks, chunk_ids
98
+
99
+ except Exception as e:
100
+ logger.error(f"Error loading knowledge: {str(e)}")
101
+ raise
102
 
 
 
 
 
103
def generate_arabic_response(question, top_chunks):
    """Build an Arabic answer from the retrieved knowledge chunks.

    Args:
        question: the (already preprocessed) user question.
        top_chunks: list of ``(chunk_text, score)`` pairs, highest-scoring
            first; chunks are expected to be prefixed ``"section: ..."``.

    Returns:
        A formatted Arabic response string; a fallback message when no
        chunks are available or generation fails.
    """
    try:
        if not top_chunks:
            return "لم أجد معلومات كافية للإجابة على سؤالك"

        response = "المساعد الآلي لوحدة الشفافية\n\n"

        # Classify the question to pick an appropriate preamble.
        question_type = "عام"  # default: general
        q_words = question.split()

        if any(kw in q_words for kw in ("كيف", "طريقة", "خطوات")):
            question_type = "إجرائي"  # procedural ("how / steps")
        # BUGFIX: two-word phrases can never equal a single whitespace-split
        # word, so they are matched against the full question instead.
        # Also restored the broken literal "أسباب" (was missing its opening quote).
        elif "تعريف" in q_words or "ما هي" in question or "ما هو" in question:
            question_type = "تعريفي"  # definitional ("what is / definition")
        elif any(kw in q_words for kw in ("لماذا", "سبب", "أسباب")):
            question_type = "تفسيري"  # explanatory ("why / reasons")

        # Context-aware preamble per question type.
        if question_type == "تعريفي":
            response += "بناءً على سؤالك عن المفاهيم الأساسية:\n\n"
        elif question_type == "إجرائي":
            response += "لتنفيذ ما تبحث عنه، إليك الخطوات العملية:\n\n"
        else:
            response += "إليك الإجابة الشاملة على سؤالك:\n\n"

        # One bullet per section, keeping only sufficiently relevant chunks.
        used_sections = set()
        for chunk, score in top_chunks:
            section = chunk.split(":")[0]
            if section not in used_sections and score > 0.35:
                response += f"• {chunk}\n\n"
                used_sections.add(section)

        # When several sections contributed, note that they are related.
        if len(used_sections) > 1:
            response += "\nهذه المعلومات مترابطة حيث أن "
            response += " و".join(list(used_sections)[:3]) + " جوانب متكاملة."

        return response

    except Exception as e:
        logger.error(f"Error generating response: {str(e)}")
        return "حدث خطأ أثناء توليد الإجابة"
148
 
149
def answer_question(question):
    """Answer a user question against the embedded knowledge base.

    Returns an Arabic response string; user-facing fallback messages are
    returned for empty input, no relevant match, or internal errors.
    """
    try:
        # Guard: reject blank input immediately.
        if not question.strip():
            return "الرجاء إدخال سؤال واضح ومحدد"

        # Lazy (re)initialization in case startup loading did not run.
        if model is None or not knowledge_chunks:
            initialize_components()

        # Strip Arabic and Latin question marks before embedding.
        question = re.sub(r'[؟\?]', '', question).strip()
        logger.info(f"Processing question: '{question}'")

        query_vec = question_embedding = model.encode(question, convert_to_tensor=True)
        logger.info("Question encoded successfully")

        # Cosine similarity of the query against every knowledge chunk.
        similarities = util.cos_sim(query_vec, knowledge_embeddings)[0]
        k = min(5, len(knowledge_chunks))
        candidate_ids = torch.topk(similarities, k=k).indices.tolist()

        # Keep only candidates above the relevance floor (0.3).
        top_chunks = []
        for idx in candidate_ids:
            if similarities[idx] > 0.3:
                top_chunks.append((knowledge_chunks[idx], similarities[idx].item()))

        logger.info(f"Found {len(top_chunks)} relevant chunks (max score: {max(similarities).item():.2f})")

        if not top_chunks:
            return "لم أجد إجابة دقيقة، لكن يمكنك:\n- صياغة السؤال بطريقة أخرى\n- الرجوع للوثائق الرسمية"

        return generate_arabic_response(question, top_chunks)

    except Exception as e:
        logger.error(f"Error answering question: {str(e)}", exc_info=True)
        return "حدث خطأ تقني، يرجى المحاولة لاحقاً"
188
 
189
# Initialize components when starting
# Best-effort at import time: on failure the app still starts, and
# answer_question() retries initialization lazily on the first request.
try:
    initialize_components()
except Exception as e:
    logger.error(f"Initialization failed: {str(e)}")
194
+
195
  # Modern Arabic UI
196
  css = """
197
  .arabic-ui {
 
204
  color: white;
205
  padding: 20px;
206
  border-radius: 8px;
207
+ margin-bottom: 20px;
208
+ }
209
+ .footer {
210
+ margin-top: 20px;
211
+ font-size: 0.9em;
212
+ color: #666;
213
  }
214
  """
215
 
 
222
  </div>
223
  """)
224
 
225
+ question = gr.Textbox(
226
+ label="اكتب سؤالك هنا",
227
+ placeholder="مثال: ما هي مراحل الموازنة التشاركية؟",
228
+ lines=3
229
+ )
230
+ answer = gr.Textbox(
231
+ label="الإجابة",
232
+ interactive=False,
233
+ lines=10
234
+ )
235
 
236
  gr.Examples(
237
  examples=[
 
239
  ["كيف يمكن المشاركة في الموازنة التشاركية؟"],
240
  ["ما دور ديوان المحاسبة في تحقيق الشفافية؟"]
241
  ],
242
+ inputs=question,
243
+ label="أسئلة مثاليه"
244
  )
245
 
246
  submit = gr.Button("الحصول على إجابة ذكية")
247
  submit.click(answer_question, inputs=question, outputs=answer)
248
+
249
+ gr.Markdown("""
250
+ <div class="footer">
251
+ <p>لأي استفسارات تقنية، يرجى التواصل مع فريق الدعم</p>
252
+ </div>
253
+ """)
254
 
255
# Launch with error handling
try:
    # Bind on all interfaces; 7860 is the conventional Gradio port.
    demo.launch(server_name="0.0.0.0", server_port=7860)
except Exception as e:
    # Log AND print so the failure is visible even if logging is misconfigured.
    logger.error(f"Failed to launch app: {str(e)}")
    print(f"Failed to launch app: {str(e)}")