mo-456 committed on
Commit
85241d2
·
verified ·
1 Parent(s): d46adf9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +118 -6
app.py CHANGED
@@ -9,6 +9,7 @@ import numpy as np
9
  from sklearn.feature_extraction.text import TfidfVectorizer
10
  from sklearn.metrics.pairwise import cosine_similarity
11
  import json
 
12
 
13
  # Configure logging
14
  logging.basicConfig(
@@ -30,6 +31,40 @@ except Exception as e:
30
  logger.error(f"Model loading failed: {str(e)}")
31
  raise RuntimeError("Failed to initialize the AI model")
32
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  # Initialize TF-IDF for hybrid search
34
  tfidf_vectorizer = TfidfVectorizer(
35
  max_features=1000,
@@ -299,7 +334,7 @@ class ResponseGenerator:
299
  }
300
 
301
  def generate_response(self, question: str, retrieved_chunks: List[Tuple[str, float, str]], question_type: str) -> str:
302
- """Generate professionally formatted Arabic responses"""
303
  try:
304
  if not retrieved_chunks:
305
  return self._generate_fallback_response(question)
@@ -314,7 +349,16 @@ class ResponseGenerator:
314
  # Get template info
315
  template_info = self.response_templates.get(question_type, self.response_templates['general'])
316
 
317
- # Build professional response
 
 
 
 
 
 
 
 
 
318
  response = self._build_response_header(question, template_info)
319
  response += self._build_main_content(sections, template_info)
320
  response += self._build_additional_info(sections)
@@ -327,6 +371,74 @@ class ResponseGenerator:
327
  logger.error(f"Response generation failed: {str(e)}")
328
  return self._generate_error_response()
329
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
330
  def _build_response_header(self, question: str, template_info: Dict) -> str:
331
  """Build professional response header"""
332
  header = f"""
@@ -624,8 +736,8 @@ with gr.Blocks(css=css, title="ุงู„ู…ุณุงุนุฏ ุงู„ุขู„ูŠ ู„ู„ู…ูˆุงุฒู†ุฉ ุงู„ุช
624
  with gr.Column(elem_classes="arabic-ui"):
625
  gr.Markdown("""
626
  <div class="header">
627
- <h1>๐Ÿค– ุงู„ู…ุณุงุนุฏ ุงู„ุขู„ูŠ ุงู„ู…ุทูˆุฑ ู„ู„ู…ูˆุงุฒู†ุฉ ุงู„ุชุดุงุฑูƒูŠุฉ</h1>
628
- <p>ู†ุณุฎุฉ ู…ุญุณู‘ู†ุฉ ุชู‚ุฏู… ุฅุฌุงุจุงุช ุฃูƒุซุฑ ุฏู‚ุฉ ูˆู…ู‡ู†ูŠุฉ ุญูˆู„ ุงู„ุดูุงููŠุฉ ุงู„ู…ุงู„ูŠุฉ ูˆุงู„ู…ุดุงุฑูƒุฉ ุงู„ู…ุฌุชู…ุนูŠุฉ</p>
629
  </div>
630
  """)
631
 
@@ -666,7 +778,7 @@ with gr.Blocks(css=css, title="ุงู„ู…ุณุงุนุฏ ุงู„ุขู„ูŠ ู„ู„ู…ูˆุงุฒู†ุฉ ุงู„ุช
666
  gr.Markdown("""
667
  <div class="footer">
668
  <p><strong>ูˆุญุฏุฉ ุงู„ุดูุงููŠุฉ ูˆุงู„ู…ุดุงุฑูƒุฉ ุงู„ู…ุฌุชู…ุนูŠุฉ - ูˆุฒุงุฑุฉ ุงู„ู…ุงู„ูŠุฉ</strong></p>
669
- <p>ู†ุณุฎุฉ ู…ุญุณู‘ู†ุฉ ู…ุน ุชู‚ู†ูŠุงุช ุงู„ุจุญุซ ุงู„ู…ุชู‚ุฏู…ุฉ ูˆุชูˆู„ูŠุฏ ุงู„ุฅุฌุงุจุงุช ุงู„ุฐูƒูŠุฉ</p>
670
  </div>
671
  """)
672
 
@@ -681,4 +793,4 @@ if __name__ == "__main__":
681
  server_port=7860,
682
  share=False,
683
  show_error=True
684
- )
 
9
  from sklearn.feature_extraction.text import TfidfVectorizer
10
  from sklearn.metrics.pairwise import cosine_similarity
11
  import json
12
+ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
13
 
14
  # Configure logging
15
  logging.basicConfig(
 
31
  logger.error(f"Model loading failed: {str(e)}")
32
  raise RuntimeError("Failed to initialize the AI model")
33
 
34
# Initialize Arabic LLM for text generation and rephrasing.
# On any failure (download error, missing torch, out of memory) we degrade
# gracefully: llm_available stays False and response generation falls back
# to the template-based path.
try:
    logger.info("Loading Arabic LLM for text generation...")
    # Using ArabianGPT for Arabic text generation
    llm_model_name = "riotu-lab/ArabianGPT-01B"

    # Load tokenizer and model; fp16 + device_map only when a GPU is present.
    llm_tokenizer = AutoTokenizer.from_pretrained(llm_model_name)
    llm_model = AutoModelForCausalLM.from_pretrained(
        llm_model_name,
        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
        device_map="auto" if torch.cuda.is_available() else None,
    )

    # Create text generation pipeline
    text_generator = pipeline(
        "text-generation",
        model=llm_model,
        tokenizer=llm_tokenizer,
        max_length=512,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
        pad_token_id=llm_tokenizer.eos_token_id,
    )

    logger.info("Arabic LLM loaded successfully")
    llm_available = True

except Exception as e:
    logger.warning(f"LLM loading failed: {str(e)}. Falling back to basic response generation.")
    # Fix: also reset the partially-initialized globals. Previously only
    # text_generator was cleared, so a failure between tokenizer and pipeline
    # creation left llm_tokenizer/llm_model unbound or half-loaded, and any
    # later reference (e.g. llm_tokenizer.eos_token_id) raised NameError.
    llm_tokenizer = None
    llm_model = None
    text_generator = None
    llm_available = False
67
+
68
  # Initialize TF-IDF for hybrid search
69
  tfidf_vectorizer = TfidfVectorizer(
70
  max_features=1000,
 
334
  }
335
 
336
  def generate_response(self, question: str, retrieved_chunks: List[Tuple[str, float, str]], question_type: str) -> str:
337
+ """Generate professionally formatted Arabic responses with LLM enhancement"""
338
  try:
339
  if not retrieved_chunks:
340
  return self._generate_fallback_response(question)
 
349
  # Get template info
350
  template_info = self.response_templates.get(question_type, self.response_templates['general'])
351
 
352
+ # Extract raw content for LLM processing
353
+ raw_content = self._extract_raw_content(sections)
354
+
355
+ # Use LLM to enhance and rephrase the response if available
356
+ if llm_available and raw_content:
357
+ enhanced_response = self._generate_llm_enhanced_response(question, raw_content, template_info)
358
+ if enhanced_response:
359
+ return enhanced_response
360
+
361
+ # Fallback to original response generation
362
  response = self._build_response_header(question, template_info)
363
  response += self._build_main_content(sections, template_info)
364
  response += self._build_additional_info(sections)
 
371
  logger.error(f"Response generation failed: {str(e)}")
372
  return self._generate_error_response()
373
 
374
+ def _extract_raw_content(self, sections: Dict) -> str:
375
+ """Extract raw content from sections for LLM processing"""
376
+ content_parts = []
377
+ for section, chunks in sections.items():
378
+ for chunk, score in chunks[:2]: # Take top 2 chunks per section
379
+ if ":" in chunk:
380
+ content = chunk.split(":", 1)[1].strip()
381
+ content_parts.append(content)
382
+
383
+ return " ".join(content_parts[:3]) # Limit to avoid token limits
384
+
385
def _generate_llm_enhanced_response(self, question: str, raw_content: str, template_info: Dict) -> str:
    """Generate an enhanced Arabic response using the loaded LLM.

    Builds an instruction prompt from the user's question and the retrieved
    raw content, runs the module-level text-generation pipeline, extracts
    the text that follows the answer marker, and wraps it in the standard
    header/footer template.

    Returns the formatted response string, or None when the LLM pipeline is
    unavailable, generation fails, or the answer marker is missing — the
    caller then falls back to template-based generation.
    """
    try:
        # Fix: guard against a missing pipeline so this method is safe even
        # if invoked while LLM loading failed at startup.
        if text_generator is None:
            return None

        # Create a prompt for the LLM
        prompt = f"""ุจู†ุงุกู‹ ุนู„ู‰ ุงู„ู…ุนู„ูˆู…ุงุช ุงู„ุชุงู„ูŠุฉุŒ ุฃุฌุจ ุนู„ู‰ ุงู„ุณุคุงู„ ุจุทุฑูŠู‚ุฉ ู…ู‡ู†ูŠุฉ ูˆู…ูุตู„ุฉ:

ุงู„ุณุคุงู„: {question}

ุงู„ู…ุนู„ูˆู…ุงุช ุงู„ู…ุชุงุญุฉ: {raw_content}

ุงู„ุฅุฌุงุจุฉ ุงู„ู…ุทู„ูˆุจุฉ ูŠุฌุจ ุฃู† ุชูƒูˆู†:
- ู…ู‡ู†ูŠุฉ ูˆู…ู†ุธู…ุฉ
- ุจุงู„ู„ุบุฉ ุงู„ุนุฑุจูŠุฉ ุงู„ูุตุญู‰
- ุชุญุชูˆูŠ ุนู„ู‰ ุชูุงุตูŠู„ ู…ููŠุฏุฉ
- ู…ู†ุงุณุจุฉ ู„ู…ูˆุถูˆุน ุงู„ู…ูˆุงุฒู†ุฉ ุงู„ุชุดุงุฑูƒูŠุฉ ูˆุงู„ุดูุงููŠุฉ ุงู„ู…ุงู„ูŠุฉ

ุงู„ุฅุฌุงุจุฉ:"""

        # Generate response using LLM.
        # Fix: use max_new_tokens instead of max_length=400. max_length caps
        # prompt + completion together, so a long prompt (question plus
        # retrieved content) could leave no budget for generation and make
        # the pipeline raise or return the prompt unchanged.
        generated = text_generator(
            prompt,
            max_new_tokens=256,
            num_return_sequences=1,
            temperature=0.7,
            do_sample=True,
            pad_token_id=llm_tokenizer.eos_token_id,
        )

        if generated and len(generated) > 0:
            full_response = generated[0]['generated_text']
            # The pipeline echoes the prompt; keep only what follows the
            # answer marker.
            if "ุงู„ุฅุฌุงุจุฉ:" in full_response:
                answer = full_response.split("ุงู„ุฅุฌุงุจุฉ:")[-1].strip()

                # Format the enhanced response
                formatted_response = f"""
โ•”โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•—
โ•‘ {template_info["icon"]} **{template_info["title"]}**
โ•šโ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•

**ุงุณุชุนู„ุงู…ูƒ:** {question}

## ุงู„ุฅุฌุงุจุฉ ุงู„ู…ุทูˆุฑุฉ

{answer}

---
**ู„ู„ู…ุฒูŠุฏ ู…ู† ุงู„ู…ุนู„ูˆู…ุงุช:** ุชูˆุงุตู„ ู…ุน ูˆุญุฏุฉ ุงู„ุดูุงููŠุฉ ูˆุงู„ู…ุดุงุฑูƒุฉ ุงู„ู…ุฌุชู…ุนูŠุฉ
**ุงู„ู…ุตุฏุฑ:** ูˆุฒุงุฑุฉ ุงู„ู…ุงู„ูŠุฉ - ุฌู…ู‡ูˆุฑูŠุฉ ู…ุตุฑ ุงู„ุนุฑุจูŠุฉ
"""
                return formatted_response

    except Exception as e:
        logger.error(f"LLM enhancement failed: {str(e)}")

    return None
441
+
442
  def _build_response_header(self, question: str, template_info: Dict) -> str:
443
  """Build professional response header"""
444
  header = f"""
 
736
  with gr.Column(elem_classes="arabic-ui"):
737
  gr.Markdown("""
738
  <div class="header">
739
+ <h1>ุงู„ู…ุณุงุนุฏ ุงู„ุขู„ูŠ ุงู„ู…ุทูˆุฑ ู„ู„ู…ูˆุงุฒู†ุฉ ุงู„ุชุดุงุฑูƒูŠุฉ ู…ุน ุงู„ุฐูƒุงุก ุงู„ุงุตุทู†ุงุนูŠ</h1>
740
+ <p>ู†ุณุฎุฉ ู…ุญุณู‘ู†ุฉ ู…ุน ู†ู…ูˆุฐุฌ ู„ุบูˆูŠ ุฐูƒูŠ ู„ุฅุนุงุฏุฉ ุตูŠุงุบุฉ ุงู„ุฅุฌุงุจุงุช ูˆุชูˆู„ูŠุฏ ู…ุญุชูˆู‰ ุฃูƒุซุฑ ุฏู‚ุฉ ูˆู…ู‡ู†ูŠุฉ</p>
741
  </div>
742
  """)
743
 
 
778
  gr.Markdown("""
779
  <div class="footer">
780
  <p><strong>ูˆุญุฏุฉ ุงู„ุดูุงููŠุฉ ูˆุงู„ู…ุดุงุฑูƒุฉ ุงู„ู…ุฌุชู…ุนูŠุฉ - ูˆุฒุงุฑุฉ ุงู„ู…ุงู„ูŠุฉ</strong></p>
781
+ <p>ู†ุณุฎุฉ ู…ุญุณู‘ู†ุฉ ู…ุน ู†ู…ูˆุฐุฌ ู„ุบูˆูŠ ุฐูƒูŠ ู„ุฅุนุงุฏุฉ ุตูŠุงุบุฉ ุงู„ุฅุฌุงุจุงุช ูˆุชูˆู„ูŠุฏ ู…ุญุชูˆู‰ ุฃูƒุซุฑ ุฏู‚ุฉ ูˆู…ู‡ู†ูŠุฉ</p>
782
  </div>
783
  """)
784
 
 
793
  server_port=7860,
794
  share=False,
795
  show_error=True
796
+ )