Ahmed-Alghamdi committed on
Commit
b720e58
·
verified ·
1 Parent(s): 98cc026

Update response_generator.py

Browse files
Files changed (1) hide show
  1. response_generator.py +53 -108
response_generator.py CHANGED
@@ -1,5 +1,5 @@
1
  # response_generator.py
2
- import re
3
  from utils import setup_logger
4
  from config import Config
5
 
@@ -8,117 +8,62 @@ logger = setup_logger('response_generator')
8
class ResponseGenerator:
    """Extractive answer builder.

    Selects the highest-scoring sentences from retrieved chunks and
    formats them into an Arabic answer — no LLM call involved.
    """

    def __init__(self):
        """Set up the generator; no external model is required."""
        logger.info("Response generator initialized (extractive mode)")

    def generate_response(self, query, relevant_docs):
        """Return an extractive answer for *query* built from *relevant_docs*.

        relevant_docs is a pandas DataFrame with a 'content' column and an
        optional 'similarity_score' column. Falls back to a fixed Arabic
        message when nothing was retrieved or an error occurs.
        """
        try:
            if len(relevant_docs) == 0:
                return "عذرًا، لم أجد أي معلومات ذات صلة في المستندات."
            return self._generate_smart_extractive_answer(query, relevant_docs)
        except Exception as e:
            logger.error(f"Error generating response: {e}")
            return "عذرًا، لم أتمكن من إنشاء استجابة."

    def _generate_smart_extractive_answer(self, query, relevant_docs):
        """Score sentences from the top 3 chunks and stitch the best into an answer."""
        top_chunks = relevant_docs.head(3)
        keywords = self._extract_keywords(query)

        # Collect every sufficiently long sentence with a relevance score:
        # the chunk's similarity plus 0.1 per matching query keyword.
        candidates = []
        for chunk_id, chunk_row in top_chunks.iterrows():
            base_score = chunk_row.get('similarity_score', 0)
            for sentence in self._split_sentences(chunk_row['content']):
                cleaned = sentence.strip()
                if len(cleaned) < 20:
                    continue
                score = base_score
                lowered = sentence.lower()
                for kw in keywords:
                    if kw in lowered:
                        score += 0.1
                candidates.append({
                    'text': cleaned,
                    'score': score,
                    'chunk_id': chunk_id,
                })

        # Stable sort, highest score first; keep at most three sentences.
        ranked = sorted(candidates, key=lambda item: item['score'], reverse=True)
        winners = ranked[:3]

        if not winners:
            return self._format_simple_answer(top_chunks)

        pieces = ["**الإجابة:**\n"]
        several = len(winners) > 1
        for rank, item in enumerate(winners, 1):
            if several:
                pieces.append(f"\n**[{rank}]** {item['text']}")
            else:
                pieces.append(f"\n{item['text']}")
        pieces.append("\n\n---")

        if 'similarity_score' in top_chunks.columns:
            sims = top_chunks['similarity_score'].values
        else:
            sims = []
        pieces.append(f"**عدد المصادر:** {len(top_chunks)} chunks")
        if len(sims) > 0:
            # Shows the min–max similarity range (assumes descending order
            # from the retriever — TODO confirm).
            pieces.append(f" | **دقة المطابقة:** {sims[-1]:.0%} - {sims[0]:.0%}")

        return "\n".join(pieces)

    def _extract_keywords(self, query):
        """Tokenize the query, dropping Arabic stop words and short tokens."""
        stop_words = {'ما', 'هي', 'هو', 'في', 'من', 'إلى', 'على', 'عن', 'ال', 'و', 'أو'}
        return [
            token
            for token in query.lower().split()
            if len(token) > 2 and token not in stop_words
        ]

    def _split_sentences(self, text):
        """Split *text* on Arabic/Latin sentence terminators followed by whitespace."""
        return [piece.strip() for piece in re.split(r'[.؟!]\s+', text) if piece.strip()]

    def _format_simple_answer(self, top_chunks):
        """Fallback: return the single best chunk, truncated near 400 chars."""
        leader = top_chunks.iloc[0]
        body = leader['content']

        if len(body) > 400:
            body = body[:400]
            # Prefer to cut at the last sentence terminator; otherwise
            # mark the truncation with an ellipsis.
            cut = max(body.rfind('.'), body.rfind('؟'), body.rfind('!'))
            if cut > 100:
                body = body[:cut + 1]
            else:
                body += "..."

        match_score = leader.get('similarity_score', 0)
        return f"**الإجابة:**\n\n{body}\n\n---\n**دقة المطابقة:** {match_score:.0%}"
 
1
  # response_generator.py
2
+ from openai import OpenAI
3
  from utils import setup_logger
4
  from config import Config
5
 
 
8
class ResponseGenerator:
    """Generates short, formal Arabic answers with an OpenAI chat model,
    grounded exclusively in the retrieved document chunks (RAG-style)."""

    def __init__(self):
        """
        Initialize connection to OpenAI.

        Fails soft when the API key is absent: the app keeps running and
        reports the problem per-request instead of crashing at startup.
        """
        logger.info("Response generator initialized (LLM mode)")

        # Check if API Key exists
        if not Config.OPENAI_API_KEY:
            logger.error("OPENAI_API_KEY is missing in Config or Environment variables.")
            self.client = None
        else:
            self.client = OpenAI(api_key=Config.OPENAI_API_KEY)

    def generate_response(self, query, relevant_docs):
        """
        Generate a formalized short answer using the LLM, based on retrieved docs.

        Args:
            query: the user's question (Arabic expected, any string accepted).
            relevant_docs: pandas DataFrame with a 'content' column holding the
                retrieved chunks (presumably already ranked by the retriever —
                TODO confirm against the caller).

        Returns:
            str: the model's answer, or an Arabic fallback message when no
            documents were found, the client is unconfigured, or the API fails.
        """
        # 1. Handle no results found
        if len(relevant_docs) == 0:
            return "عذرًا، لم أجد أي معلومات ذات صلة في المستندات."

        # 2. Handle missing API Key gracefully
        if not self.client:
            return "عذرًا، لم يتم إعداد مفتاح API الخاص بالنموذج اللغوي."

        try:
            # 3. Construct the context by joining the retrieved chunks.
            context_text = "\n\n".join(relevant_docs['content'].tolist())

            # 4. System prompt: answer in Arabic, grounded in context only,
            # short and formal.
            # FIX: the adjacent string literals previously concatenated with no
            # separator, running the instruction sentences together; trailing
            # spaces now keep each sentence distinct.
            system_instruction = (
                "أنت مساعد ذكي ومحترف. مهمتك هي الإجابة على سؤال المستخدم بدقة. "
                "استخدم فقط المعلومات الواردة في 'السياق' أدناه. "
                "إذا لم تكن الإجابة موجودة في السياق، قل 'لا تتوفر معلومات'. "
                "اجعل إجابتك قصيرة، رسمية، ومباشرة."
            )

            # 5. User message carries the context followed by the question.
            user_message = f"السياق:\n{context_text}\n\nالسؤال: {query}"

            # 6. Call the OpenAI chat completions API.
            response = self.client.chat.completions.create(
                model=Config.OPENAI_MODEL,
                messages=[
                    {"role": "system", "content": system_instruction},
                    {"role": "user", "content": user_message},
                ],
                temperature=0.3,  # low temperature for more factual/consistent answers
                max_tokens=200,   # limit tokens to ensure a short answer
            )

            # 7. Extract the answer.
            # FIX: message.content may be None (e.g. refusal or tool-call
            # responses); guard before .strip() to avoid an AttributeError.
            answer = (response.choices[0].message.content or "").strip()
            return answer

        except Exception as e:
            # Broad catch is deliberate: network/API failures must degrade to a
            # user-facing Arabic message rather than crash the caller.
            logger.error(f"Error generating LLM response: {e}")
            return "عذرًا، واجهت مشكلة أثناء صياغة الإجابة."