eshameo045 committed on
Commit
676b0cc
·
1 Parent(s): b2e9e80
Files changed (1) hide show
  1. utils/llm_handler.py +56 -64
utils/llm_handler.py CHANGED
@@ -11,7 +11,10 @@ class LLMHandler:
11
  api_key = os.environ.get("SUPADATA_API_KEY")
12
  if not api_key:
13
  raise ValueError("SUPADATA_API_KEY environment variable not set!")
14
- self.client = OpenAI(api_key=api_key)
 
 
 
15
  self.model = "gpt-4o-mini"
16
 
17
  def _call_llm(self, system_prompt: str, user_prompt: str, max_tokens: int = 2000) -> str:
@@ -28,10 +31,10 @@ class LLMHandler:
28
 
29
  def _get_language_instruction(self, language: str) -> str:
30
  instructions = {
31
- "english": "Respond in clear, simple English.",
32
- "urdu": "صرف اردو میں جواب دیں۔ آسان اردو استعمال کریں۔",
33
- "roman_urdu": "Sirf Roman Urdu mein jawab do. English bilkul mat use karo. Jaise: 'Yeh topic bohot important hai kyunki...'"
34
- }
35
  return instructions.get(language, instructions["english"])
36
 
37
  def answer_question(self, question: str, context: str, language: str, video_title: str, history: list = []) -> str:
@@ -188,10 +191,11 @@ Use 8-12 nodes maximum."""
188
  {"from": "3", "to": "4", "label": ""}
189
  ]
190
  }
 
191
  def generate_quiz(self, transcript: str, language: str, video_title: str) -> list:
192
  lang_instruction = self._get_language_instruction(language)
193
  transcript_excerpt = transcript[:5000]
194
-
195
  system_prompt = f"""You are LectureLens AI. Generate MCQ quiz questions.
196
  {lang_instruction}
197
  IMPORTANT: Return ONLY a valid JSON array. No markdown, no backticks, no explanation.
@@ -232,63 +236,51 @@ Include:
232
  3. Unique Points in Video 2
233
  4. Which is better for beginners?"""
234
  return self._call_llm(system_prompt, user_prompt, max_tokens=2000)
235
-
236
-
237
-
238
 
239
  def check_educational(self, transcript: str, title: str = "") -> bool:
240
-
241
- reject_keywords = [
242
- 'recipe', 'cooking', 'cook', 'ingredient', 'tablespoon', 'teaspoon',
243
- 'karahi', 'biryani', 'curry', 'masala', 'chawal', 'gosht', 'daal',
244
- 'pakana', 'khana', 'paka', 'tail', 'namak', 'mirch', 'aata', 'maida',
245
- 'drama', 'episode', 'serial', 'actor', 'actress', 'scene',
246
- 'song', 'music', 'singer', 'lyrics', 'concert', 'album',
247
- 'cartoon', 'animation', 'anime', 'character',
248
- 'vlog', 'makeup', 'beauty', 'skincare', 'fashion', 'outfit',
249
- 'subscribe', 'like karo', 'follow karo', 'instagram',
250
- 'funny', 'comedy', 'prank', 'challenge',
251
- 'news', 'breaking news', 'reporter', 'anchor',
252
- 'game', 'gaming', 'gameplay', 'streamer',
253
- 'travel', 'trip', 'tour', 'vlog',
254
- 'reaction', 'review karte hain',
255
- ]
256
-
257
- accept_keywords = [
258
- 'lecture', 'lesson', 'chapter', 'topic', 'concept', 'definition',
259
- 'theory', 'algorithm', 'programming', 'code', 'function',
260
- 'mathematics', 'math', 'physics', 'chemistry', 'biology',
261
- 'history', 'geography', 'economics', 'psychology',
262
- 'tutorial', 'course', 'university', 'college', 'school',
263
- 'exam', 'assignment', 'hypothesis', 'equation', 'formula',
264
- 'theorem', 'proof', 'data', 'analysis', 'research',
265
- 'python', 'javascript', 'machine learning', 'artificial intelligence',
266
- 'database', 'network', 'compiler', 'accounting', 'finance',
267
- 'explain', 'understand', 'learn', 'study', 'education',
268
- 'parh', 'seekhna', 'samajhna', 'taleem', 'ilm',
269
- 'class', 'teacher', 'student', 'syllabus', 'notes',
270
- ]
271
-
272
- text = (transcript[:3000] + " " + title).lower()
273
-
274
- reject_count = sum(1 for kw in reject_keywords if kw in text)
275
- accept_count = sum(1 for kw in accept_keywords if kw in text)
276
-
277
- # 2 ya zyada reject keywords — reject!
278
- if reject_count >= 2:
279
- return False
280
-
281
- # 2 ya zyada accept keywords — allow!
282
- if accept_count >= 2:
283
- return True
284
-
285
- # Dono mein kuch nahi — AI se check
286
- try:
287
- system_prompt = """Strict classifier. Return ONLY 'yes' or 'no'.
288
- 'yes' ONLY for: university lecture, school lesson, coding tutorial, academic subject, professional skill training.
289
- 'no' for: cooking, drama, song, vlog, news, cartoon, gaming, fashion, travel, comedy, recipe."""
290
- user_prompt = f"Title: {title}\nTranscript: {transcript[:800]}\nIs this educational? yes or no only."
291
- response = self._call_llm(system_prompt, user_prompt, max_tokens=5)
292
- return 'yes' in response.lower().strip()
293
- except:
294
- return False
 
11
  api_key = os.environ.get("SUPADATA_API_KEY")
12
  if not api_key:
13
  raise ValueError("SUPADATA_API_KEY environment variable not set!")
14
+ self.client = OpenAI(
15
+ api_key=api_key,
16
+ base_url="https://api.supadata.ai/v1" # ← Supadata ka LLM endpoint
17
+ )
18
  self.model = "gpt-4o-mini"
19
 
20
  def _call_llm(self, system_prompt: str, user_prompt: str, max_tokens: int = 2000) -> str:
 
31
 
32
  def _get_language_instruction(self, language: str) -> str:
33
  instructions = {
34
+ "english": "Respond in clear, simple English.",
35
+ "urdu": "صرف اردو میں جواب دیں۔ آسان اردو استعمال کریں۔",
36
+ "roman_urdu": "Sirf Roman Urdu mein jawab do. English bilkul mat use karo. Jaise: 'Yeh topic bohot important hai kyunki...'"
37
+ }
38
  return instructions.get(language, instructions["english"])
39
 
40
  def answer_question(self, question: str, context: str, language: str, video_title: str, history: list = []) -> str:
 
191
  {"from": "3", "to": "4", "label": ""}
192
  ]
193
  }
194
+
195
  def generate_quiz(self, transcript: str, language: str, video_title: str) -> list:
196
  lang_instruction = self._get_language_instruction(language)
197
  transcript_excerpt = transcript[:5000]
198
+
199
  system_prompt = f"""You are LectureLens AI. Generate MCQ quiz questions.
200
  {lang_instruction}
201
  IMPORTANT: Return ONLY a valid JSON array. No markdown, no backticks, no explanation.
 
236
  3. Unique Points in Video 2
237
  4. Which is better for beginners?"""
238
  return self._call_llm(system_prompt, user_prompt, max_tokens=2000)
 
 
 
239
 
240
  def check_educational(self, transcript: str, title: str = "") -> bool:
241
+ """
242
+ FIX: Pehle sirf obvious non-educational content reject karo.
243
+ Doubt ho to ALLOW karo — reject mat karo.
244
+ """
245
+
246
+ # Sirf ye cheezein clearly reject karo
247
+ hard_reject_keywords = [
248
+ 'recipe', 'cooking', 'ingredient', 'tablespoon', 'teaspoon',
249
+ 'karahi', 'biryani', 'pakana', 'khana banana',
250
+ 'drama serial', 'episode dekho', 'actor', 'actress',
251
+ 'song lyrics', 'music video', 'concert',
252
+ 'makeup tutorial', 'skincare routine', 'fashion haul',
253
+ 'funny prank', 'comedy sketch',
254
+ 'gaming gameplay', 'game stream',
255
+ ]
256
+
257
+ # Ye hain to definitely educational hai
258
+ hard_accept_keywords = [
259
+ 'lecture', 'lesson', 'chapter', 'tutorial', 'course',
260
+ 'university', 'college', 'school', 'class',
261
+ 'algorithm', 'programming', 'python', 'javascript', 'machine learning',
262
+ 'mathematics', 'math', 'physics', 'chemistry', 'biology',
263
+ 'history', 'geography', 'economics', 'psychology',
264
+ 'exam', 'assignment', 'hypothesis', 'equation', 'formula',
265
+ 'theorem', 'proof', 'data structure', 'database',
266
+ 'artificial intelligence', 'deep learning', 'neural network',
267
+ 'accounting', 'finance', 'networking', 'compiler',
268
+ 'parh', 'seekhna', 'samajhna', 'taleem', 'ilm', 'sabaq',
269
+ 'teacher', 'student', 'syllabus', 'notes',
270
+ 'explain', 'definition', 'concept', 'theory',
271
+ ]
272
+
273
+ text = (transcript[:3000] + " " + title).lower()
274
+
275
+ # Koi bhi hard_accept mila turant allow
276
+ for kw in hard_accept_keywords:
277
+ if kw in text:
278
+ return True
279
+
280
+ # Sirf tab reject karo jab 3+ hard_reject keywords hon
281
+ reject_count = sum(1 for kw in hard_reject_keywords if kw in text)
282
+ if reject_count >= 3:
283
+ return False
284
+
285
+ # Baaki sab cases mein → ALLOW (doubt ka faida user ko do)
286
+ return True