Spaces:

eshameo045
/

LectureLens_AI

Sleeping

App Files Files Community

eshameo045 committed 20 days ago

Commit

676b0cc

1 Parent(s): b2e9e80

update

Browse files

Files changed (1) hide show

utils/llm_handler.py +56 -64

utils/llm_handler.py CHANGED Viewed

@@ -11,7 +11,10 @@ class LLMHandler:
         api_key = os.environ.get("SUPADATA_API_KEY")
         if not api_key:
             raise ValueError("SUPADATA_API_KEY environment variable not set!")
-        self.client = OpenAI(api_key=api_key)
         self.model = "gpt-4o-mini"
     def _call_llm(self, system_prompt: str, user_prompt: str, max_tokens: int = 2000) -> str:
@@ -28,10 +31,10 @@ class LLMHandler:
     def _get_language_instruction(self, language: str) -> str:
         instructions = {
-        "english": "Respond in clear, simple English.",
-        "urdu": "صرف اردو میں جواب دیں۔ آسان اردو استعمال کریں۔",
-        "roman_urdu": "Sirf Roman Urdu mein jawab do. English bilkul mat use karo. Jaise: 'Yeh topic bohot important hai kyunki...'"
-    }
         return instructions.get(language, instructions["english"])
     def answer_question(self, question: str, context: str, language: str, video_title: str, history: list = []) -> str:
@@ -188,10 +191,11 @@ Use 8-12 nodes maximum."""
                 {"from": "3", "to": "4", "label": ""}
             ]
         }
     def generate_quiz(self, transcript: str, language: str, video_title: str) -> list:
         lang_instruction = self._get_language_instruction(language)
         transcript_excerpt = transcript[:5000]
         system_prompt = f"""You are LectureLens AI. Generate MCQ quiz questions.
 {lang_instruction}
 IMPORTANT: Return ONLY a valid JSON array. No markdown, no backticks, no explanation.
@@ -232,63 +236,51 @@ Include:
 3. Unique Points in Video 2
 4. Which is better for beginners?"""
         return self._call_llm(system_prompt, user_prompt, max_tokens=2000)
     def check_educational(self, transcript: str, title: str = "") -> bool:
-      reject_keywords = [
-        'recipe', 'cooking', 'cook', 'ingredient', 'tablespoon', 'teaspoon',
-        'karahi', 'biryani', 'curry', 'masala', 'chawal', 'gosht', 'daal',
-        'pakana', 'khana', 'paka', 'tail', 'namak', 'mirch', 'aata', 'maida',
-        'drama', 'episode', 'serial', 'actor', 'actress', 'scene',
-        'song', 'music', 'singer', 'lyrics', 'concert', 'album',
-        'cartoon', 'animation', 'anime', 'character',
-        'vlog', 'makeup', 'beauty', 'skincare', 'fashion', 'outfit',
-        'subscribe', 'like karo', 'follow karo', 'instagram',
-        'funny', 'comedy', 'prank', 'challenge',
-        'news', 'breaking news', 'reporter', 'anchor',
-        'game', 'gaming', 'gameplay', 'streamer',
-        'travel', 'trip', 'tour', 'vlog',
-        'reaction', 'review karte hain',
-    ]
-      accept_keywords = [
-         'lecture', 'lesson', 'chapter', 'topic', 'concept', 'definition',
-        'theory', 'algorithm', 'programming', 'code', 'function',
-        'mathematics', 'math', 'physics', 'chemistry', 'biology',
-        'history', 'geography', 'economics', 'psychology',
-        'tutorial', 'course', 'university', 'college', 'school',
-        'exam', 'assignment', 'hypothesis', 'equation', 'formula',
-        'theorem', 'proof', 'data', 'analysis', 'research',
-        'python', 'javascript', 'machine learning', 'artificial intelligence',
-        'database', 'network', 'compiler', 'accounting', 'finance',
-        'explain', 'understand', 'learn', 'study', 'education',
-        'parh', 'seekhna', 'samajhna', 'taleem', 'ilm',
-        'class', 'teacher', 'student', 'syllabus', 'notes',
-    ]
-      text = (transcript[:3000] + " " + title).lower()
-      reject_count = sum(1 for kw in reject_keywords if kw in text)
-      accept_count = sum(1 for kw in accept_keywords if kw in text)
-    # 2 ya zyada reject keywords — reject!
-      if reject_count >= 2:
-        return False
-    # 2 ya zyada accept keywords — allow!
-      if accept_count >= 2:
-        return True
-    # Dono mein kuch nahi — AI se check
-      try:
-        system_prompt = """Strict classifier. Return ONLY 'yes' or 'no'.
-'yes' ONLY for: university lecture, school lesson, coding tutorial, academic subject, professional skill training.
-'no' for: cooking, drama, song, vlog, news, cartoon, gaming, fashion, travel, comedy, recipe."""
-        user_prompt = f"Title: {title}\nTranscript: {transcript[:800]}\nIs this educational? yes or no only."
-        response = self._call_llm(system_prompt, user_prompt, max_tokens=5)
-        return 'yes' in response.lower().strip()
-      except:
-        return False

         api_key = os.environ.get("SUPADATA_API_KEY")
         if not api_key:
             raise ValueError("SUPADATA_API_KEY environment variable not set!")
+        self.client = OpenAI(
+            api_key=api_key,
+            base_url="https://api.supadata.ai/v1"  # ← Supadata ka LLM endpoint
+        )
         self.model = "gpt-4o-mini"
     def _call_llm(self, system_prompt: str, user_prompt: str, max_tokens: int = 2000) -> str:
     def _get_language_instruction(self, language: str) -> str:
         instructions = {
+            "english": "Respond in clear, simple English.",
+            "urdu": "صرف اردو میں جواب دیں۔ آسان اردو استعمال کریں۔",
+            "roman_urdu": "Sirf Roman Urdu mein jawab do. English bilkul mat use karo. Jaise: 'Yeh topic bohot important hai kyunki...'"
+        }
         return instructions.get(language, instructions["english"])
     def answer_question(self, question: str, context: str, language: str, video_title: str, history: list = []) -> str:
                 {"from": "3", "to": "4", "label": ""}
             ]
         }
     def generate_quiz(self, transcript: str, language: str, video_title: str) -> list:
         lang_instruction = self._get_language_instruction(language)
         transcript_excerpt = transcript[:5000]
         system_prompt = f"""You are LectureLens AI. Generate MCQ quiz questions.
 {lang_instruction}
 IMPORTANT: Return ONLY a valid JSON array. No markdown, no backticks, no explanation.
 3. Unique Points in Video 2
 4. Which is better for beginners?"""
         return self._call_llm(system_prompt, user_prompt, max_tokens=2000)
     def check_educational(self, transcript: str, title: str = "") -> bool:
         """Decide whether a video looks educational using keyword heuristics.

         Only the first 3000 characters of the transcript plus the title are
         inspected. The policy is deliberately permissive: a single accept
         keyword allows the video; it is rejected only when no accept keyword
         is present and at least three distinct reject keywords are; every
         other case is allowed (benefit of the doubt goes to the user).

         Keywords are matched case-insensitively at the start of a word, so
         stems such as 'parh' still match 'parhna', while 'ilm' no longer
         matches inside 'film' and 'actor' no longer matches inside 'factor'.
         Runs of whitespace are collapsed first so multi-word keywords still
         match when the transcript breaks a line between the words.

         Args:
             transcript: Video transcript text; None is treated as empty.
             title: Video title; None is treated as empty.

         Returns:
             True if the content should be allowed, False if it is clearly
             non-educational.
         """
         import re  # function-scope import keeps this method self-contained

         # Reject only content that is clearly non-educational.
         hard_reject_keywords = (
             'recipe', 'cooking', 'ingredient', 'tablespoon', 'teaspoon',
             'karahi', 'biryani', 'pakana', 'khana banana',
             'drama serial', 'episode dekho', 'actor', 'actress',
             'song lyrics', 'music video', 'concert',
             'makeup tutorial', 'skincare routine', 'fashion haul',
             'funny prank', 'comedy sketch',
             'gaming gameplay', 'game stream',
         )
         # Any one of these marks the content as educational.
         hard_accept_keywords = (
             'lecture', 'lesson', 'chapter', 'tutorial', 'course',
             'university', 'college', 'school', 'class',
             'algorithm', 'programming', 'python', 'javascript', 'machine learning',
             'mathematics', 'math', 'physics', 'chemistry', 'biology',
             'history', 'geography', 'economics', 'psychology',
             'exam', 'assignment', 'hypothesis', 'equation', 'formula',
             'theorem', 'proof', 'data structure', 'database',
             'artificial intelligence', 'deep learning', 'neural network',
             'accounting', 'finance', 'networking', 'compiler',
             'parh', 'seekhna', 'samajhna', 'taleem', 'ilm', 'sabaq',
             'teacher', 'student', 'syllabus', 'notes',
             'explain', 'definition', 'concept', 'theory',
         )

         raw_text = (transcript or "")[:3000] + " " + (title or "")
         # Collapse newlines/tabs so 'music\nvideo' matches 'music video'.
         text = " ".join(raw_text.lower().split())

         def has_keyword(keyword: str) -> bool:
             # Anchor at a word start only, so stems still match inflected
             # forms but keywords buried inside other words do not.
             return re.search(r"\b" + re.escape(keyword), text) is not None

         # Any accept keyword allows the video immediately.
         if any(has_keyword(kw) for kw in hard_accept_keywords):
             return True

         # Reject only when three or more distinct reject keywords are present;
         # everything else is allowed.
         reject_count = sum(1 for kw in hard_reject_keywords if has_keyword(kw))
         return reject_count < 3