# Author: Sameer1234-prog
# Enhanced UI with better text formatting, fixed FFmpeg detection,
# improved lesson display with black text and bold formatting
import os
import json

import faiss
import numpy as np
from sentence_transformers import SentenceTransformer
from PyPDF2 import PdfReader
from openai import OpenAI
from dotenv import load_dotenv

# Pinecone is an optional dependency; fall back to local FAISS when absent.
try:
    from pinecone import Pinecone
    PINECONE_AVAILABLE = True
except ImportError:
    PINECONE_AVAILABLE = False

load_dotenv()

# Shared sentence-embedding model (produces 384-dimensional vectors).
embed_model = SentenceTransformer('all-MiniLM-L6-v2')

# Groq exposes an OpenAI-compatible endpoint, so the OpenAI client is reused.
client = OpenAI(
    api_key=os.getenv("GROQ_API_KEY"),
    base_url="https://api.groq.com/openai/v1",
)

# Vector-store selection: use Pinecone only when both the package and the
# API key are available; otherwise keep everything in process with FAISS.
use_pinecone = PINECONE_AVAILABLE and bool(os.getenv("PINECONE_API_KEY"))

if use_pinecone:
    pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))
    index_name = "mindgap-index"
    existing_indexes = [index.name for index in pc.list_indexes()]
    if index_name not in existing_indexes:
        pc.create_index(
            name=index_name,
            dimension=384,
            metric='cosine',
            spec={'serverless': {'cloud': 'aws', 'region': 'us-east-1'}},
        )
    vector_index = pc.Index(index_name)
else:
    # In-memory FAISS fallback; chunk texts live in a parallel list because
    # FAISS stores only the vectors, not the original strings.
    faiss_index = faiss.IndexFlatL2(384)
    stored_chunks = []
class RAGEngine:
    """Retrieval-augmented generation engine backed by Pinecone or FAISS.

    Relies on module-level globals defined at import time: ``embed_model``
    (sentence encoder), ``client`` (Groq chat client via the OpenAI SDK),
    ``use_pinecone`` and the matching store (``vector_index`` for Pinecone,
    or ``faiss_index`` plus the parallel ``stored_chunks`` list for FAISS).
    """

    def __init__(self):
        # Embedding dimensionality of all-MiniLM-L6-v2; mirrors the store setup.
        self.dimension = 384

    def process_file(self, file_path, ocr_text=""):
        """Extract text from *file_path*, chunk it, embed and index the chunks.

        Args:
            file_path: Path to a PDF or plain-text file.
            ocr_text: Optional pre-extracted OCR text prepended to the result.

        Returns:
            Number of chunks added to the vector store (0 for empty input).
        """
        text = ocr_text
        if file_path.endswith('.pdf'):
            reader = PdfReader(file_path)
            for page in reader.pages:
                # extract_text() may return None for image-only pages.
                text += (page.extract_text() or "") + "\n"
        else:
            with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
                text += f.read() + "\n"

        chunks = self._simple_chunk(text)
        if not chunks:
            # Nothing to index (empty or image-only document). Skipping here
            # avoids encoding an empty batch and adding a zero-row array to FAISS.
            return 0

        embeddings = embed_model.encode(chunks)
        if use_pinecone:
            vectors = [
                {"id": str(i), "values": emb.tolist(), "metadata": {"text": chunk}}
                for i, (emb, chunk) in enumerate(zip(embeddings, chunks))
            ]
            vector_index.upsert(vectors=vectors)
        else:
            # Methods mutate the shared store in place; no rebinding, so no
            # `global` declaration is required.
            faiss_index.add(np.array(embeddings).astype('float32'))
            stored_chunks.extend(chunks)
        return len(chunks)

    def _simple_chunk(self, text, size=450, overlap=80):
        """Split *text* into word windows of *size* words overlapping by *overlap*.

        Returns an empty list for empty/whitespace-only input.
        """
        words = text.split()
        chunks = []
        for i in range(0, len(words), size - overlap):
            chunks.append(" ".join(words[i:i + size]))
        return chunks

    def search(self, query, top_k=3):
        """Return up to *top_k* stored chunk texts most similar to *query*."""
        query_emb = embed_model.encode([query])[0]
        if use_pinecone:
            res = vector_index.query(
                vector=query_emb.tolist(),
                top_k=top_k,
                include_metadata=True,
            )
            return [m['metadata']['text'] for m in res['matches'] if 'text' in m['metadata']]

        if not stored_chunks:
            return []
        distances, indices = faiss_index.search(
            np.array([query_emb]).astype('float32'), top_k
        )
        # FAISS pads the result with -1 when fewer than top_k vectors exist.
        # A bare `i < len(...)` check would accept -1 and silently return the
        # last stored chunk, so the lower bound must be enforced as well.
        return [stored_chunks[i] for i in indices[0] if 0 <= i < len(stored_chunks)]

    def generate_response(self, prompt, context="", profile=None, history=None):
        """Generate a tutoring reply for *prompt* using retrieved *context*.

        Args:
            prompt: The student's message.
            context: Retrieved material text to ground the answer.
            profile: Optional student profile dict (difficulty, language,
                weak_topics).
            history: Optional list of {'user': ..., 'ai': ...} turns; only the
                last five are included in the prompt.

        Returns:
            The model's reply text, or an apologetic error message if the
            API call fails.
        """
        # None-defaults instead of mutable default arguments.
        profile = profile if profile is not None else {}
        history = history if history is not None else []

        history_text = "\n".join([f"User: {h.get('user','')}\nAI: {h.get('ai','')}" for h in history[-5:]])
        full_prompt = f"""Context from materials:\n{context}
Student profile:
Difficulty: {profile.get('difficulty', 'beginner')}
Language preference: {profile.get('language', 'English')}
Weak topics: {', '.join(profile.get('weak_topics', []))}
Recent conversation:
{history_text}
User message: {prompt}
Respond naturally, helpfully and educationally. Keep explanations clear and adapt to the student's level.
"""
        try:
            resp = client.chat.completions.create(
                model="llama-3.3-70b-versatile",
                messages=[
                    {"role": "system", "content": "You are MindGap AI – friendly, adaptive learning assistant."},
                    {"role": "user", "content": full_prompt}
                ],
                temperature=0.7,
                max_tokens=1200
            )
            return resp.choices[0].message.content.strip()
        except Exception as e:
            # Surface API/auth problems to the user instead of crashing the UI.
            return f"I apologize, but I'm having trouble connecting to the AI service. Error: {str(e)}\n\nPlease check your GROQ_API_KEY in the .env file and ensure it's valid."

    def generate_quiz(self, topic, context="", profile=None):
        """Generate 3 multiple-choice questions on *topic* as a list of dicts.

        Args:
            topic: Subject for the quiz.
            context: Retrieved material text to base questions on.
            profile: Optional student profile dict (currently unused in the
                prompt; kept for interface compatibility).

        Returns:
            A list of question dicts with keys "question", "options",
            "correct_answer", "explanation"; empty list on any failure.
        """
        profile = profile if profile is not None else {}

        prompt = f"""Based on topic '{topic}' and context:\n{context}
Create 3 multiple-choice questions (JSON array).
Each question must have:
- "question": str
- "options": list of 4 strings
- "correct_answer": one of the options (exact string)
- "explanation": short explanation
Output **only** valid JSON array, nothing else.
"""
        try:
            resp = client.chat.completions.create(
                model="llama-3.3-70b-versatile",
                messages=[{"role": "user", "content": prompt}],
                temperature=0.4
            )
            raw = resp.choices[0].message.content.strip()
            # Models sometimes wrap the JSON in markdown fences despite the
            # instruction; strip them so json.loads still succeeds.
            if raw.startswith("```"):
                raw = raw.strip("`").strip()
                if raw.lower().startswith("json"):
                    raw = raw[4:]
            return json.loads(raw)
        except Exception as e:
            print(f"Quiz generation error: {e}")
            return []