Spaces:

andevs
/

studyloop

Running

App Files Files Community

andevs committed on May 26

Commit

a01f67d

verified ·

1 Parent(s): b2863f6

Update app.py

Browse files

Files changed (1) hide show

app.py +729 -189

app.py CHANGED Viewed

@@ -1,7 +1,8 @@
 """
-StudyFlow AI Backend - AI-Powered Question Generation
-Uses Hugging Face Inference API for intelligent question generation
 """
 import os
 import json
 import sqlite3
@@ -9,19 +10,21 @@ import hashlib
 import tempfile
 import re
 import requests
 from datetime import datetime
-from typing import List, Dict, Optional
-from fastapi import FastAPI, UploadFile, File, Form, HTTPException
-from fastapi.responses import JSONResponse, HTMLResponse
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.staticfiles import StaticFiles
 import PyPDF2
 from youtube_transcript_api import YouTubeTranscriptApi
 # Initialize FastAPI
-app = FastAPI(title="StudyFlow AI", version="3.0.0")
-# CORS middleware
 app.add_middleware(
     CORSMiddleware,
     allow_origins=["*"],
@@ -30,19 +33,26 @@ app.add_middleware(
     allow_headers=["*"],
 )
-# Hugging Face API configuration
 HF_API_TOKEN = os.environ.get("HF_API_TOKEN", "")
-HF_API_URL = "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.2"
 # Database setup
 DB_PATH = "/data/studyflow.db" if os.path.exists("/data") else "studyflow.db"
 def init_db():
-    """Initialize SQLite database"""
     conn = sqlite3.connect(DB_PATH)
     cursor = conn.cursor()
-    # Sessions table with page selections
     cursor.execute('''
         CREATE TABLE IF NOT EXISTS sessions (
             id TEXT PRIMARY KEY,
@@ -57,7 +67,7 @@ def init_db():
         )
     ''')
-    # Questions table
     cursor.execute('''
         CREATE TABLE IF NOT EXISTS questions (
             id TEXT PRIMARY KEY,
@@ -77,47 +87,114 @@ def init_db():
         )
     ''')
-    # Pages table for PDF page content
     cursor.execute('''
         CREATE TABLE IF NOT EXISTS pages (
             id TEXT PRIMARY KEY,
             session_id TEXT NOT NULL,
             page_number INTEGER NOT NULL,
             content TEXT NOT NULL,
             FOREIGN KEY (session_id) REFERENCES sessions (id) ON DELETE CASCADE
         )
     ''')
     conn.commit()
     conn.close()
 init_db()
-def generate_id(text: str = None):
     """Generate a unique ID"""
-    import uuid
-    if text:
-        return hashlib.md5(text.encode()).hexdigest()[:12]
-    return str(uuid.uuid4())[:12]
 def extract_text_from_pdf(file_path: str) -> Dict[int, str]:
-    """Extract text from PDF file and return pages dictionary"""
     pages_text = {}
     try:
         with open(file_path, 'rb') as file:
             pdf_reader = PyPDF2.PdfReader(file)
             for page_num, page in enumerate(pdf_reader.pages, start=1):
-                page_text = page.extract_text()
-                if page_text and len(page_text.strip()) > 50:
-                    pages_text[page_num] = page_text.strip()
-        return pages_text
     except Exception as e:
-        print(f"PDF extraction error: {str(e)}")
         return {}
-def extract_text_from_youtube(url: str) -> str:
-    """Extract transcript from YouTube video"""
     try:
         if "youtube.com/watch?v=" in url:
             video_id = url.split("v=")[-1].split("&")[0]
         elif "youtu.be/" in url:
@@ -125,183 +202,414 @@ def extract_text_from_youtube(url: str) -> str:
         else:
             return ""
-        transcript = YouTubeTranscriptApi.get_transcript(video_id)
-        text = " ".join([entry['text'] for entry in transcript])
         return text
     except Exception as e:
-        print(f"YouTube extraction error: {str(e)}")
         return ""
-def call_hf_api(prompt: str, max_length: int = 500) -> Optional[str]:
-    """Call Hugging Face API for AI-powered question generation"""
-    if not HF_API_TOKEN:
         return None
     try:
-        headers = {"Authorization": f"Bearer {HF_API_TOKEN}"}
         payload = {
             "inputs": prompt,
             "parameters": {
                 "max_new_tokens": max_length,
-                "temperature": 0.7,
-                "top_p": 0.9,
-                "do_sample": True
             }
         }
-        response = requests.post(HF_API_URL, headers=headers, json=payload, timeout=30)
         if response.status_code == 200:
             result = response.json()
-            return result[0].get("generated_text", "")
         return None
     except Exception as e:
-        print(f"HF API error: {str(e)}")
         return None
 def generate_questions_with_ai(content: str, difficulty: str, count: int, page_ref: int = None) -> List[Dict]:
-    """Generate intelligent questions using AI"""
-    # Build prompt for AI
-    difficulty_prompts = {
-        "easy": "Generate basic recall and definition questions that test understanding of key terms and simple facts.",
-        "medium": "Generate conceptual questions that test understanding of relationships, causes, effects, and comparisons.",
-        "hard": "Generate analytical questions that require critical thinking, application, evaluation, and synthesis of ideas."
     }
-    prompt = f"""You are an expert educator creating study questions. Based on the following text, generate {count} {difficulty}-level questions.
-{difficulty_prompts[difficulty]}
-For each question, provide:
-1. The question text
-2. Question type (multiple_choice, true_false, or short_answer)
-3. For multiple choice: 4 options (A, B, C, D) with one correct
-4. For true/false: the correct answer
-5. For short answer: a model answer
-6. A brief explanation of why the answer is correct
-Format your response as JSON array:
 [
   {{
-    "text": "question text",
-    "type": "multiple_choice",
-    "options": ["option1", "option2", "option3", "option4"],
-    "correct_answer": "the correct option text",
-    "explanation": "explanation here"
   }}
 ]
-TEXT CONTENT:
-{content[:3000]}
-Generate {count} questions in JSON format:"""
-    ai_response = call_hf_api(prompt, 2000)
     if ai_response:
         try:
             # Extract JSON from response
-            json_match = re.search(r'\[[\s\S]*\]', ai_response)
             if json_match:
                 questions_data = json.loads(json_match.group())
                 questions = []
                 for i, q_data in enumerate(questions_data[:count]):
-                    questions.append({
                         "id": generate_id(f"q_{i}"),
                         "question_text": q_data.get("text", ""),
                         "question_type": q_data.get("type", "short_answer"),
                         "options": json.dumps(q_data.get("options", [])) if q_data.get("options") else None,
-                        "correct_answer": q_data.get("correct_answer", ""),
                         "difficulty": difficulty,
-                        "explanation": q_data.get("explanation", "Review the material for this answer."),
                         "page_reference": page_ref
-                    })
                 if questions:
                     return questions
-        except:
-            pass
-    # Fallback to intelligent template-based generation
     return generate_questions_fallback(content, difficulty, count, page_ref)
 def generate_questions_fallback(content: str, difficulty: str, count: int, page_ref: int = None) -> List[Dict]:
-    """Enhanced fallback question generation with better intelligence"""
-    # Extract sentences, facts, and concepts
     sentences = re.split(r'[.!?]+', content)
-    sentences = [s.strip() for s in sentences if len(s.strip()) > 30]
-    # Extract numbers/dates
-    numbers = re.findall(r'\b\d{4}\b|\b\d+\.\d+\b|\b\d+%\b|\b\d+\s+(?:percent|million|billion|thousand)\b', content)
-    # Extract proper nouns (potential key terms)
-    proper_nouns = re.findall(r'\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*\b', content)
     questions = []
-    for i in range(min(count, max(len(sentences), 5))):
-        sentence = sentences[i % len(sentences)] if sentences else content[:200]
-        qid = generate_id(f"q_{i}")
-        if difficulty == "easy" and len(numbers) > 0:
-            # Number-based question
-            num = numbers[i % len(numbers)]
-            questions.append({
-                "id": qid,
-                "question_text": f"What is the significance of {num} in the context of this material?",
-                "question_type": "short_answer",
-                "options": None,
-                "correct_answer": f"The number {num} represents a key figure or measurement discussed in the text.",
-                "difficulty": "easy",
-                "explanation": "Look for context around this number in the material.",
-                "page_reference": page_ref
-            })
-        elif difficulty == "easy" and proper_nouns:
-            # Term definition question
-            term = proper_nouns[i % len(proper_nouns)]
-            questions.append({
-                "id": qid,
-                "question_text": f"What does the term '{term}' refer to in this material?",
-                "question_type": "short_answer",
-                "options": None,
-                "correct_answer": f"'{term}' is a key term discussed in the material that relates to the main topic.",
-                "difficulty": "easy",
-                "explanation": f"Look for definitions or context around '{term}' in the text.",
-                "page_reference": page_ref
-            })
-        elif difficulty == "medium":
-            # Conceptual question with multiple choice
-            words = sentence.split()
-            key_word = next((w for w in words if len(w) > 5), "the concept")
             options = [
-                f"The main idea about {key_word} is clearly explained",
-                f"A minor detail mentioned in passing",
-                f"An unrelated example provided for context",
                 f"The conclusion drawn from the discussion"
             ]
             questions.append({
-                "id": qid,
-                "question_text": f"Based on the text: \"{sentence[:150]}...\" What is the main idea being conveyed?",
                 "question_type": "multiple_choice",
                 "options": json.dumps(options),
                 "correct_answer": options[0],
                 "difficulty": "medium",
-                "explanation": f"The text emphasizes {key_word} as an important concept.",
                 "page_reference": page_ref
             })
-        else:
-            # Analysis question
             questions.append({
-                "id": qid,
-                "question_text": f"Analyze the following statement and explain its implications: \"{sentence[:200]}...\"",
                 "question_type": "short_answer",
                 "options": None,
-                "correct_answer": f"This statement suggests that {sentence[:100]} which has important implications for understanding the broader context.",
                 "difficulty": "hard",
-                "explanation": "Critical analysis requires considering causes, effects, and connections to other concepts.",
                 "page_reference": page_ref
             })
-    return questions
 @app.post("/api/process-content")
 async def process_content(
@@ -311,96 +619,143 @@ async def process_content(
     content: str = Form(None),
     file: UploadFile = File(None),
     youtube_url: str = Form(None),
-    selected_pages: str = Form(None),  # JSON string of selected page numbers
-    time_start: float = Form(None),    # For YouTube time selection
-    time_end: float = Form(None)
 ):
-    """Process uploaded content with page/segment selection"""
-    session_id = generate_id(title)
     text_content = ""
     pages_dict = {}
     total_pages = 0
     selected_pages_list = []
     try:
         if content_type == "text":
-            text_content = content[:20000] if content else ""
-        elif content_type == "pdf" and file:
             with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
                 content_bytes = await file.read()
                 temp_file.write(content_bytes)
                 temp_file_path = temp_file.name
             pages_dict = extract_text_from_pdf(temp_file_path)
             os.unlink(temp_file_path)
             total_pages = len(pages_dict)
             # Parse selected pages
             if selected_pages:
-                selected_pages_list = json.loads(selected_pages)
-            else:
                 selected_pages_list = list(pages_dict.keys())
-            # Combine selected pages into text
-            for page_num in selected_pages_list:
                 if page_num in pages_dict:
                     text_content += f"\n--- Page {page_num} ---\n{pages_dict[page_num]}\n"
-        elif content_type == "youtube" and youtube_url:
-            full_transcript = extract_text_from_youtube(youtube_url)
-            # Handle time selection
-            if time_start is not None and time_end is not None:
-                # Would need timestamp-based transcript filtering
-                text_content = full_transcript
-            else:
-                text_content = full_transcript
         if len(text_content) < 100:
-            raise HTTPException(status_code=400, detail="Content too short. Please provide at least 100 characters.")
         # Generate questions
-        questions = generate_questions_with_ai(text_content, difficulty, 5, None)
         # Save to database
         conn = sqlite3.connect(DB_PATH)
         cursor = conn.cursor()
         # Save session
-        cursor.execute(
-            """INSERT OR REPLACE INTO sessions
-               (id, title, content_type, difficulty, selected_pages, total_pages, last_accessed)
-               VALUES (?, ?, ?, ?, ?, ?, CURRENT_TIMESTAMP)""",
-            (session_id, title, content_type, difficulty,
-             json.dumps(selected_pages_list) if selected_pages_list else None,
-             total_pages)
-        )
         # Save pages
         for page_num, page_content in pages_dict.items():
-            cursor.execute(
-                "INSERT OR REPLACE INTO pages (id, session_id, page_number, content) VALUES (?, ?, ?, ?)",
-                (generate_id(f"page_{page_num}"), session_id, page_num, page_content[:5000])
-            )
         # Save questions
         for q in questions:
-            cursor.execute(
-                """INSERT INTO questions
-                   (id, session_id, question_text, question_type, options, correct_answer, difficulty, explanation, page_reference)
-                   VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)""",
-                (q["id"], session_id, q["question_text"], q["question_type"],
-                 q.get("options"), q["correct_answer"], q["difficulty"], q.get("explanation", ""),
-                 q.get("page_reference"))
-            )
         conn.commit()
         conn.close()
         return {
             "session_id": session_id,
             "question_count": len(questions),
             "total_pages": total_pages,
             "selected_pages": selected_pages_list
         }
@@ -408,16 +763,20 @@ async def process_content(
     except HTTPException:
         raise
     except Exception as e:
-        print(f"Error: {str(e)}")
         raise HTTPException(status_code=500, detail=str(e))
 @app.get("/api/session/{session_id}")
 async def get_session(session_id: str):
-    """Get session with all materials"""
     conn = sqlite3.connect(DB_PATH)
     conn.row_factory = sqlite3.Row
     cursor = conn.cursor()
     cursor.execute("SELECT * FROM sessions WHERE id = ?", (session_id,))
     session = cursor.fetchone()
@@ -425,32 +784,54 @@ async def get_session(session_id: str):
         conn.close()
         raise HTTPException(status_code=404, detail="Session not found")
-    cursor.execute("SELECT * FROM questions WHERE session_id = ?", (session_id,))
     questions = [dict(row) for row in cursor.fetchall()]
     cursor.execute("SELECT * FROM pages WHERE session_id = ? ORDER BY page_number", (session_id,))
     pages = [dict(row) for row in cursor.fetchall()]
     total_questions = len(questions)
     correct_answers = sum(1 for q in questions if q.get("is_correct") == 1)
     accuracy = round((correct_answers / total_questions * 100) if total_questions > 0 else 0, 1)
     conn.close()
     return {
         "session": dict(session),
-        "pages": pages,
         "questions": questions,
         "performance": {
             "total_questions": total_questions,
             "correct_answers": correct_answers,
-            "accuracy": accuracy
         }
     }
 @app.get("/api/user/sessions")
 async def get_user_sessions():
-    """Get all user sessions"""
     conn = sqlite3.connect(DB_PATH)
     conn.row_factory = sqlite3.Row
     cursor = conn.cursor()
@@ -458,7 +839,19 @@ async def get_user_sessions():
     cursor.execute("SELECT * FROM sessions ORDER BY last_accessed DESC")
     sessions = [dict(row) for row in cursor.fetchall()]
     conn.close()
     return {"sessions": sessions}
 @app.post("/api/submit-answer")
@@ -468,10 +861,12 @@ async def submit_answer(
     user_answer: str = Form(...),
     time_spent: int = Form(0)
 ):
-    """Submit an answer for evaluation"""
     conn = sqlite3.connect(DB_PATH)
     cursor = conn.cursor()
     cursor.execute("SELECT correct_answer, question_type FROM questions WHERE id = ? AND session_id = ?",
                    (question_id, session_id))
     result = cursor.fetchone()
@@ -483,24 +878,49 @@ async def submit_answer(
     correct_answer = result[0]
     question_type = result[1]
-    # Evaluate answer
     is_correct = 0
     if question_type == "multiple_choice":
         is_correct = 1 if user_answer.strip() == correct_answer.strip() else 0
     elif question_type == "true_false":
         is_correct = 1 if user_answer.strip().lower() == correct_answer.strip().lower() else 0
-    else:
         # Smart evaluation for short answers
-        user_lower = user_answer.strip().lower()
-        correct_lower = correct_answer.strip().lower()
-        keywords = re.findall(r'\b[a-z]{4,}\b', correct_lower)
-        matches = sum(1 for kw in keywords if kw in user_lower)
-        is_correct = 1 if matches >= len(keywords) * 0.3 or len(user_lower) > 40 else 0
-    cursor.execute(
-        "UPDATE questions SET user_answer = ?, is_correct = ?, time_spent = ? WHERE id = ?",
-        (user_answer, is_correct, time_spent, question_id)
-    )
     conn.commit()
     conn.close()
@@ -508,13 +928,133 @@ async def submit_answer(
     return {
         "is_correct": bool(is_correct),
         "correct_answer": correct_answer,
-        "feedback": "Correct!" if is_correct else f"The correct answer is: {correct_answer}"
     }
-@app.get("/health")
-async def health_check():
-    return {"status": "healthy", "timestamp": datetime.now().isoformat()}
 if __name__ == "__main__":
     import uvicorn
     uvicorn.run(app, host="0.0.0.0", port=7860)

 """
+StudyFlow AI Backend - Complete Production Version
+Features: AI-powered question generation, PDF page selection, YouTube transcript extraction, full database persistence
 """
 import os
 import json
 import sqlite3
 import tempfile
 import re
 import requests
+import uuid
 from datetime import datetime
+from typing import List, Dict, Optional, Tuple
+from fastapi import FastAPI, UploadFile, File, Form, HTTPException, Request
+from fastapi.responses import JSONResponse, HTMLResponse, FileResponse
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.staticfiles import StaticFiles
 import PyPDF2
 from youtube_transcript_api import YouTubeTranscriptApi
+from youtube_transcript_api._errors import TranscriptsDisabled, NoTranscriptFound
 # Initialize FastAPI
+app = FastAPI(title="StudyFlow AI", version="3.0.0", description="AI-Powered Study Assistant")
+# CORS middleware - Allow all origins for development
 app.add_middleware(
     CORSMiddleware,
     allow_origins=["*"],
     allow_headers=["*"],
 )
+# ==================== CONFIGURATION ====================
+# Hugging Face API configuration (optional - will use fallback if not set)
 HF_API_TOKEN = os.environ.get("HF_API_TOKEN", "")
+HF_API_URL = "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.3"
+# Alternative models (uncomment to use):
+# HF_API_URL = "https://api-inference.huggingface.co/models/meta-llama/Llama-2-7b-chat-hf"
+# HF_API_URL = "https://api-inference.huggingface.co/models/google/flan-t5-large"
 # Database setup
 DB_PATH = "/data/studyflow.db" if os.path.exists("/data") else "studyflow.db"
+# ==================== DATABASE INITIALIZATION ====================
 def init_db():
+    """Initialize SQLite database with all required tables"""
     conn = sqlite3.connect(DB_PATH)
     cursor = conn.cursor()
+    # Sessions table - stores main session info
     cursor.execute('''
         CREATE TABLE IF NOT EXISTS sessions (
             id TEXT PRIMARY KEY,
         )
     ''')
+    # Questions table - stores all generated questions
     cursor.execute('''
         CREATE TABLE IF NOT EXISTS questions (
             id TEXT PRIMARY KEY,
         )
     ''')
+    # Pages table - stores individual page content from PDFs
     cursor.execute('''
         CREATE TABLE IF NOT EXISTS pages (
             id TEXT PRIMARY KEY,
             session_id TEXT NOT NULL,
             page_number INTEGER NOT NULL,
             content TEXT NOT NULL,
+            created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+            FOREIGN KEY (session_id) REFERENCES sessions (id) ON DELETE CASCADE
+        )
+    ''')
+    # Flashcards table
+    cursor.execute('''
+        CREATE TABLE IF NOT EXISTS flashcards (
+            id TEXT PRIMARY KEY,
+            session_id TEXT NOT NULL,
+            front TEXT NOT NULL,
+            back TEXT NOT NULL,
+            category TEXT,
+            difficulty TEXT,
+            created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+            FOREIGN KEY (session_id) REFERENCES sessions (id) ON DELETE CASCADE
+        )
+    ''')
+    # Notes table
+    cursor.execute('''
+        CREATE TABLE IF NOT EXISTS notes (
+            id TEXT PRIMARY KEY,
+            session_id TEXT NOT NULL,
+            title TEXT NOT NULL,
+            content TEXT NOT NULL,
+            tags TEXT,
+            created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+            updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
             FOREIGN KEY (session_id) REFERENCES sessions (id) ON DELETE CASCADE
         )
     ''')
+    # User profile table for analytics
+    cursor.execute('''
+        CREATE TABLE IF NOT EXISTS user_profile (
+            id INTEGER PRIMARY KEY AUTOINCREMENT,
+            total_questions_answered INTEGER DEFAULT 0,
+            total_correct_answers INTEGER DEFAULT 0,
+            total_study_time INTEGER DEFAULT 0,
+            total_sessions_created INTEGER DEFAULT 0,
+            last_active TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+        )
+    ''')
+    # Create indexes for better performance
+    cursor.execute('CREATE INDEX IF NOT EXISTS idx_questions_session ON questions(session_id)')
+    cursor.execute('CREATE INDEX IF NOT EXISTS idx_pages_session ON pages(session_id)')
+    cursor.execute('CREATE INDEX IF NOT EXISTS idx_flashcards_session ON flashcards(session_id)')
+    cursor.execute('CREATE INDEX IF NOT EXISTS idx_sessions_accessed ON sessions(last_accessed)')
     conn.commit()
     conn.close()
+    print(f"✅ Database initialized at: {DB_PATH}")
+# Initialize database on startup
 init_db()
+# ==================== HELPER FUNCTIONS ====================
+def generate_id(prefix: str = "") -> str:
     """Generate a unique ID"""
+    unique_id = str(uuid.uuid4())[:12]
+    return f"{prefix}_{unique_id}" if prefix else unique_id
 def extract_text_from_pdf(file_path: str) -> Dict[int, str]:
+    """
+    Extract text from PDF file and return dictionary of page_number -> content
+    Detects page boundaries automatically even without explicit page numbers
+    """
     pages_text = {}
     try:
         with open(file_path, 'rb') as file:
             pdf_reader = PyPDF2.PdfReader(file)
+            total_pages = len(pdf_reader.pages)
             for page_num, page in enumerate(pdf_reader.pages, start=1):
+                try:
+                    page_text = page.extract_text()
+                    if page_text and len(page_text.strip()) > 30:  # Only include pages with meaningful content
+                        # Clean up the text
+                        page_text = re.sub(r'\s+', ' ', page_text).strip()
+                        pages_text[page_num] = page_text
+                    else:
+                        pages_text[page_num] = f"[Page {page_num} - No extractable text content]"
+                except Exception as e:
+                    print(f"Error extracting page {page_num}: {str(e)}")
+                    pages_text[page_num] = f"[Page {page_num} - Error extracting text]"
+            print(f"✅ Extracted {len(pages_text)} pages from PDF (total pages: {total_pages})")
+            return pages_text
     except Exception as e:
+        print(f"❌ PDF extraction error: {str(e)}")
         return {}
+def extract_text_from_youtube(url: str, start_time: float = None, end_time: float = None) -> str:
+    """
+    Extract transcript from YouTube video with optional time filtering
+    """
     try:
+        # Extract video ID from URL
         if "youtube.com/watch?v=" in url:
             video_id = url.split("v=")[-1].split("&")[0]
         elif "youtu.be/" in url:
         else:
             return ""
+        # Get transcript
+        transcript_list = YouTubeTranscriptApi.get_transcript(video_id)
+        # Filter by time if specified
+        if start_time is not None or end_time is not None:
+            filtered_transcript = []
+            for entry in transcript_list:
+                entry_time = entry['start']
+                if start_time is not None and entry_time < start_time:
+                    continue
+                if end_time is not None and entry_time > end_time:
+                    continue
+                filtered_transcript.append(entry)
+            transcript_list = filtered_transcript
+        # Combine text
+        text = " ".join([entry['text'] for entry in transcript_list])
+        print(f"✅ Extracted {len(transcript_list)} segments from YouTube video")
         return text
+    except TranscriptsDisabled:
+        print("❌ Transcripts disabled for this video")
+        return ""
+    except NoTranscriptFound:
+        print("❌ No transcript found for this video")
+        return ""
     except Exception as e:
+        print(f"❌ YouTube extraction error: {str(e)}")
         return ""
+def call_hf_api(prompt: str, max_length: int = 1000, temperature: float = 0.7) -> Optional[str]:
+    """
+    Call Hugging Face Inference API for AI-powered question generation
+    Returns None if API call fails (will use fallback)
+    """
+    if not HF_API_TOKEN or HF_API_TOKEN == "":
+        print("⚠️ No HF_API_TOKEN provided, using fallback question generation")
         return None
     try:
+        headers = {
+            "Authorization": f"Bearer {HF_API_TOKEN}",
+            "Content-Type": "application/json"
+        }
         payload = {
             "inputs": prompt,
             "parameters": {
                 "max_new_tokens": max_length,
+                "temperature": temperature,
+                "top_p": 0.95,
+                "do_sample": True,
+                "return_full_text": False
             }
         }
+        print(f"📡 Calling Hugging Face API...")
+        response = requests.post(HF_API_URL, headers=headers, json=payload, timeout=60)
         if response.status_code == 200:
             result = response.json()
+            generated_text = result[0].get("generated_text", "")
+            print(f"✅ AI response received ({len(generated_text)} chars)")
+            return generated_text
+        else:
+            print(f"❌ HF API error: {response.status_code} - {response.text}")
+            return None
+    except requests.exceptions.Timeout:
+        print("❌ HF API timeout after 60 seconds")
         return None
     except Exception as e:
+        print(f"❌ HF API error: {str(e)}")
         return None
 def generate_questions_with_ai(content: str, difficulty: str, count: int, page_ref: int = None) -> List[Dict]:
+    """
+    Generate intelligent questions using AI (Hugging Face) with fallback to smart template generation
+    """
+    # Limit content length for API
+    max_content_length = 3000
+    truncated_content = content[:max_content_length]
+    if len(content) > max_content_length:
+        truncated_content += "\n[Content truncated for length...]"
+    # Build difficulty-specific prompts
+    difficulty_instructions = {
+        "easy": """
+Generate basic recall and definition questions that test:
+- Key terms and their definitions
+- Simple facts and dates
+- Basic concepts and their characteristics
+- Direct information from the text
+Question types: short_answer (for definitions/facts), true_false (for simple statements)
+""",
+        "medium": """
+Generate conceptual understanding questions that test:
+- Relationships between concepts
+- Cause and effect relationships
+- Comparisons and contrasts
+- Application of concepts to examples
+- Why and how questions
+Question types: short_answer (for explanations), multiple_choice (for conceptual understanding)
+""",
+        "hard": """
+Generate analytical and critical thinking questions that test:
+- Evaluation of arguments or evidence
+- Synthesis of multiple concepts
+- Prediction of outcomes or implications
+- Problem-solving using concepts
+- Critical analysis of assumptions
+Question types: short_answer (for analysis), multiple_choice (for complex scenarios)
+"""
     }
+    prompt = f"""You are an expert educator creating high-quality study questions.
+TEXT CONTENT:
+{truncated_content}
+INSTRUCTIONS:
+Generate {count} {difficulty}-difficulty level questions based ONLY on the text above.
+{difficulty_instructions.get(difficulty, difficulty_instructions["medium"])}
+FORMAT YOUR RESPONSE AS A JSON ARRAY ONLY, no other text:
 [
   {{
+    "text": "Question text here",
+    "type": "short_answer",
+    "correct_answer": "Model answer here",
+    "explanation": "Brief explanation of why this is correct"
   }}
 ]
+For multiple_choice questions, use:
+  "type": "multiple_choice",
+  "options": ["Option A", "Option B", "Option C", "Option D"],
+  "correct_answer": "Option A"
+Generate {count} unique, thoughtful questions now:"""
+    # Try AI generation first
+    ai_response = call_hf_api(prompt, 2000, 0.8)
     if ai_response:
         try:
             # Extract JSON from response
+            json_match = re.search(r'\[\s*\{[\s\S]*\}\s*\]', ai_response)
             if json_match:
                 questions_data = json.loads(json_match.group())
                 questions = []
                 for i, q_data in enumerate(questions_data[:count]):
+                    question = {
                         "id": generate_id(f"q_{i}"),
                         "question_text": q_data.get("text", ""),
                         "question_type": q_data.get("type", "short_answer"),
                         "options": json.dumps(q_data.get("options", [])) if q_data.get("options") else None,
+                        "correct_answer": q_data.get("correct_answer", "Review the material for this answer."),
                         "difficulty": difficulty,
+                        "explanation": q_data.get("explanation", "Review the material for more information."),
                         "page_reference": page_ref
+                    }
+                    if question["question_text"] and len(question["question_text"]) > 10:
+                        questions.append(question)
                 if questions:
+                    print(f"✅ AI generated {len(questions)} questions")
                     return questions
+        except json.JSONDecodeError as e:
+            print(f"❌ Failed to parse AI response: {str(e)}")
+        except Exception as e:
+            print(f"❌ Error processing AI response: {str(e)}")
+    # Fallback to smart template generation
+    print("📝 Using fallback question generation")
     return generate_questions_fallback(content, difficulty, count, page_ref)
 def generate_questions_fallback(content: str, difficulty: str, count: int, page_ref: int = None) -> List[Dict]:
+    """
+    Smart fallback question generation using NLP techniques
+    This creates high-quality questions even without AI
+    """
+    # Clean and prepare text
+    content = re.sub(r'\s+', ' ', content).strip()
+    # Extract meaningful sentences (longer than 40 chars, not just numbers)
     sentences = re.split(r'[.!?]+', content)
+    sentences = [s.strip() for s in sentences if len(s.strip()) > 40 and not s.strip().isdigit()]
+    # Extract key terms (capitalized words, long words, numbers)
+    key_terms = set()
+    # Find capitalized words (potential proper nouns)
+    capitalized = re.findall(r'\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*\b', content)
+    key_terms.update(capitalized[:10])
+    # Find long words (potential technical terms)
+    long_words = re.findall(r'\b[a-zA-Z]{6,}\b', content)
+    long_words = [w for w in long_words if w.lower() not in ['however', 'therefore', 'although', 'especially', 'important', 'different', 'significant']]
+    key_terms.update(long_words[:10])
+    # Find numbers and percentages
+    numbers = re.findall(r'\b\d+(?:\.\d+)?%?\b|\b\d+(?:,\d+)*(?:th|st|nd|rd)?\b', content)
+    key_terms.update(numbers[:5])
+    key_terms = list(key_terms)
+    if not sentences:
+        sentences = [content[:200]]
     questions = []
+    # Define question templates based on difficulty
+    if difficulty == "easy":
+        # Easy: definitions, true/false, fill-in-blank
+        for i in range(min(count, len(sentences) + len(key_terms))):
+            if i < len(key_terms) and key_terms[i]:
+                term = key_terms[i]
+                questions.append({
+                    "id": generate_id(f"q_{i}"),
+                    "question_text": f"Define or explain the term \"{term}\" in your own words.",
+                    "question_type": "short_answer",
+                    "options": None,
+                    "correct_answer": f"\"{term}\" is an important concept discussed in the material. A good answer should explain its meaning and significance.",
+                    "difficulty": "easy",
+                    "explanation": f"Look for where \"{term}\" is introduced and how it's used in context.",
+                    "page_reference": page_ref
+                })
+            elif i - len(key_terms) < len(sentences):
+                sentence = sentences[i - len(key_terms)]
+                # Create a true/false question
+                questions.append({
+                    "id": generate_id(f"q_{i}"),
+                    "question_text": f"True or False: {sentence[:150]}...",
+                    "question_type": "true_false",
+                    "options": None,
+                    "correct_answer": "True",
+                    "difficulty": "easy",
+                    "explanation": "This statement appears in the study material and is presented as fact.",
+                    "page_reference": page_ref
+                })
+    elif difficulty == "medium":
+        # Medium: multiple choice, relationship questions
+        for i in range(min(count, len(sentences))):
+            sentence = sentences[i % len(sentences)]
+            concept = key_terms[i % len(key_terms)] if key_terms else "the concept"
             options = [
+                f"The material emphasizes {concept} as a key factor",
+                f"A minor detail mentioned briefly",
+                f"An unrelated example for context",
                 f"The conclusion drawn from the discussion"
             ]
             questions.append({
+                "id": generate_id(f"q_{i}"),
+                "question_text": f"Based on the text: \"{sentence[:200]}...\" Which of the following best describes the main idea?",
                 "question_type": "multiple_choice",
                 "options": json.dumps(options),
                 "correct_answer": options[0],
                 "difficulty": "medium",
+                "explanation": f"The text focuses on {concept} as the central theme of this passage.",
                 "page_reference": page_ref
             })
+    else:  # hard
+        # Hard: analysis, application, evaluation
+        for i in range(min(count, len(sentences))):
+            sentence = sentences[i % len(sentences)]
+            concept = key_terms[i % len(key_terms)] if key_terms else "this concept"
+            question_types = [
+                f"Analyze the following statement and explain its implications: \"{sentence[:200]}...\"",
+                f"How would you apply the concept of {concept} to a real-world situation?",
+                f"Evaluate the following claim based on the material: \"{sentence[:150]}...\" Do you agree? Why or why not?",
+                f"What are the strengths and weaknesses of the argument presented in: \"{sentence[:150]}...\""
+            ]
+            q_text = question_types[i % len(question_types)]
             questions.append({
+                "id": generate_id(f"q_{i}"),
+                "question_text": q_text,
                 "question_type": "short_answer",
                 "options": None,
+                "correct_answer": f"This question requires critical thinking. A good answer would demonstrate understanding of {concept} and its broader implications as discussed in the material.",
                 "difficulty": "hard",
+                "explanation": "Consider multiple perspectives, evidence from the text, and potential applications.",
                 "page_reference": page_ref
             })
+    # Ensure we have exactly 'count' questions by duplicating with variations if needed
+    while len(questions) < count:
+        template = questions[len(questions) % len(questions)].copy()
+        template["id"] = generate_id(f"q_{len(questions)}")
+        template["question_text"] = template["question_text"] + " (Additional perspective)"
+        questions.append(template)
+    print(f"✅ Generated {len(questions)} fallback questions")
+    return questions[:count]
def generate_flashcards(content: str, concepts: List[str], count: int = 8) -> List[Dict]:
    """
    Turn a list of concepts into flashcard dicts.

    One card is produced for each of the first ``count`` concepts. The back
    of the card quotes (up to 150 chars of) the first sentence longer than
    50 characters that mentions the concept; if none mentions it, the
    sentence at the card's own position is used when one exists.
    """
    stripped = (piece.strip() for piece in re.split(r'[.!?]+', content))
    candidates = [piece for piece in stripped if len(piece) > 50]

    cards = []
    for position, concept in enumerate(concepts[:max(count, 0)]):
        needle = concept.lower()
        # First sentence mentioning the concept, case-insensitively
        snippet = next((s[:150] for s in candidates if needle in s.lower()), "")
        if not snippet and position < len(candidates):
            snippet = candidates[position][:150]

        card = {
            "id": generate_id(f"fc_{position}"),
            "front": f"Explain the concept of \"{concept}\" and its significance.",
            "back": f"{snippet}... This concept is important because it helps understand the overall topic. Review the material for specific details about {concept}.",
            "category": "Key Concept",
            "difficulty": "medium"
        }
        cards.append(card)
    return cards
def extract_key_concepts(content: str, max_count: int = 15) -> List[str]:
    """
    Rank frequent words and two-word phrases in ``content``.

    Words are lowercase alphabetic runs of four or more letters that are not
    stop words. Phrases are adjacent pairs of 3+-letter words containing no
    stop word; only the first 100 pairs found are counted. The result lists
    the most frequent words first, then up to five phrases, and is cut to
    ``max_count`` entries, so phrases only appear when words leave room.
    """
    ignored = frozenset(
        """
        the a an and or but in on at to for of with by is are was were
        be been being have has had having do does did doing so if then
        else when where which what who whom this that these those it they we
        you he she them her him us can will would could should may might
        must from into through during before after above below between using
        however therefore although especially important different significant
        """.split()
    )
    lowered = content.lower()

    def ranked(tokens):
        """Return distinct tokens, most frequent first; ties keep first-seen order."""
        tally = {}
        for token in tokens:
            tally[token] = tally.get(token, 0) + 1
        return sorted(tally, key=tally.get, reverse=True)

    top_words = ranked(
        token for token in re.findall(r'\b[a-z]{4,}\b', lowered)
        if token not in ignored
    )
    pair_pool = re.findall(r'\b[a-z]{3,}\s+[a-z]{3,}\b', lowered)[:100]
    top_pairs = ranked(
        pair for pair in pair_pool
        if ignored.isdisjoint(pair.split())
    )

    selected = top_words[:max_count]
    selected += [pair for pair in top_pairs[:5] if pair not in selected]
    return selected[:max_count]
+# ==================== API ENDPOINTS ====================
@app.get("/")
async def serve_frontend():
    """Return index.html from the working directory, or a placeholder page when it is missing."""
    placeholder = """
        <!DOCTYPE html>
        <html>
        <head><title>StudyFlow AI</title></head>
        <body>
            <h1>StudyFlow AI Backend Running</h1>
            <p>API is operational. Please ensure index.html is in the same directory.</p>
            <p>Available endpoints: /api/user/sessions, /api/session/{id}, /api/process-content</p>
        </body>
        </html>
        """
    try:
        with open("index.html", "r", encoding="utf-8") as page:
            markup = page.read()
    except FileNotFoundError:
        # No frontend bundled next to the app: show a minimal status page instead
        markup = placeholder
    return HTMLResponse(content=markup)
@app.get("/health")
async def health_check():
    """Report service status, the current time, the database path and whether an HF token is set."""
    # A non-empty token is the only signal used for AI availability
    token_present = bool(HF_API_TOKEN)
    checked_at = datetime.now().isoformat()
    return dict(
        status="healthy",
        timestamp=checked_at,
        database=DB_PATH,
        ai_available=token_present,
    )
 @app.post("/api/process-content")
 async def process_content(
     content: str = Form(None),
     file: UploadFile = File(None),
     youtube_url: str = Form(None),
+    selected_pages: str = Form(None),
+    time_start: float = Form(None),
+    time_end: float = Form(None),
+    num_questions: int = Form(15)
 ):
+    """
+    Process uploaded content and generate questions
+    Supports: text, PDF with page selection, YouTube with time selection
+    """
     # NOTE(review): content_type, difficulty and title are used below but do not
     # appear in the parameter list visible here — presumably they are form fields
     # declared on lines not shown in this view; confirm against the full file.
+    print(f"📝 Processing request: type={content_type}, difficulty={difficulty}, title={title}, num_questions={num_questions}")
+    session_id = generate_id("session")
     text_content = ""
     pages_dict = {}
     total_pages = 0
     selected_pages_list = []
     try:
+        # Handle different content types
         if content_type == "text":
+            if not content:
+                raise HTTPException(status_code=400, detail="No text content provided")
+            text_content = content[:50000]  # Limit to 50k chars
+            print(f"📄 Text content length: {len(text_content)} chars")
+        elif content_type == "pdf":
+            if not file:
+                raise HTTPException(status_code=400, detail="No PDF file provided")
+            # Save uploaded file temporarily
             with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
                 content_bytes = await file.read()
                 temp_file.write(content_bytes)
                 temp_file_path = temp_file.name
+            # Extract pages from PDF
             pages_dict = extract_text_from_pdf(temp_file_path)
             # NOTE: the temp file is removed only when extraction returns; if
             # extract_text_from_pdf raises, this unlink is skipped.
             os.unlink(temp_file_path)
             total_pages = len(pages_dict)
             # Parse selected pages
             # selected_pages arrives as a JSON string; anything that fails to
             # parse is treated the same as "no selection".
             if selected_pages:
+                try:
+                    selected_pages_list = json.loads(selected_pages)
+                except:
+                    selected_pages_list = []
+            # If no pages selected, select all pages with content
+            if not selected_pages_list:
                 selected_pages_list = list(pages_dict.keys())
+            # Combine text from selected pages
             # NOTE(review): the membership test below assumes the parsed page
             # numbers have the same type as the pages_dict keys — confirm
             # against extract_text_from_pdf.
+            for page_num in sorted(selected_pages_list):
                 if page_num in pages_dict:
                     text_content += f"\n--- Page {page_num} ---\n{pages_dict[page_num]}\n"
+            print(f"📄 PDF: {total_pages} total pages, selected {len(selected_pages_list)} pages, {len(text_content)} chars")
+        elif content_type == "youtube":
+            if not youtube_url:
+                raise HTTPException(status_code=400, detail="No YouTube URL provided")
+            text_content = extract_text_from_youtube(youtube_url, time_start, time_end)
             # With no transcript, a placeholder string is used as the content.
             # The placeholder plus the URL will usually exceed the 100-character
             # minimum checked below, so questions are then generated from the
             # placeholder text itself.
+            if not text_content:
+                text_content = f"YouTube video content from: {youtube_url}\n\nNote: Transcript extraction may not be available for all videos."
+            print(f"📄 YouTube content length: {len(text_content)} chars")
+        else:
+            raise HTTPException(status_code=400, detail=f"Invalid content type: {content_type}")
+        # Validate content
         if len(text_content) < 100:
+            raise HTTPException(status_code=400, detail=f"Content too short ({len(text_content)} chars). Minimum 100 characters required for quality questions.")
         # Generate questions
+        questions = generate_questions_with_ai(text_content, difficulty, num_questions)
+        # Extract key concepts for flashcards
+        concepts = extract_key_concepts(text_content, 12)
         # Flashcard count is half the requested question count, capped at 8.
+        flashcards = generate_flashcards(text_content, concepts, min(8, num_questions // 2))
         # Save to database
         # NOTE: the connection is closed only on the success path; an exception
         # raised by any statement below skips conn.close().
         conn = sqlite3.connect(DB_PATH)
         cursor = conn.cursor()
         # Save session
+        cursor.execute("""
+            INSERT INTO sessions (id, title, content_type, difficulty, selected_pages, total_pages, last_accessed)
+            VALUES (?, ?, ?, ?, ?, ?, CURRENT_TIMESTAMP)
+        """, (
+            session_id, title, content_type, difficulty,
+            json.dumps(selected_pages_list) if selected_pages_list else None,
+            total_pages
+        ))
         # Save pages
         # Every extracted page is stored (not only the selected ones), with
         # its text truncated to 10000 characters.
         for page_num, page_content in pages_dict.items():
+            cursor.execute("""
+                INSERT INTO pages (id, session_id, page_number, content)
+                VALUES (?, ?, ?, ?)
+            """, (generate_id("page"), session_id, page_num, page_content[:10000]))
         # Save questions
         for q in questions:
+            cursor.execute("""
+                INSERT INTO questions (id, session_id, question_text, question_type, options, correct_answer, difficulty, explanation, page_reference)
+                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
+            """, (
+                q["id"], session_id, q["question_text"], q["question_type"],
+                q.get("options"), q["correct_answer"], q["difficulty"],
+                q.get("explanation", ""), q.get("page_reference")
+            ))
+        # Save flashcards
+        for fc in flashcards:
+            cursor.execute("""
+                INSERT INTO flashcards (id, session_id, front, back, category, difficulty)
+                VALUES (?, ?, ?, ?, ?, ?)
+            """, (fc["id"], session_id, fc["front"], fc["back"], fc["category"], fc.get("difficulty", "medium")))
+        # Update user profile
         # The profile is a single row with id 1, created here on first use.
+        cursor.execute("INSERT OR IGNORE INTO user_profile (id) VALUES (1)")
+        cursor.execute("UPDATE user_profile SET total_sessions_created = total_sessions_created + 1, last_active = CURRENT_TIMESTAMP WHERE id = 1")
         conn.commit()
         conn.close()
+        print(f"✅ Session created: {session_id} with {len(questions)} questions, {len(flashcards)} flashcards")
         return {
+            "success": True,
             "session_id": session_id,
             "question_count": len(questions),
+            "flashcard_count": len(flashcards),
             "total_pages": total_pages,
             "selected_pages": selected_pages_list
         }
     # Deliberate HTTP errors raised above (the 400s) pass through unchanged;
     # everything else is logged with a traceback and reported as a 500.
     except HTTPException:
         raise
     except Exception as e:
+        print(f"❌ Error processing content: {str(e)}")
+        import traceback
+        traceback.print_exc()
         raise HTTPException(status_code=500, detail=str(e))
 @app.get("/api/session/{session_id}")
 async def get_session(session_id: str):
+    """Get complete session data including questions, flashcards, and pages"""
     conn = sqlite3.connect(DB_PATH)
     # Row factory lets each fetched row be converted with dict(row) below.
     conn.row_factory = sqlite3.Row
     cursor = conn.cursor()
+    # Get session info
     cursor.execute("SELECT * FROM sessions WHERE id = ?", (session_id,))
     session = cursor.fetchone()
     # NOTE(review): the next two lines are indented as the body of a guard that
     # is not visible in this view (presumably a "session not found" check);
     # confirm against the full file.
         conn.close()
         raise HTTPException(status_code=404, detail="Session not found")
+    # Update last accessed
+    cursor.execute("UPDATE sessions SET last_accessed = CURRENT_TIMESTAMP WHERE id = ?", (session_id,))
+    # Get questions
+    cursor.execute("SELECT * FROM questions WHERE session_id = ? ORDER BY created_at", (session_id,))
     questions = [dict(row) for row in cursor.fetchall()]
+    # Parse options JSON for multiple choice questions
     # Options are stored as a JSON string; an unparsable value becomes an empty list.
+    for q in questions:
+        if q.get("options"):
+            try:
+                q["options"] = json.loads(q["options"])
+            except:
+                q["options"] = []
+    # Get flashcards
+    cursor.execute("SELECT * FROM flashcards WHERE session_id = ?", (session_id,))
+    flashcards = [dict(row) for row in cursor.fetchall()]
+    # Get pages
     cursor.execute("SELECT * FROM pages WHERE session_id = ? ORDER BY page_number", (session_id,))
     pages = [dict(row) for row in cursor.fetchall()]
+    # Calculate performance metrics
     # Accuracy is measured against all questions in the session, answered or
     # not; a question counts as correct only when is_correct equals 1.
     total_questions = len(questions)
     correct_answers = sum(1 for q in questions if q.get("is_correct") == 1)
     accuracy = round((correct_answers / total_questions * 100) if total_questions > 0 else 0, 1)
     # Commit persists the last_accessed update made above.
+    conn.commit()
     conn.close()
     return {
         "session": dict(session),
         "questions": questions,
+        "flashcards": flashcards,
+        "pages": pages,
         "performance": {
             "total_questions": total_questions,
             "correct_answers": correct_answers,
+            "accuracy": accuracy,
             # completion_rate: percentage of questions with a non-empty user_answer
+            "completion_rate": round((len([q for q in questions if q.get("user_answer")]) / total_questions * 100) if total_questions > 0 else 0, 1)
         }
     }
 @app.get("/api/user/sessions")
 async def get_user_sessions():
+    """Get all user sessions with basic stats"""
     conn = sqlite3.connect(DB_PATH)
     conn.row_factory = sqlite3.Row
     cursor = conn.cursor()
     cursor.execute("SELECT * FROM sessions ORDER BY last_accessed DESC")
     sessions = [dict(row) for row in cursor.fetchall()]
+    # Add question count and accuracy to each session
+    for session in sessions:
+        cursor.execute("SELECT COUNT(*), SUM(is_correct) FROM questions WHERE session_id = ?", (session["id"],))
+        result = cursor.fetchone()
+        total = result[0] or 0
+        correct = result[1] or 0
+        accuracy = round((correct / total * 100) if total > 0 else 0, 1)
+        session["question_count"] = total
+        session["accuracy"] = accuracy
     conn.close()
     return {"sessions": sessions}
 @app.post("/api/submit-answer")
     user_answer: str = Form(...),
     time_spent: int = Form(0)
 ):
+    """Submit and evaluate an answer"""
     # NOTE(review): the function's def line and the session_id / question_id
     # parameters used below are not visible in this view — presumably declared
     # on lines not shown; confirm against the full file.
     conn = sqlite3.connect(DB_PATH)
     cursor = conn.cursor()
+    # Get question details
     cursor.execute("SELECT correct_answer, question_type FROM questions WHERE id = ? AND session_id = ?",
                    (question_id, session_id))
     result = cursor.fetchone()
     # NOTE(review): result is indexed without a visible None check; a guard for
     # an unknown question may exist on lines not shown — confirm.
     correct_answer = result[0]
     question_type = result[1]
+    # Evaluate based on question type
     is_correct = 0
     if question_type == "multiple_choice":
+        # Exact match for multiple choice
         is_correct = 1 if user_answer.strip() == correct_answer.strip() else 0
     elif question_type == "true_false":
+        # Case-insensitive match for true/false
         is_correct = 1 if user_answer.strip().lower() == correct_answer.strip().lower() else 0
+    elif question_type == "fill_blank":
+        # Flexible matching for fill in blank
+        user_clean = user_answer.strip().lower()
+        correct_clean = correct_answer.strip().lower()
         # Accepts equality or containment in either direction.
         # NOTE(review): "user_clean in correct_clean" is True for an empty
         # string, so a whitespace-only answer is graded correct here.
+        is_correct = 1 if (user_clean == correct_clean or correct_clean in user_clean or user_clean in correct_clean) else 0
+    else:  # short_answer
         # Smart evaluation for short answers
+        user_clean = user_answer.strip().lower()
+        correct_clean = correct_answer.strip().lower()
+        # Extract key words from correct answer
+        key_words = re.findall(r'\b[a-z]{4,}\b', correct_clean)
+        key_words = [w for w in key_words if w not in ['this', 'that', 'these', 'those', 'there', 'their', 'would', 'could', 'should']]
+        if key_words:
+            # Count how many key words appear in user answer
             # Matching is by substring, and the answer passes when at least
             # 40% of the model answer's key words are found.
+            matches = sum(1 for kw in key_words if kw in user_clean)
+            is_correct = 1 if matches >= len(key_words) * 0.4 else 0
+        else:
+            # Fallback: check length and similarity
             # Any answer longer than 30 characters passes, as does containment
             # in either direction (which an empty answer also satisfies).
+            is_correct = 1 if len(user_clean) > 30 or user_clean in correct_clean or correct_clean in user_clean else 0
+    # Update database
+    cursor.execute("""
+        UPDATE questions
+        SET user_answer = ?, is_correct = ?, time_spent = ?
+        WHERE id = ? AND session_id = ?
+    """, (user_answer, is_correct, time_spent, question_id, session_id))
+    # Update user profile
     # Counters are incremented on every submission, including re-submissions
     # of a question that was already answered.
+    cursor.execute("UPDATE user_profile SET total_questions_answered = total_questions_answered + 1, total_correct_answers = total_correct_answers + ? WHERE id = 1", (is_correct,))
     conn.commit()
     conn.close()
     return {
         "is_correct": bool(is_correct),
         "correct_answer": correct_answer,
         # Feedback quotes at most the first 200 characters of the model answer.
+        "feedback": "Correct! Great job!" if is_correct else f"The correct answer is: {correct_answer[:200]}"
     }
@app.delete("/api/session/{session_id}")
async def delete_session(session_id: str):
    """
    Delete a session together with its questions, flashcards, pages and notes.

    Args:
        session_id: Identifier of the session to remove.

    Returns:
        A dict with a confirmation message and ``affected``, the number of
        rows removed from the sessions table.

    Raises:
        HTTPException: 404 when no session with this id exists.
    """
    conn = sqlite3.connect(DB_PATH)
    try:
        cursor = conn.cursor()

        # Check if session exists
        cursor.execute("SELECT id FROM sessions WHERE id = ?", (session_id,))
        if not cursor.fetchone():
            raise HTTPException(status_code=404, detail="Session not found")

        # Remove dependent rows explicitly. SQLite only honours ON DELETE
        # CASCADE when foreign keys are enabled on the connection, which a
        # fresh sqlite3.connect() does not do, so relying on the cascade
        # would leave these rows orphaned.
        cursor.execute("DELETE FROM questions WHERE session_id = ?", (session_id,))
        cursor.execute("DELETE FROM flashcards WHERE session_id = ?", (session_id,))
        cursor.execute("DELETE FROM pages WHERE session_id = ?", (session_id,))
        cursor.execute("DELETE FROM notes WHERE session_id = ?", (session_id,))

        cursor.execute("DELETE FROM sessions WHERE id = ?", (session_id,))
        # Read rowcount right after the sessions DELETE so it reflects that statement
        affected = cursor.rowcount
        conn.commit()
    finally:
        # Close on every path, including the 404 and any database error
        conn.close()
    return {"message": "Session deleted successfully", "affected": affected}
@app.post("/api/save-note")
async def save_note(
    session_id: str = Form(...),
    title: str = Form(...),
    content: str = Form(...),
    note_id: str = Form(None)
):
    """
    Create a note for a session, or overwrite an existing one.

    When ``note_id`` is empty a new id is generated and a row is inserted;
    otherwise the note with that id in this session is updated. The note id
    is returned in both cases.
    """
    db = sqlite3.connect(DB_PATH)
    if not note_id:
        # No id supplied: this is a new note
        note_id = generate_id("note")
        statement = """
            INSERT INTO notes (id, session_id, title, content)
            VALUES (?, ?, ?, ?)
        """
        values = (note_id, session_id, title, content)
    else:
        # Existing note: replace title/content and bump its timestamp
        statement = """
            UPDATE notes SET title = ?, content = ?, updated_at = CURRENT_TIMESTAMP
            WHERE id = ? AND session_id = ?
        """
        values = (title, content, note_id, session_id)
    db.execute(statement, values)
    db.commit()
    db.close()
    return {"success": True, "note_id": note_id}
@app.get("/api/user/profile")
async def get_user_profile():
    """
    Return the single user profile row with derived statistics.

    If the profile row (id 1) does not exist yet, zeroed counters are
    reported instead. Accuracy is the percentage of answered questions
    that were correct, and study time is converted to whole minutes.
    """
    db = sqlite3.connect(DB_PATH)
    db.row_factory = sqlite3.Row
    row = db.execute("SELECT * FROM user_profile WHERE id = 1").fetchone()

    if row:
        profile = dict(row)
    else:
        # Nothing recorded yet: report an all-zero profile
        profile = {
            "total_questions_answered": 0,
            "total_correct_answers": 0,
            "total_study_time": 0,
            "total_sessions_created": 0
        }

    answered = profile.get("total_questions_answered", 0)
    right = profile.get("total_correct_answers", 0)
    if answered > 0:
        percent = right / answered * 100
    else:
        percent = 0
    db.close()

    study_seconds = profile.get("total_study_time", 0)
    return {
        "profile": profile,
        "accuracy": round(percent, 1),
        "streak": 0,  # Would need additional logic for streak
        "total_study_minutes": study_seconds // 60
    }
@app.post("/api/update-study-time")
async def update_study_time(
    session_id: str = Form(...),
    time_spent: int = Form(0)
):
    """Add ``time_spent`` to the profile's total study time and touch the session's last_accessed."""
    updates = (
        ("UPDATE user_profile SET total_study_time = total_study_time + ? WHERE id = 1", (time_spent,)),
        ("UPDATE sessions SET last_accessed = CURRENT_TIMESTAMP WHERE id = ?", (session_id,)),
    )
    db = sqlite3.connect(DB_PATH)
    for statement, values in updates:
        db.execute(statement, values)
    db.commit()
    db.close()
    return {"success": True}
+# ==================== MAIN ENTRY POINT ====================
 if __name__ == "__main__":
     import uvicorn
+    print("=" * 60)
+    print("🚀 StudyFlow AI Backend Server")
+    print("=" * 60)
+    print(f"📁 Database: {DB_PATH}")
+    print(f"🤖 AI Available: {bool(HF_API_TOKEN and HF_API_TOKEN != '')}")
+    if HF_API_TOKEN:
+        print(f"🔑 HF API Token: {HF_API_TOKEN[:10]}...")
+    else:
+        print("⚠️  No HF API Token - using fallback question generation")
+        print("   Get a free token at: https://huggingface.co/settings/tokens")
+    print("=" * 60)
+    print("🌐 Server starting at: http://0.0.0.0:7860")
+    print("📖 API Docs: http://0.0.0.0:7860/docs")
+    print("=" * 60)
     uvicorn.run(app, host="0.0.0.0", port=7860)