Spaces:

AtPeak
/

creatorstudio-ai-backend-develop

Paused

App Files Files Community

matsuap committed on Jan 5

Commit

951d5c6

verified ·

1 Parent(s): 4064f62

Upload folder using huggingface_hub

Browse files

Files changed (13) hide show

api/auth.py +24 -0
api/podcast.py +119 -110
api/sources.py +3 -2
api/video_generator.py +3 -2
api/websocket_routes.py +354 -0
main.py +2 -1
models/db_models.py +10 -3
models/schemas.py +16 -5
services/flashcard_service.py +85 -22
services/mindmap_service.py +27 -18
services/podcast_service.py +145 -64
services/quiz_service.py +51 -13
services/report_service.py +56 -23

api/auth.py CHANGED Viewed

@@ -33,6 +33,30 @@ async def get_current_user(token: str = Depends(oauth2_scheme), db: Session = De
         raise credentials_exception
     return user
 @router.post("/register", response_model=UserResponse)
 async def register(user_in: UserCreate, db: Session = Depends(get_db)):
     db_user = db.query(db_models.User).filter(db_models.User.email == user_in.email).first()

         raise credentials_exception
     return user
+async def get_current_user_ws(token: str, db: Session):
+    """
+    Resolve the user that owns a WebSocket connection from a raw JWT string.
+
+    The token is decoded with the application secret and algorithm, its "sub"
+    claim is treated as the user's email, and the matching user row is loaded.
+
+    Raises:
+        HTTPException: 401 when the token cannot be decoded, carries no "sub"
+            claim, or refers to an email with no matching user.
+    """
+    unauthorized = HTTPException(
+        status_code=status.HTTP_401_UNAUTHORIZED,
+        detail="Could not validate credentials",
+    )
+    try:
+        claims = jwt.decode(token, settings.SECRET_KEY, algorithms=[settings.ALGORITHM])
+    except JWTError:
+        raise unauthorized
+    subject = claims.get("sub")
+    if subject is None:
+        raise unauthorized
+    token_data = TokenData(email=subject)
+    account = (
+        db.query(db_models.User)
+        .filter(db_models.User.email == token_data.email)
+        .first()
+    )
+    if account is None:
+        raise unauthorized
+    return account
 @router.post("/register", response_model=UserResponse)
 async def register(user_in: UserCreate, db: Session = Depends(get_db)):
     db_user = db.query(db_models.User).filter(db_models.User.email == user_in.email).first()

api/podcast.py CHANGED Viewed

@@ -1,14 +1,16 @@
 import os
 import logging
 from datetime import datetime
-from fastapi import APIRouter, Depends, HTTPException
 from sqlalchemy.orm import Session
 from typing import Dict, List
 from api.auth import get_current_user
-from models.schemas import PodcastAnalyzeRequest, PodcastGenerateRequest
 from models import db_models
-from core.database import get_db
 from services.podcast_service import podcast_service
 from services.s3_service import s3_service
 from core import constants
@@ -27,57 +29,29 @@ async def get_podcast_config():
         "models": constants.PODCAST_MODALS
     }
-@router.post("/analyze")
-async def analyze_source(
-    request: PodcastAnalyzeRequest,
-    current_user: db_models.User = Depends(get_current_user),
-    db: Session = Depends(get_db)):
-    """
-    Analyzes a source file from S3 and proposes podcast structures.
-    """
     try:
-        # Verify file ownership via DB
-        source = db.query(db_models.Source).filter(
-            db_models.Source.s3_key == request.file_key,
-            db_models.Source.user_id == current_user.id
-        ).first()
-        if not source:
-             raise HTTPException(status_code=403, detail="Not authorized to access this file or file does not exist")
-        analysis = await podcast_service.analyze_pdf(
-            file_key=request.file_key,
-            duration_minutes=request.duration_minutes
-        )
-        return {"analysis": analysis}
-    except HTTPException:
-        raise
-    except Exception as e:
-        logger.error(f"Analysis failed: {e}")
-        raise HTTPException(status_code=500, detail=str(e))
-@router.post("/generate")
-async def generate_podcast(
-    request: PodcastGenerateRequest,
-    current_user: db_models.User = Depends(get_current_user),
-    db: Session = Depends(get_db)
-):
-    """
-    Generates a podcast script and then the audio.
-    Saves metadata to DB and returns the generated info.
-    """
-    try:
-        # 1. Verify file ownership if provided
         if request.file_key:
-            source = db.query(db_models.Source).filter(
-                db_models.Source.s3_key == request.file_key,
-                db_models.Source.user_id == current_user.id
-            ).first()
-            if not source:
-                raise HTTPException(status_code=403, detail="Not authorized to access this file")
-            source_id = source.id
-        else:
-            source_id = None
         # 2. Generate Script
         script = await podcast_service.generate_script(
@@ -85,12 +59,14 @@ async def generate_podcast(
             model=request.model,
             duration_minutes=request.duration_minutes,
             podcast_format=request.podcast_format,
-            pdf_suggestions=request.pdf_suggestions,
             file_key=request.file_key
         )
         if not script:
-             raise HTTPException(status_code=500, detail="Failed to generate script")
         # 3. Generate Audio
         audio_path = await podcast_service.generate_full_audio(
@@ -103,83 +79,115 @@ async def generate_podcast(
         )
         if not audio_path:
-             raise HTTPException(status_code=500, detail="Failed to generate audio")
         # 4. Upload to S3
         filename = os.path.basename(audio_path)
-        with open(audio_path, "rb") as f:
-            content = f.read()
-        s3_key = f"users/{current_user.id}/outputs/podcasts/{filename}"
-        import boto3
-        from core.config import settings
-        s3_client = boto3.client('s3',
-                                aws_access_key_id=settings.AWS_ACCESS_KEY_ID,
-                                aws_secret_access_key=settings.AWS_SECRET_ACCESS_KEY,
-                                region_name=settings.AWS_REGION)
-        s3_client.put_object(Bucket=settings.AWS_S3_BUCKET, Key=s3_key, Body=content)
-        public_url = s3_service.get_public_url(s3_key)
-        private_url = s3_service.get_presigned_url(s3_key)
-        # 5. Save to DB
-        db_podcast = db_models.Podcast(
-            title=f"Podcast {datetime.utcnow().strftime('%Y-%m-%d %H:%M')}",
-            s3_key=s3_key,
-            s3_url=public_url,
-            script=script,
-            user_id=current_user.id,
-            source_id=source_id
-        )
-        db.add(db_podcast)
         db.commit()
-        db.refresh(db_podcast)
-        # Clean up local file
-        os.remove(audio_path)
-        return {
-            "id": db_podcast.id,
-            "message": "Podcast generated successfully",
-            "script": script,
-            "public_url": public_url,
-            "private_url": private_url
-        }
-    except HTTPException:
-        raise
     except Exception as e:
-        logger.error(f"Podcast generation failed: {e}")
-        raise HTTPException(status_code=500, detail=str(e))
-@router.get("/list")
 async def list_podcasts(
     current_user: db_models.User = Depends(get_current_user),
     db: Session = Depends(get_db)
 ):
     """
-    Lists all podcasts for the current user.
     """
     try:
         podcasts = db.query(db_models.Podcast).filter(
             db_models.Podcast.user_id == current_user.id
         ).order_by(db_models.Podcast.created_at.desc()).all()
-        return [
-            {
-                "id": p.id,
-                "title": p.title,
-                "s3_key": p.s3_key,
-                "public_url": p.s3_url,
-                "private_url": s3_service.get_presigned_url(p.s3_key),
-                "script_preview": (p.script[:200] + "...") if p.script else "",
-                "parent_file_id": p.source_id,
-                "parent_file_key": p.source.s3_key if p.source else None,
-                "created_at": p.created_at
-            }
-            for p in podcasts
-        ]
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
@@ -201,8 +209,9 @@ async def delete_podcast(
         raise HTTPException(status_code=404, detail="Podcast not found")
     try:
-        # 1. Delete from S3
-        await s3_service.delete_file(podcast.s3_key)
         # 2. Delete from DB
         db.delete(podcast)

 import os
+import asyncio
 import logging
 from datetime import datetime
+from fastapi import APIRouter, Depends, HTTPException, BackgroundTasks
 from sqlalchemy.orm import Session
 from typing import Dict, List
+from api.websocket_routes import manager
 from api.auth import get_current_user
+from models.schemas import PodcastGenerateRequest, PodcastResponse
 from models import db_models
+from core.database import get_db, SessionLocal
 from services.podcast_service import podcast_service
 from services.s3_service import s3_service
 from core import constants
         "models": constants.PODCAST_MODALS
     }
+async def run_podcast_generation(podcast_id: int, request: PodcastGenerateRequest, user_id: int):
+    """Background task to generate podcast and update status."""
+    db = SessionLocal()
     try:
+        podcast = db.query(db_models.Podcast).filter(db_models.Podcast.id == podcast_id).first()
+        if not podcast:
+            return
+        podcast.status = "processing"
+        db.commit()
+        # Notify via WebSocket if connected
+        connection_id = f"user_{user_id}"
+        await manager.send_progress(connection_id, 10, "processing", "Analyzing source file...")
+        # 1. Analyze first if file is provided
+        analysis_report = ""
         if request.file_key:
+            analysis_report = await podcast_service.analyze_pdf(
+                file_key=request.file_key,
+                duration_minutes=request.duration_minutes
+            )
+            await manager.send_progress(connection_id, 20, "processing", "Generating podcast script...")
         # 2. Generate Script
         script = await podcast_service.generate_script(
             model=request.model,
             duration_minutes=request.duration_minutes,
             podcast_format=request.podcast_format,
+            pdf_suggestions=analysis_report,
             file_key=request.file_key
         )
         if not script:
+            raise Exception("Failed to generate script")
+        await manager.send_progress(connection_id, 40, "processing", "Generating audio (this may take several minutes)...")
         # 3. Generate Audio
         audio_path = await podcast_service.generate_full_audio(
         )
         if not audio_path:
+            raise Exception("Failed to generate audio")
+        await manager.send_progress(connection_id, 85, "processing", "Uploading to S3...")
         # 4. Upload to S3
         filename = os.path.basename(audio_path)
+        s3_key = f"users/{user_id}/outputs/podcasts/{filename}"
+        def upload_audio():
+            with open(audio_path, "rb") as f:
+                content = f.read()
+            import boto3
+            from core.config import settings
+            s3_client = boto3.client('s3',
+                                    aws_access_key_id=settings.AWS_ACCESS_KEY_ID,
+                                    aws_secret_access_key=settings.AWS_SECRET_ACCESS_KEY,
+                                    region_name=settings.AWS_REGION)
+            s3_client.put_object(Bucket=settings.AWS_S3_BUCKET, Key=s3_key, Body=content)
+            return content
+        await asyncio.to_thread(upload_audio)
+        public_url = s3_service.get_public_url(s3_key)
+        # 5. Final update to DB
+        podcast.s3_key = s3_key
+        podcast.s3_url = public_url
+        podcast.script = script
+        podcast.status = "completed"
         db.commit()
+        # Notify completion
+        await manager.send_result(connection_id, {
+            "id": podcast.id,
+            "status": "completed",
+            "title": podcast.title,
+            "public_url": public_url
+        })
+        # Clean up
+        if os.path.exists(audio_path):
+            os.remove(audio_path)
     except Exception as e:
+        logger.error(f"Background podcast generation failed for ID {podcast_id}: {e}")
+        podcast = db.query(db_models.Podcast).filter(db_models.Podcast.id == podcast_id).first()
+        if podcast:
+            podcast.status = "failed"
+            podcast.error_message = str(e)
+            db.commit()
+        connection_id = f"user_{user_id}"
+        await manager.send_error(connection_id, f"Generation failed: {str(e)}")
+    finally:
+        db.close()
+@router.post("/generate", response_model=PodcastResponse)
+async def generate_podcast(
+    request: PodcastGenerateRequest,
+    background_tasks: BackgroundTasks,
+    current_user: db_models.User = Depends(get_current_user),
+    db: Session = Depends(get_db)
+):
+    """
+    Queue a podcast generation job and return its tracking record right away.
+
+    A Podcast row is stored with status "processing" and handed to
+    run_podcast_generation as a background task; the row itself is the response.
+
+    Raises:
+        HTTPException: 403 when request.file_key does not match a source owned
+            by the current user.
+    """
+    # Ownership check: only link a source that belongs to the caller.
+    linked_source_id = None
+    if request.file_key:
+        owned_source = (
+            db.query(db_models.Source)
+            .filter(
+                db_models.Source.s3_key == request.file_key,
+                db_models.Source.user_id == current_user.id,
+            )
+            .first()
+        )
+        if not owned_source:
+            raise HTTPException(status_code=403, detail="Not authorized to access this file")
+        linked_source_id = owned_source.id
+    # Persist the tracking record before any generation work starts.
+    created_label = datetime.utcnow().strftime('%Y-%m-%d %H:%M')
+    record = db_models.Podcast(
+        title=f"Podcast {created_label}",
+        user_id=current_user.id,
+        source_id=linked_source_id,
+        status="processing",
+    )
+    db.add(record)
+    db.commit()
+    db.refresh(record)
+    # The heavy lifting runs after the response has been sent.
+    background_tasks.add_task(run_podcast_generation, record.id, request, current_user.id)
+    return record
+@router.get("/list", response_model=List[PodcastResponse])
 async def list_podcasts(
     current_user: db_models.User = Depends(get_current_user),
     db: Session = Depends(get_db)
 ):
     """
+    Lists all podcasts for the current user including their generation status.
+
+    Rows are filtered by the authenticated user's id and returned newest
+    first (ordered by created_at descending), each converted to a
+    PodcastResponse.
+
+    Raises:
+        HTTPException: 500 carrying the exception text when the query or the
+            conversion fails.
     """
     try:
         podcasts = db.query(db_models.Podcast).filter(
             db_models.Podcast.user_id == current_user.id
         ).order_by(db_models.Podcast.created_at.desc()).all()
+        # Validate each ORM row into the response schema.
+        return [PodcastResponse.model_validate(p) for p in podcasts]
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
         raise HTTPException(status_code=404, detail="Podcast not found")
     try:
+        # 1. Delete from S3 if it exists
+        if podcast.s3_key:
+            await s3_service.delete_file(podcast.s3_key)
         # 2. Delete from DB
         db.delete(podcast)

api/sources.py CHANGED Viewed

@@ -130,8 +130,9 @@ async def delete_source(
         db.commit() # Commit deletions
-        # 3. Delete from S3
-        await s3_service.delete_file(source.s3_key)
         # 4. Delete the Source itself from Database
         db.delete(source)

         db.commit() # Commit deletions
+        # 3. Delete from S3 if it exists
+        if source.s3_key:
+            await s3_service.delete_file(source.s3_key)
         # 4. Delete the Source itself from Database
         db.delete(source)

api/video_generator.py CHANGED Viewed

@@ -123,8 +123,9 @@ async def delete_video_summary(
         raise HTTPException(status_code=404, detail="Video summary not found")
     try:
-        # 1. Delete from S3
-        await s3_service.delete_file(summary.s3_key)
         # 2. Delete from DB
         db.delete(summary)

         raise HTTPException(status_code=404, detail="Video summary not found")
     try:
+        # 1. Delete from S3 if it exists
+        if summary.s3_key:
+            await s3_service.delete_file(summary.s3_key)
         # 2. Delete from DB
         db.delete(summary)

api/websocket_routes.py ADDED Viewed

	@@ -0,0 +1,354 @@

+import logging
+import asyncio
+from fastapi import APIRouter, WebSocket, WebSocketDisconnect, Depends
+from sqlalchemy.orm import Session
+from datetime import datetime
+from typing import Dict, Any
+from api.auth import get_current_user_ws
+from models import db_models
+from core.database import get_db
+from services.flashcard_service import flashcard_service
+from services.quiz_service import quiz_service
+from services.report_service import report_service
+from services.mindmap_service import mindmap_service
+from services.podcast_service import podcast_service
+from services.s3_service import s3_service
+from services.video_generator_service import video_generator_service
+router = APIRouter(prefix="/ws", tags=["websockets"])
+logger = logging.getLogger(__name__)
+class ConnectionManager:
+    """Registry of live WebSocket connections, keyed by connection id, with JSON push helpers."""
+    def __init__(self):
+        self.active_connections: Dict[str, WebSocket] = {}
+    async def connect(self, websocket: WebSocket, connection_id: str):
+        """Accept the socket and register it under connection_id."""
+        await websocket.accept()
+        self.active_connections[connection_id] = websocket
+        logger.info(f"WebSocket connected: {connection_id}")
+    def disconnect(self, connection_id: str):
+        """Drop connection_id from the registry; unknown ids are ignored."""
+        try:
+            del self.active_connections[connection_id]
+        except KeyError:
+            return
+        logger.info(f"WebSocket disconnected: {connection_id}")
+    async def _push(self, connection_id: str, kind: str, frame: Dict[str, Any]):
+        """Send frame to a registered socket; send failures are logged, never raised."""
+        if connection_id not in self.active_connections:
+            return
+        try:
+            await self.active_connections[connection_id].send_json(frame)
+        except Exception as exc:
+            logger.error(f"Error sending {kind} to {connection_id}: {exc}")
+    async def send_progress(self, connection_id: str, progress: int, status: str, message: str = ""):
+        """Push a "progress" frame with the given percentage, status and message."""
+        frame = {
+            "type": "progress",
+            "progress": progress,
+            "status": status,
+            "message": message,
+        }
+        await self._push(connection_id, "progress", frame)
+    async def send_result(self, connection_id: str, data: Any):
+        """Push the final "result" frame (progress 100) carrying data."""
+        frame = {
+            "type": "result",
+            "status": "complete",
+            "progress": 100,
+            "data": data,
+        }
+        await self._push(connection_id, "result", frame)
+    async def send_error(self, connection_id: str, error: str):
+        """Push an "error" frame carrying the error text."""
+        frame = {
+            "type": "error",
+            "status": "error",
+            "message": error,
+        }
+        await self._push(connection_id, "error", frame)
+manager = ConnectionManager()
+@router.websocket("/generate")
+async def unified_generate_ws(
+    websocket: WebSocket,
+    token: str,
+    db: Session = Depends(get_db)):
+    """
+    Unified WebSocket gateway for generation tasks.
+
+    Client sends JSON: { "type": "podcast|flashcards|quiz", "data": { ... } }
+    Any other "type" is answered with an "Unsupported task type" error frame.
+
+    Args:
+        websocket: The client connection. It is accepted before authentication
+            so that an authentication failure can be reported as a JSON frame.
+        token: JWT identifying the user; validated by get_current_user_ws.
+        db: Database session injected by FastAPI.
+    """
+    await websocket.accept()
+    # Stays None until authentication succeeds, so the error and cleanup paths
+    # below never touch an unbound name.
+    connection_id = None
+    try:
+        try:
+            current_user = await get_current_user_ws(token, db)
+        except Exception as auth_error:
+            # The socket is not registered with the manager yet, so report the
+            # failure on it directly and close with 1008 (policy violation).
+            logger.warning(f"WebSocket authentication failed: {auth_error}")
+            await websocket.send_json({
+                "type": "error",
+                "status": "error",
+                "message": getattr(auth_error, "detail", "Authentication failed")
+            })
+            await websocket.close(code=1008)
+            return
+        connection_id = f"user_{current_user.id}"
+        manager.active_connections[connection_id] = websocket
+        # Receive the task specification
+        message = await websocket.receive_json()
+        task_type = message.get("type")
+        # A null or missing "data" is treated as an empty payload.
+        data = message.get("data") or {}
+        if not task_type:
+            await manager.send_error(connection_id, "Missing 'type' in request")
+            return
+        await manager.send_progress(connection_id, 2, "processing", f"Initializing {task_type} task...")
+        # --- ROUTING LOGIC ---
+        if task_type == "podcast":
+            await handle_podcast_task(connection_id, data, current_user, db)
+        elif task_type == "flashcards":
+            await handle_flashcards_task(connection_id, data, current_user, db)
+        elif task_type == "quiz":
+            await handle_quiz_task(connection_id, data, current_user, db)
+        else:
+            await manager.send_error(connection_id, f"Unsupported task type: {task_type}")
+    except WebSocketDisconnect:
+        logger.info("Client disconnected")
+    except Exception as e:
+        logger.error(f"Unified WebSocket error: {e}")
+        if connection_id is not None:
+            # send_error logs its own delivery failures, so no extra guard is needed.
+            await manager.send_error(connection_id, str(e))
+    finally:
+        # Only unregister our own socket: a newer connection from the same user
+        # reuses the same connection_id and must stay registered.
+        if connection_id is not None and manager.active_connections.get(connection_id) is websocket:
+            manager.disconnect(connection_id)
+async def handle_podcast_task(connection_id: str, data: Dict, current_user: db_models.User, db: Session):
+    """
+    Internal handler for podcast generation.
+
+    Creates a Podcast record with status "processing", optionally analyzes the
+    source file, generates the script and the audio, uploads the audio to S3
+    and marks the record "completed". Progress, the final result and errors
+    are pushed to the client through the connection manager.
+
+    Args:
+        connection_id: Manager key of the client's WebSocket.
+        data: Client payload. "user_prompt" is required; "file_key", "model",
+            "duration_minutes", "podcast_format", "tts_model", "spk1_voice",
+            "spk2_voice", "temperature" and "bgm_choice" are optional.
+        current_user: Authenticated user who will own the podcast.
+        db: Database session.
+
+    Failures never propagate: the record (if one was created) is marked
+    "failed" with the error text and an error frame is sent to the client.
+    """
+    import os
+    db_podcast = None
+    audio_path = None
+    try:
+        # Reject the request before a record exists instead of failing later
+        # with a bare KeyError message.
+        if "user_prompt" not in data:
+            await manager.send_error(connection_id, "Missing 'user_prompt' in request")
+            return
+        source_id = None
+        if data.get("file_key"):
+            source = db.query(db_models.Source).filter(
+                db_models.Source.s3_key == data["file_key"],
+                db_models.Source.user_id == current_user.id
+            ).first()
+            if not source:
+                await manager.send_error(connection_id, "Not authorized to access this file")
+                return
+            source_id = source.id
+        db_podcast = db_models.Podcast(
+            title=f"Podcast {datetime.utcnow().strftime('%Y-%m-%d %H:%M')}",
+            user_id=current_user.id,
+            source_id=source_id,
+            status="processing"
+        )
+        db.add(db_podcast)
+        db.commit()
+        db.refresh(db_podcast)
+        analysis_report = ""
+        if data.get("file_key"):
+            await manager.send_progress(connection_id, 10, "processing", "Analyzing source file...")
+            analysis_report = await podcast_service.analyze_pdf(
+                file_key=data["file_key"],
+                duration_minutes=data.get("duration_minutes", 10)
+            )
+        await manager.send_progress(connection_id, 15, "processing", "Generating podcast script...")
+        script = await podcast_service.generate_script(
+            user_prompt=data["user_prompt"],
+            model=data.get("model", "gpt-4o"),
+            duration_minutes=data.get("duration_minutes", 10),
+            podcast_format=data.get("podcast_format", "conversational"),
+            pdf_suggestions=analysis_report,
+            file_key=data.get("file_key")
+        )
+        if not script:
+            raise Exception("Failed to generate script")
+        await manager.send_progress(connection_id, 45, "processing", "Generating audio...")
+        audio_path = await podcast_service.generate_full_audio(
+            script=script,
+            tts_model=data.get("tts_model", "gemini-2.0-flash-exp"),
+            spk1_voice=data.get("spk1_voice", "Puck"),
+            spk2_voice=data.get("spk2_voice", "Charon"),
+            temperature=data.get("temperature", 1.0),
+            bgm_choice=data.get("bgm_choice", "No BGM")
+        )
+        if not audio_path:
+            raise Exception("Failed to generate audio")
+        await manager.send_progress(connection_id, 90, "processing", "Uploading to S3...")
+        filename = os.path.basename(audio_path)
+        s3_key = f"users/{current_user.id}/outputs/podcasts/{filename}"
+        def upload_audio_sync():
+            # Blocking file read and S3 upload; run in a worker thread below
+            # so the event loop stays responsive.
+            with open(audio_path, "rb") as f:
+                content = f.read()
+            import boto3
+            from core.config import settings
+            boto3.client('s3',
+                         aws_access_key_id=settings.AWS_ACCESS_KEY_ID,
+                         aws_secret_access_key=settings.AWS_SECRET_ACCESS_KEY,
+                         region_name=settings.AWS_REGION).put_object(Bucket=settings.AWS_S3_BUCKET, Key=s3_key, Body=content)
+        await asyncio.to_thread(upload_audio_sync)
+        public_url = s3_service.get_public_url(s3_key)
+        db_podcast.s3_key = s3_key
+        db_podcast.s3_url = public_url
+        db_podcast.script = script
+        db_podcast.status = "completed"
+        db.commit()
+        await manager.send_result(connection_id, {
+            "id": db_podcast.id,
+            "status": "completed",
+            "message": "Podcast generated successfully",
+            "public_url": public_url
+        })
+    except Exception as e:
+        logger.error(f"Podcast task failed: {e}")
+        if db_podcast is not None:
+            try:
+                # Clear any half-finished transaction before recording the failure.
+                db.rollback()
+                db_podcast.status = "failed"
+                db_podcast.error_message = str(e)
+                db.commit()
+            except Exception as db_error:
+                logger.error(f"Could not record podcast failure: {db_error}")
+        await manager.send_error(connection_id, str(e))
+    finally:
+        # Remove the local audio file on success and on failure alike; a
+        # cleanup problem must not turn a completed podcast into a failed one.
+        if audio_path and os.path.exists(audio_path):
+            try:
+                os.remove(audio_path)
+            except OSError as cleanup_error:
+                logger.warning(f"Could not remove temporary audio file {audio_path}: {cleanup_error}")
+async def handle_flashcards_task(connection_id: str, data: Dict, current_user: db_models.User, db: Session):
+    """
+    Internal handler for flashcard generation.
+
+    Verifies ownership of the optional source file, asks the flashcard service
+    for cards, stores them as one FlashcardSet and reports the result to the
+    client through the connection manager.
+
+    Args:
+        connection_id: Manager key of the client's WebSocket.
+        data: Client payload; reads "file_key", "text_input", "difficulty",
+            "quantity", "topic" and "language".
+        current_user: Authenticated user who will own the set.
+        db: Database session.
+
+    Failures never propagate: the transaction is rolled back and an error
+    frame is sent to the client.
+    """
+    # The event loop only keeps weak references to tasks, so hold each progress
+    # task here until it finishes.
+    progress_tasks = set()
+    def report_progress(p, m):
+        # Map the service's progress onto the 10-80 band of the overall task.
+        task = asyncio.create_task(
+            manager.send_progress(connection_id, 10 + int(p * 0.7), "processing", m)
+        )
+        progress_tasks.add(task)
+        task.add_done_callback(progress_tasks.discard)
+        return task
+    try:
+        source_id = None
+        if data.get("file_key"):
+            source = db.query(db_models.Source).filter(
+                db_models.Source.s3_key == data["file_key"],
+                db_models.Source.user_id == current_user.id
+            ).first()
+            if not source:
+                await manager.send_error(connection_id, "Not authorized to access this file")
+                return
+            source_id = source.id
+        await manager.send_progress(connection_id, 10, "processing", "Generating flashcards...")
+        cards_data = await flashcard_service.generate_flashcards(
+            file_key=data.get("file_key"),
+            text_input=data.get("text_input"),
+            difficulty=data.get("difficulty", "medium"),
+            quantity=data.get("quantity", "standard"),
+            topic=data.get("topic"),
+            language=data.get("language", "English"),
+            progress_callback=report_progress
+        )
+        # Let queued progress frames go out before any later frame is sent.
+        if progress_tasks:
+            await asyncio.gather(*progress_tasks, return_exceptions=True)
+        if not cards_data:
+            await manager.send_error(connection_id, "AI returned an empty response")
+            return
+        await manager.send_progress(connection_id, 85, "processing", "Saving to database...")
+        # Fall back to a generated title when "topic" is missing, null or empty.
+        title = data.get("topic") or f"Flashcards {len(cards_data)}"
+        db_set = db_models.FlashcardSet(
+            title=title,
+            difficulty=data.get("difficulty", "medium"),
+            user_id=current_user.id,
+            source_id=source_id
+        )
+        db.add(db_set)
+        # flush() assigns db_set.id without committing, so the set and its
+        # cards are saved in one transaction (no empty set is left behind).
+        db.flush()
+        db.add_all([
+            db_models.Flashcard(
+                flashcard_set_id=db_set.id,
+                question=item.get("question", ""),
+                answer=item.get("answer", "")
+            )
+            for item in cards_data
+        ])
+        db.commit()
+        db.refresh(db_set)
+        await manager.send_result(connection_id, {
+            "id": db_set.id,
+            "title": db_set.title,
+            "flashcards_count": len(db_set.flashcards)
+        })
+    except Exception as e:
+        logger.error(f"Flashcard task failed: {e}")
+        try:
+            # Leave the shared session usable after a failed flush or commit.
+            db.rollback()
+        except Exception as db_error:
+            logger.error(f"Rollback after flashcard failure failed: {db_error}")
+        await manager.send_error(connection_id, str(e))
+async def handle_quiz_task(connection_id: str, data: Dict, current_user: db_models.User, db: Session):
+    """
+    Internal handler for quiz generation.
+
+    Verifies ownership of the optional source file, asks the quiz service for
+    questions, stores them as one QuizSet and reports the result to the client
+    through the connection manager.
+
+    Args:
+        connection_id: Manager key of the client's WebSocket.
+        data: Client payload; reads "file_key", "text_input", "difficulty",
+            "topic", "language" and "count".
+        current_user: Authenticated user who will own the quiz.
+        db: Database session.
+
+    Failures never propagate: the transaction is rolled back and an error
+    frame is sent to the client.
+    """
+    # The event loop only keeps weak references to tasks, so hold each progress
+    # task here until it finishes.
+    progress_tasks = set()
+    def report_progress(p, m):
+        # Map the service's progress onto the 10-80 band of the overall task.
+        task = asyncio.create_task(
+            manager.send_progress(connection_id, 10 + int(p * 0.7), "processing", m)
+        )
+        progress_tasks.add(task)
+        task.add_done_callback(progress_tasks.discard)
+        return task
+    try:
+        source_id = None
+        if data.get("file_key"):
+            source = db.query(db_models.Source).filter(
+                db_models.Source.s3_key == data["file_key"],
+                db_models.Source.user_id == current_user.id
+            ).first()
+            if not source:
+                await manager.send_error(connection_id, "Not authorized to access this file")
+                return
+            source_id = source.id
+        await manager.send_progress(connection_id, 10, "processing", "Generating quiz...")
+        quizzes_data = await quiz_service.generate_quiz(
+            file_key=data.get("file_key"),
+            text_input=data.get("text_input"),
+            difficulty=data.get("difficulty", "medium"),
+            topic=data.get("topic"),
+            language=data.get("language", "English"),
+            count_mode=data.get("count", "STANDARD"),
+            progress_callback=report_progress
+        )
+        # Let queued progress frames go out before the result or error frame.
+        if progress_tasks:
+            await asyncio.gather(*progress_tasks, return_exceptions=True)
+        if not quizzes_data:
+            await manager.send_error(connection_id, "Failed to generate quiz")
+            return
+        db_set = db_models.QuizSet(
+            # Fall back to "Quiz" when "topic" is missing, null or empty.
+            title=data.get("topic") or "Quiz",
+            difficulty=data.get("difficulty", "medium"),
+            user_id=current_user.id,
+            source_id=source_id
+        )
+        db.add(db_set)
+        # flush() assigns db_set.id without committing, so the set and its
+        # questions are saved in one transaction (no empty set is left behind).
+        db.flush()
+        db.add_all([
+            db_models.QuizQuestion(
+                quiz_set_id=db_set.id,
+                question=item.get("question", ""),
+                choices=item.get("choices", {}),
+                answer=item.get("answer", "1"),
+                explanation=item.get("explanation", "")
+            )
+            for item in quizzes_data
+        ])
+        db.commit()
+        await manager.send_result(connection_id, {"id": db_set.id, "title": db_set.title})
+    except Exception as e:
+        logger.error(f"Quiz task failed: {e}")
+        try:
+            # Leave the shared session usable after a failed flush or commit.
+            db.rollback()
+        except Exception as db_error:
+            logger.error(f"Rollback after quiz failure failed: {db_error}")
+        await manager.send_error(connection_id, str(e))

main.py CHANGED Viewed

@@ -1,7 +1,7 @@
 from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware
 from core.database import init_db
-from api import auth, sources, podcast, flashcards, mindmaps, quizzes, reports, video_generator, rag, chat
 # Initialize Database Tables
 init_db()
@@ -32,6 +32,7 @@ app.include_router(reports.router)
 app.include_router(video_generator.router)
 app.include_router(rag.router)
 app.include_router(chat.router)
 @app.get("/")
 async def root():

 from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware
 from core.database import init_db
+from api import auth, sources, podcast, flashcards, mindmaps, quizzes, reports, video_generator, rag, chat, websocket_routes
 # Initialize Database Tables
 init_db()
 app.include_router(video_generator.router)
 app.include_router(rag.router)
 app.include_router(chat.router)
+app.include_router(websocket_routes.router)  # WebSocket endpoints for real-time progress
 @app.get("/")
 async def root():

models/db_models.py CHANGED Viewed

@@ -47,9 +47,11 @@ class Podcast(Base):
     id = Column(Integer, primary_key=True, index=True)
     title = Column(Unicode(255))
-    s3_key = Column(String(512), nullable=False)
-    s3_url = Column(String(1024), nullable=False)
-    script = Column(UnicodeText)
     user_id = Column(Integer, ForeignKey("users.id"))
     source_id = Column(Integer, ForeignKey("sources.id"), nullable=True)
     created_at = Column(DateTime(timezone=True), server_default=func.now())
@@ -57,6 +59,11 @@ class Podcast(Base):
     owner = relationship("User", back_populates="podcasts")
     source = relationship("Source", back_populates="podcasts")
 class FlashcardSet(Base):
     __tablename__ = "flashcard_sets"

     id = Column(Integer, primary_key=True, index=True)
     title = Column(Unicode(255))
+    s3_key = Column(String(512), nullable=True)
+    s3_url = Column(String(1024), nullable=True)
+    script = Column(UnicodeText, nullable=True)
+    status = Column(String(50), default="processing") # pending, processing, completed, failed
+    error_message = Column(UnicodeText, nullable=True)
     user_id = Column(Integer, ForeignKey("users.id"))
     source_id = Column(Integer, ForeignKey("sources.id"), nullable=True)
     created_at = Column(DateTime(timezone=True), server_default=func.now())
     owner = relationship("User", back_populates="podcasts")
     source = relationship("Source", back_populates="podcasts")
+    @property
+    def parent_file_key(self):
+        """S3 key of the linked source, or None when the podcast has no source."""
+        return self.source.s3_key if self.source else None
+    @property
+    def parent_file_id(self):
+        """Id of the linked source (the source_id column), or None when unset.
+
+        Exposed under this name so that response schemas declaring a
+        `parent_file_id` field and reading attributes from this object get the
+        source id rather than their default.
+        """
+        return self.source_id
 class FlashcardSet(Base):
     __tablename__ = "flashcard_sets"

models/schemas.py CHANGED Viewed

@@ -44,16 +44,11 @@ class SourceFileResponse(BaseModel):
         from_attributes = True
 # Podcast Schemas
-class PodcastAnalyzeRequest(BaseModel):
-    file_key: str
-    duration_minutes: int = 10
 class PodcastGenerateRequest(BaseModel):
     user_prompt: str
     model: str = "gpt-4o"
     duration_minutes: int = 10
     podcast_format: str = "deep dive"
-    pdf_suggestions: str = ""
     file_key: Optional[str] = None
     tts_model: str = "gemini-2.5-flash-preview-tts"
     spk1_voice: str = "Zephyr"
@@ -61,6 +56,22 @@ class PodcastGenerateRequest(BaseModel):
     bgm_choice: str = "No BGM"
     temperature: float = 1.0
 # Flashcard Schemas
 class FlashcardItem(BaseModel):
     question: str

         from_attributes = True
 # Podcast Schemas
 class PodcastGenerateRequest(BaseModel):
     user_prompt: str
     model: str = "gpt-4o"
     duration_minutes: int = 10
     podcast_format: str = "deep dive"
     file_key: Optional[str] = None
     tts_model: str = "gemini-2.5-flash-preview-tts"
     spk1_voice: str = "Zephyr"
     bgm_choice: str = "No BGM"
     temperature: float = 1.0
+class PodcastResponse(BaseModel):
+    id: int
+    title: Optional[str]
+    s3_key: Optional[str]
+    s3_url: Optional[str]
+    script: Optional[str]
+    status: str = "completed"
+    error_message: Optional[str]
+    parent_file_id: Optional[int] = None
+    parent_file_key: Optional[str] = None
+    created_at: datetime
+    class Config:
+        from_attributes = True
 # Flashcard Schemas
 class FlashcardItem(BaseModel):
     question: str

services/flashcard_service.py CHANGED Viewed

@@ -1,8 +1,9 @@
 import json
 import logging
 import os
 import tempfile
-from typing import List, Dict, Optional, Any
 import openai
 from botocore.exceptions import ClientError
@@ -23,34 +24,57 @@ class FlashcardService:
         difficulty: str = "medium",
         quantity: str = "standard",
         topic: Optional[str] = None,
-        language: str = "English"
     ) -> List[Dict[str, str]]:
         """
-        Generates flashcards from either an S3 PDF or direct text input (Original File-ID Method).
         """
         try:
             system_prompt = get_flashcard_system_prompt(difficulty, quantity, language)
             if topic:
                 system_prompt += get_flashcard_topic_prompt(topic)
             if file_key:
-                # Download PDF from S3
                 tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".pdf")
                 tmp_path = tmp.name
                 tmp.close()
                 try:
-                    s3_service.s3_client.download_file(
                         settings.AWS_S3_BUCKET,
                         file_key,
                         tmp_path
                     )
-                    with open(tmp_path, "rb") as f:
-                        uploaded_file = self.openai_client.files.create(
-                            file=f,
-                            purpose="assistants"
-                        )
                     messages = [
                         {"role": "system", "content": system_prompt},
@@ -65,26 +89,41 @@ class FlashcardService:
                         }
                     ]
-                    response = self.openai_client.chat.completions.create(
                         model="gpt-4o-mini",
                         messages=messages,
                         temperature=0.7
                     )
-                    # Clean up OpenAI file
-                    self.openai_client.files.delete(uploaded_file.id)
                     raw_content = response.choices[0].message.content
                 finally:
                     if os.path.exists(tmp_path):
-                        os.remove(tmp_path)
             elif text_input:
                 messages = [
                     {"role": "system", "content": system_prompt},
                     {"role": "user", "content": text_input}
                 ]
-                response = self.openai_client.chat.completions.create(
                     model="gpt-4o-mini",
                     messages=messages,
                     temperature=0.7
@@ -94,6 +133,9 @@ class FlashcardService:
             else:
                 raise ValueError("Either file_key or text_input must be provided")
             # Parse JSON
             if "```json" in raw_content:
                 raw_content = raw_content.split("```json")[1].split("```")[0].strip()
@@ -109,6 +151,7 @@ class FlashcardService:
     async def generate_explanation(self, question: str, file_key: Optional[str] = None, language: str = "English") -> str:
         """
         Generates a detailed explanation for a flashcard question.
         """
         try:
             explanation_prompt = get_flashcard_explanation_prompt(question, language)
@@ -119,33 +162,53 @@ class FlashcardService:
                 tmp.close()
                 try:
-                    s3_service.s3_client.download_file(
                         settings.AWS_S3_BUCKET,
                         file_key,
                         tmp_path
                     )
-                    with open(tmp_path, "rb") as f:
-                        uploaded_file = self.openai_client.files.create(file=f, purpose="assistants")
                     messages = [
                         {"role": "system", "content": explanation_prompt},
                         {"role": "user", "content": [{"type": "file", "file": {"file_id": uploaded_file.id}}]}
                     ]
-                    response = self.openai_client.chat.completions.create(
                         model="gpt-4o-mini",
                         messages=messages
                     )
-                    self.openai_client.files.delete(uploaded_file.id)
                     return response.choices[0].message.content
                 finally:
                     if os.path.exists(tmp_path):
-                        os.remove(tmp_path)
             else:
                 messages = [
                     {"role": "system", "content": explanation_prompt},
                     {"role": "user", "content": f"Please explain the question: {question}"}
                 ]
-                response = self.openai_client.chat.completions.create(
                     model="gpt-4o-mini",
                     messages=messages
                 )

 import json
 import logging
 import os
+import asyncio
 import tempfile
+from typing import List, Dict, Optional, Any, Callable
 import openai
 from botocore.exceptions import ClientError
         difficulty: str = "medium",
         quantity: str = "standard",
         topic: Optional[str] = None,
+        language: str = "English",
+        progress_callback: Optional[Callable[[int, str], None]] = None
     ) -> List[Dict[str, str]]:
         """
+        Generates flashcards from either an S3 PDF or direct text input.
+        Uses asyncio.to_thread for all blocking I/O operations to enable parallel execution.
+        Args:
+            progress_callback: Optional callback function(progress: int, message: str) for progress updates
         """
         try:
+            if progress_callback:
+                progress_callback(5, "Preparing prompts...")
             system_prompt = get_flashcard_system_prompt(difficulty, quantity, language)
             if topic:
                 system_prompt += get_flashcard_topic_prompt(topic)
             if file_key:
+                if progress_callback:
+                    progress_callback(15, "Downloading file from S3...")
+                # Download PDF from S3 (non-blocking)
                 tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".pdf")
                 tmp_path = tmp.name
                 tmp.close()
                 try:
+                    # Use asyncio.to_thread for S3 download
+                    await asyncio.to_thread(
+                        s3_service.s3_client.download_file,
                         settings.AWS_S3_BUCKET,
                         file_key,
                         tmp_path
                     )
+                    if progress_callback:
+                        progress_callback(30, "Uploading to OpenAI...")
+                    # Read file and upload to OpenAI (non-blocking)
+                    def upload_to_openai():
+                        with open(tmp_path, "rb") as f:
+                            return self.openai_client.files.create(
+                                file=f,
+                                purpose="assistants"
+                            )
+                    uploaded_file = await asyncio.to_thread(upload_to_openai)
+                    if progress_callback:
+                        progress_callback(45, "Generating flashcards with AI...")
                     messages = [
                         {"role": "system", "content": system_prompt},
                         }
                     ]
+                    # Call OpenAI API (non-blocking)
+                    response = await asyncio.to_thread(
+                        self.openai_client.chat.completions.create,
                         model="gpt-4o-mini",
                         messages=messages,
                         temperature=0.7
                     )
+                    if progress_callback:
+                        progress_callback(75, "Cleaning up...")
+                    # Clean up OpenAI file (non-blocking)
+                    await asyncio.to_thread(
+                        self.openai_client.files.delete,
+                        uploaded_file.id
+                    )
                     raw_content = response.choices[0].message.content
                 finally:
+                    # Remove temp file (non-blocking)
                     if os.path.exists(tmp_path):
+                        await asyncio.to_thread(os.remove, tmp_path)
             elif text_input:
+                if progress_callback:
+                    progress_callback(20, "Generating flashcards with AI...")
                 messages = [
                     {"role": "system", "content": system_prompt},
                     {"role": "user", "content": text_input}
                 ]
+                # Call OpenAI API (non-blocking)
+                response = await asyncio.to_thread(
+                    self.openai_client.chat.completions.create,
                     model="gpt-4o-mini",
                     messages=messages,
                     temperature=0.7
             else:
                 raise ValueError("Either file_key or text_input must be provided")
+            if progress_callback:
+                progress_callback(85, "Parsing results...")
             # Parse JSON
             if "```json" in raw_content:
                 raw_content = raw_content.split("```json")[1].split("```")[0].strip()
     async def generate_explanation(self, question: str, file_key: Optional[str] = None, language: str = "English") -> str:
         """
         Generates a detailed explanation for a flashcard question.
+        Uses asyncio.to_thread for all blocking I/O operations.
         """
         try:
             explanation_prompt = get_flashcard_explanation_prompt(question, language)
                 tmp.close()
                 try:
+                    # Download from S3 (non-blocking)
+                    await asyncio.to_thread(
+                        s3_service.s3_client.download_file,
                         settings.AWS_S3_BUCKET,
                         file_key,
                         tmp_path
                     )
+                    # Upload to OpenAI (non-blocking)
+                    def upload_to_openai():
+                        with open(tmp_path, "rb") as f:
+                            return self.openai_client.files.create(file=f, purpose="assistants")
+                    uploaded_file = await asyncio.to_thread(upload_to_openai)
                     messages = [
                         {"role": "system", "content": explanation_prompt},
                         {"role": "user", "content": [{"type": "file", "file": {"file_id": uploaded_file.id}}]}
                     ]
+                    # Call OpenAI API (non-blocking)
+                    response = await asyncio.to_thread(
+                        self.openai_client.chat.completions.create,
                         model="gpt-4o-mini",
                         messages=messages
                     )
+                    # Clean up OpenAI file (non-blocking)
+                    await asyncio.to_thread(
+                        self.openai_client.files.delete,
+                        uploaded_file.id
+                    )
                     return response.choices[0].message.content
                 finally:
+                    # Remove temp file (non-blocking)
                     if os.path.exists(tmp_path):
+                        await asyncio.to_thread(os.remove, tmp_path)
             else:
                 messages = [
                     {"role": "system", "content": explanation_prompt},
                     {"role": "user", "content": f"Please explain the question: {question}"}
                 ]
+                # Call OpenAI API (non-blocking)
+                response = await asyncio.to_thread(
+                    self.openai_client.chat.completions.create,
                     model="gpt-4o-mini",
                     messages=messages
                 )

services/mindmap_service.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import logging
 import os
 import tempfile
 from typing import Optional
 import openai
@@ -20,28 +21,34 @@ class MindMapService:
     ) -> str:
         """
         Generates a Mermaid mindmap from either an S3 PDF or direct text input.
         """
         try:
             system_prompt = get_mindmap_system_prompt()
             if file_key:
-                # Download PDF from S3
                 tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".pdf")
                 tmp_path = tmp.name
                 tmp.close()
                 try:
-                    s3_service.s3_client.download_file(
                         settings.AWS_S3_BUCKET,
                         file_key,
                         tmp_path
                     )
-                    with open(tmp_path, "rb") as f:
-                        uploaded_file = self.openai_client.files.create(
-                            file=f,
-                            purpose="assistants"
-                        )
                     messages = [
                         {"role": "system", "content": system_prompt},
@@ -56,27 +63,35 @@ class MindMapService:
                         }
                     ]
-                    response = self.openai_client.chat.completions.create(
                         model="gpt-4o-mini",
                         messages=messages,
                         temperature=0.7
                     )
-                    # Clean up OpenAI file
-                    self.openai_client.files.delete(uploaded_file.id)
                     raw_content = response.choices[0].message.content
                 finally:
                     if os.path.exists(tmp_path):
-                        os.remove(tmp_path)
             elif text_input:
                 messages = [
                     {"role": "system", "content": system_prompt},
                     {"role": "user", "content": text_input}
                 ]
-                response = self.openai_client.chat.completions.create(
                     model="gpt-4o-mini",
                     messages=messages,
                     temperature=0.7
@@ -92,12 +107,6 @@ class MindMapService:
             elif "```" in raw_content:
                 raw_content = raw_content.split("```")[1].split("```")[0].strip()
-            # Ensure it starts with 'mindmap'
-            if "mindmap" not in raw_content.lower():
-                # If the AI missed the header, we might need to handle it,
-                # but usually the prompt is strong.
-                pass
             return raw_content.strip()
         except Exception as e:

 import logging
 import os
+import asyncio
 import tempfile
 from typing import Optional
 import openai
     ) -> str:
         """
         Generates a Mermaid mindmap from either an S3 PDF or direct text input.
+        Uses asyncio.to_thread for all blocking I/O operations.
         """
         try:
             system_prompt = get_mindmap_system_prompt()
             if file_key:
+                # Download PDF from S3 (non-blocking)
                 tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".pdf")
                 tmp_path = tmp.name
                 tmp.close()
                 try:
+                    await asyncio.to_thread(
+                        s3_service.s3_client.download_file,
                         settings.AWS_S3_BUCKET,
                         file_key,
                         tmp_path
                     )
+                    # Upload to OpenAI (non-blocking)
+                    def upload_to_openai():
+                        with open(tmp_path, "rb") as f:
+                            return self.openai_client.files.create(
+                                file=f,
+                                purpose="assistants"
+                            )
+                    uploaded_file = await asyncio.to_thread(upload_to_openai)
                     messages = [
                         {"role": "system", "content": system_prompt},
                         }
                     ]
+                    # Call OpenAI (non-blocking)
+                    response = await asyncio.to_thread(
+                        self.openai_client.chat.completions.create,
                         model="gpt-4o-mini",
                         messages=messages,
                         temperature=0.7
                     )
+                    # Clean up OpenAI file (non-blocking)
+                    await asyncio.to_thread(
+                        self.openai_client.files.delete,
+                        uploaded_file.id
+                    )
                     raw_content = response.choices[0].message.content
                 finally:
                     if os.path.exists(tmp_path):
+                        await asyncio.to_thread(os.remove, tmp_path)
             elif text_input:
                 messages = [
                     {"role": "system", "content": system_prompt},
                     {"role": "user", "content": text_input}
                 ]
+                # Call OpenAI (non-blocking)
+                response = await asyncio.to_thread(
+                    self.openai_client.chat.completions.create,
                     model="gpt-4o-mini",
                     messages=messages,
                     temperature=0.7
             elif "```" in raw_content:
                 raw_content = raw_content.split("```")[1].split("```")[0].strip()
             return raw_content.strip()
         except Exception as e:

services/podcast_service.py CHANGED Viewed

@@ -3,6 +3,7 @@ import os
 import json
 import time
 import struct
 import logging
 import mimetypes
 from datetime import datetime
@@ -50,26 +51,35 @@ class PodcastService:
         elif duration_minutes <= 15: return 4000
         else: return 5000
-    async def analyze_pdf(self, file_key: str, duration_minutes: int, model: str = "gpt-4o"):
         # 1. Get file from S3
         # Since openai files.create needs a file, we download it temporarily
         temp_path = f"temp_{int(time.time())}.pdf"
         try:
             import boto3
-            s3 = boto3.client('s3',
-                              aws_access_key_id=settings.AWS_ACCESS_KEY_ID,
-                              aws_secret_access_key=settings.AWS_SECRET_ACCESS_KEY,
-                              region_name=settings.AWS_REGION)
-            s3.download_file(settings.AWS_S3_BUCKET, file_key, temp_path)
-            # 2. Upload to OpenAI
-            with open(temp_path, "rb") as f:
-                file_response = self.openai_client.files.create(file=f, purpose="assistants")
-            # 3. Analyze
             formatted_prompt = ANALYSIS_PROMPT.format(duration_minutes=duration_minutes)
-            response = self.openai_client.chat.completions.parse(
                 model=model,
                 messages=[
                     {"role": "system", "content": formatted_prompt},
@@ -81,10 +91,11 @@ class PodcastService:
             return response.choices[0].message.content
         finally:
             if os.path.exists(temp_path):
-                os.remove(temp_path)
     async def generate_script(self, user_prompt: str, model: str, duration_minutes: int,
-                             podcast_format: str, pdf_suggestions: str, file_key: Optional[str] = None):
         target_words = self.compute_script_targets(duration_minutes)
         formatted_system = SYSTEM_PROMPT.format(
             target_words=target_words,
@@ -97,15 +108,24 @@ class PodcastService:
         temp_path = None
         if file_key:
             temp_path = f"temp_gen_{int(time.time())}.pdf"
-            import boto3
-            s3 = boto3.client('s3',
-                              aws_access_key_id=settings.AWS_ACCESS_KEY_ID,
-                              aws_secret_access_key=settings.AWS_SECRET_ACCESS_KEY,
-                              region_name=settings.AWS_REGION)
-            s3.download_file(settings.AWS_S3_BUCKET, file_key, temp_path)
-            with open(temp_path, "rb") as f:
-                file_response = self.openai_client.files.create(file=f, purpose="assistants")
             messages.append({
                 "role": "user",
@@ -118,7 +138,9 @@ class PodcastService:
             messages.append({"role": "user", "content": user_prompt})
         try:
-            response = self.openai_client.chat.completions.create(
                 model=model,
                 messages=messages,
                 temperature=1.0,
@@ -127,14 +149,37 @@ class PodcastService:
             return response.choices[0].message.content
         finally:
             if temp_path and os.path.exists(temp_path):
-                os.remove(temp_path)
     def parse_script(self, script: str) -> List[Tuple[str, str]]:
         dialogs = []
-        pattern = re.compile(r"^(Speaker [12])[::]\s*(.*)$", re.MULTILINE)
-        for match in pattern.finditer(script):
-            speaker, text = match.groups()
-            dialogs.append((speaker, text))
         return dialogs
     def split_script(self, dialogs: List[Tuple[str, str]], chunk_size=20) -> List[str]:
@@ -144,9 +189,12 @@ class PodcastService:
             chunks.append("\n".join([f"{s}: {t}" for s, t in chunk]))
         return chunks
-    def generate_audio_chunk(self, chunk_script: str, tts_model: str, spk1_voice: str,
                            spk2_voice: str, temperature: float, index: int) -> Optional[str]:
         try:
             contents = [types.Content(role="user", parts=[types.Part.from_text(text=chunk_script)])]
             config = types.GenerateContentConfig(
                 temperature=temperature,
@@ -163,27 +211,37 @@ class PodcastService:
                 )
             )
             audio_data = None
             mime_type = "audio/wav"
-            for chunk in self.genai_client.models.generate_content_stream(model=tts_model, contents=contents, config=config):
                 if chunk.candidates and chunk.candidates[0].content.parts:
                     part = chunk.candidates[0].content.parts[0]
                     if part.inline_data:
                         audio_data = part.inline_data.data
                         mime_type = part.inline_data.mime_type
                         break
             if audio_data:
                 # Basic WAV conversion if needed (simplified from original)
                 if "wav" not in mime_type.lower():
                     # We usually get raw PCM or similar, need header
                     audio_data = self._convert_to_wav(audio_data, mime_type)
                 path = f"chunk_{index}_{int(time.time())}.wav"
                 with open(path, "wb") as f:
                     f.write(audio_data)
                 return path
         except Exception as e:
             logger.error(f"Error generating chunk {index}: {e}")
         return None
@@ -201,49 +259,72 @@ class PodcastService:
         return header + audio_data
     async def generate_full_audio(self, script: str, tts_model: str, spk1_voice: str,
-                                spk2_voice: str, temperature: float, bgm_choice: str):
         dialogs = self.parse_script(script)
         chunks = self.split_script(dialogs)
-        chunk_paths = [None] * len(chunks)
-        with ThreadPoolExecutor(max_workers=4) as executor:
-            futures = {executor.submit(self.generate_audio_chunk, chunks[i], tts_model, spk1_voice, spk2_voice, temperature, i): i for i in range(len(chunks))}
-            for future in as_completed(futures):
-                idx = futures[future]
-                chunk_paths[idx] = future.result()
-        valid_paths = [p for p in chunk_paths if p]
-        if not valid_paths: return None
-        # Combine
-        combined = AudioSegment.empty()
-        for p in valid_paths:
-            combined += AudioSegment.from_file(p)
-            combined += AudioSegment.silent(duration=500)
-            os.remove(p)
-        final_path = f"final_podcast_{int(time.time())}.wav"
-        # Mix BGM
-        bgm_path = BGM_CHOICES.get(bgm_choice)
-        if bgm_path and os.path.exists(bgm_path):
-            bgm = AudioSegment.from_file(bgm_path)
-            # Simple mix: loop BGM, fade in/out
-            if len(bgm) < len(combined) + 10000:
-                bgm = bgm * ( (len(combined) + 10000) // len(bgm) + 1 )
-            bgm = bgm[:len(combined) + 10000]
-            bgm_main = bgm[5000:5000+len(combined)] - 16
-            bgm_intro = bgm[:5000]
-            bgm_outro = bgm[5000+len(combined):].fade_out(5000) - 16
-            bgm_processed = bgm_intro + bgm_main + bgm_outro
-            combined_with_intro = AudioSegment.silent(duration=5000) + combined + AudioSegment.silent(duration=5000)
-            final_audio = combined_with_intro.overlay(bgm_processed)
-            final_audio.export(final_path, format="wav")
-        else:
-            combined.export(final_path, format="wav")
         return final_path
 podcast_service = PodcastService()

 import json
 import time
 import struct
+import asyncio
 import logging
 import mimetypes
 from datetime import datetime
         elif duration_minutes <= 15: return 4000
         else: return 5000
+    async def analyze_pdf(self, file_key: str, duration_minutes: int, model: str = "gpt-4o", progress_callback=None):
         # 1. Get file from S3
         # Since openai files.create needs a file, we download it temporarily
         temp_path = f"temp_{int(time.time())}.pdf"
         try:
             import boto3
+            # Create S3 client and download (non-blocking)
+            def download_from_s3():
+                s3 = boto3.client('s3',
+                                  aws_access_key_id=settings.AWS_ACCESS_KEY_ID,
+                                  aws_secret_access_key=settings.AWS_SECRET_ACCESS_KEY,
+                                  region_name=settings.AWS_REGION)
+                s3.download_file(settings.AWS_S3_BUCKET, file_key, temp_path)
+            await asyncio.to_thread(download_from_s3)
+            # 2. Upload to OpenAI (non-blocking)
+            def upload_to_openai():
+                with open(temp_path, "rb") as f:
+                    return self.openai_client.files.create(file=f, purpose="assistants")
+            file_response = await asyncio.to_thread(upload_to_openai)
+            # 3. Analyze (non-blocking)
             formatted_prompt = ANALYSIS_PROMPT.format(duration_minutes=duration_minutes)
+            response = await asyncio.to_thread(
+                self.openai_client.chat.completions.parse,
                 model=model,
                 messages=[
                     {"role": "system", "content": formatted_prompt},
             return response.choices[0].message.content
         finally:
             if os.path.exists(temp_path):
+                await asyncio.to_thread(os.remove, temp_path)
     async def generate_script(self, user_prompt: str, model: str, duration_minutes: int,
+                             podcast_format: str, pdf_suggestions: str, file_key: Optional[str] = None, progress_callback=None):
         target_words = self.compute_script_targets(duration_minutes)
         formatted_system = SYSTEM_PROMPT.format(
             target_words=target_words,
         temp_path = None
         if file_key:
             temp_path = f"temp_gen_{int(time.time())}.pdf"
+            # Download from S3 (non-blocking)
+            def download_from_s3():
+                import boto3
+                s3 = boto3.client('s3',
+                                  aws_access_key_id=settings.AWS_ACCESS_KEY_ID,
+                                  aws_secret_access_key=settings.AWS_SECRET_ACCESS_KEY,
+                                  region_name=settings.AWS_REGION)
+                s3.download_file(settings.AWS_S3_BUCKET, file_key, temp_path)
+            await asyncio.to_thread(download_from_s3)
+            # Upload to OpenAI (non-blocking)
+            def upload_to_openai():
+                with open(temp_path, "rb") as f:
+                    return self.openai_client.files.create(file=f, purpose="assistants")
+            file_response = await asyncio.to_thread(upload_to_openai)
             messages.append({
                 "role": "user",
             messages.append({"role": "user", "content": user_prompt})
         try:
+            # Call OpenAI API (non-blocking)
+            response = await asyncio.to_thread(
+                self.openai_client.chat.completions.create,
                 model=model,
                 messages=messages,
                 temperature=1.0,
             return response.choices[0].message.content
         finally:
             if temp_path and os.path.exists(temp_path):
+                await asyncio.to_thread(os.remove, temp_path)
     def parse_script(self, script: str) -> List[Tuple[str, str]]:
         dialogs = []
+        # Try English format: "Speaker 1:" or "**Speaker 1**:"
+        pattern_en = re.compile(r"^\*?\*?(Speaker [12])\*?\*?[:：]\s*(.*)$", re.MULTILINE)
+        matches = list(pattern_en.finditer(script))
+        if matches:
+            print(f"[DEBUG] Found {len(matches)} English patterns")
+            for match in matches:
+                speaker, text = match.groups()
+                dialogs.append((speaker, text))
+        else:
+            # Try Japanese format: "スピーカー1:"
+            pattern_jp = re.compile(r"^\*?\*?(スピーカー[12])\*?\*?[:：]\s*(.*)$", re.MULTILINE)
+            matches = list(pattern_jp.finditer(script))
+            if matches:
+                print(f"[DEBUG] Found {len(matches)} Japanese patterns")
+                for match in matches:
+                    speaker_jp, text = match.groups()
+                    speaker_num = "1" if "1" in speaker_jp else "2"
+                    speaker = f"Speaker {speaker_num}"
+                    dialogs.append((speaker, text))
+            else:
+                print(f"[ERROR] No patterns found!")
+                print(f"[DEBUG] Preview: {script[:300]}")
         return dialogs
     def split_script(self, dialogs: List[Tuple[str, str]], chunk_size=20) -> List[str]:
             chunks.append("\n".join([f"{s}: {t}" for s, t in chunk]))
         return chunks
+    async def generate_audio_chunk(self, chunk_script: str, tts_model: str, spk1_voice: str,
                            spk2_voice: str, temperature: float, index: int) -> Optional[str]:
         try:
+            print(f"[DEBUG] Chunk {index}: Starting generation")
+            print(f"[DEBUG] Chunk {index}: Script length: {len(chunk_script)} chars")
             contents = [types.Content(role="user", parts=[types.Part.from_text(text=chunk_script)])]
             config = types.GenerateContentConfig(
                 temperature=temperature,
                 )
             )
+            print(f"[DEBUG] Chunk {index}: Calling Gemini API (Async)...")
             audio_data = None
             mime_type = "audio/wav"
+            # Use client.aio for non-blocking network I/O
+            async for chunk in await self.genai_client.aio.models.generate_content_stream(model=tts_model, contents=contents, config=config):
                 if chunk.candidates and chunk.candidates[0].content.parts:
                     part = chunk.candidates[0].content.parts[0]
                     if part.inline_data:
                         audio_data = part.inline_data.data
                         mime_type = part.inline_data.mime_type
+                        print(f"[DEBUG] Chunk {index}: Received audio data, mime: {mime_type}")
                         break
             if audio_data:
                 # Basic WAV conversion if needed (simplified from original)
                 if "wav" not in mime_type.lower():
+                    print(f"[DEBUG] Chunk {index}: Converting to WAV")
                     # We usually get raw PCM or similar, need header
                     audio_data = self._convert_to_wav(audio_data, mime_type)
                 path = f"chunk_{index}_{int(time.time())}.wav"
                 with open(path, "wb") as f:
                     f.write(audio_data)
+                print(f"[DEBUG] Chunk {index}: Saved to {path}")
                 return path
+            else:
+                print(f"[ERROR] Chunk {index}: No audio data received from Gemini")
         except Exception as e:
+            print(f"[ERROR] Chunk {index}: Exception: {e}")
             logger.error(f"Error generating chunk {index}: {e}")
         return None
         return header + audio_data
     async def generate_full_audio(self, script: str, tts_model: str, spk1_voice: str,
                                 spk2_voice: str, temperature: float, bgm_choice: str, progress_callback=None):
         """Render a whole script to one WAV file, optionally with background music.

         The script is parsed into turns, split into chunks, and every chunk is
         synthesised concurrently through ``generate_audio_chunk``.  The chunk
         files that were produced are then concatenated (0.5 s of silence after
         each) and, if ``bgm_choice`` maps to an existing file in
         ``BGM_CHOICES``, mixed over a music bed with a 5 s intro and a 5 s
         faded outro.  Decoding and mixing run in a worker thread.

         Args:
             script: Full dialogue script.
             tts_model: Model name forwarded to ``generate_audio_chunk``.
             spk1_voice: Voice forwarded for speaker 1.
             spk2_voice: Voice forwarded for speaker 2.
             temperature: Sampling temperature forwarded to the chunk generator.
             bgm_choice: Key looked up in ``BGM_CHOICES``; unknown keys or
                 missing files mean no music.
             progress_callback: Accepted for interface compatibility; this
                 method does not invoke it.
                 NOTE(review): its expected signature is not visible here --
                 confirm with the callers before wiring it in.

         Returns:
             Relative path of the exported WAV file, or ``None`` when no chunk
             produced audio.
         """
         print("[DEBUG] Starting generate_full_audio")
         dialogs = self.parse_script(script)
         print(f"[DEBUG] Parsed {len(dialogs)} dialogs")
         chunks = self.split_script(dialogs)
         print(f"[DEBUG] Split into {len(chunks)} chunks")

         # All chunks are synthesised concurrently; a failed chunk yields None.
         print("[DEBUG] Starting parallel chunk generation...")
         chunk_paths = await asyncio.gather(*(
             self.generate_audio_chunk(
                 chunk_script, tts_model, spk1_voice, spk2_voice, temperature, i
             )
             for i, chunk_script in enumerate(chunks)
         ))
         valid_paths = [p for p in chunk_paths if p]
         print(f"[DEBUG] Valid chunks: {len(valid_paths)} out of {len(chunk_paths)}")
         if not valid_paths:
             print("[ERROR] No valid audio chunks generated!")
             return None

         lead_ms = 5000   # music-only intro and outro length
         gap_ms = 500     # silence appended after every chunk
         duck_db = 16     # attenuation of the music under and after speech

         def combine_audio():
             print("[DEBUG] Starting audio combination in thread")
             try:
                 combined = AudioSegment.empty()
                 for p in valid_paths:
                     combined += AudioSegment.from_file(p)
                     combined += AudioSegment.silent(duration=gap_ms)
             finally:
                 # Remove every chunk file even when decoding one of them
                 # fails, so a bad chunk does not leave temp files behind.
                 for p in valid_paths:
                     try:
                         os.remove(p)
                     except OSError as e:
                         print(f"[WARN] Could not remove chunk file {p}: {e}")

             # Nanosecond timestamp: whole seconds let two requests finishing
             # in the same second overwrite each other's output.
             final_path = f"final_podcast_{time.time_ns()}.wav"

             bgm = None
             bgm_path = BGM_CHOICES.get(bgm_choice)
             if bgm_path and os.path.exists(bgm_path):
                 print(f"[DEBUG] Adding BGM: {bgm_choice}")
                 bgm = AudioSegment.from_file(bgm_path)

             # A zero-length music file cannot be looped; fall back to speech only.
             if bgm is not None and len(bgm) > 0:
                 speech_ms = len(combined)
                 needed_ms = speech_ms + 2 * lead_ms
                 if len(bgm) < needed_ms:
                     # Loop the track until it covers intro + speech + outro.
                     bgm = bgm * (needed_ms // len(bgm) + 1)
                 bgm = bgm[:needed_ms]
                 bgm_intro = bgm[:lead_ms]
                 bgm_main = bgm[lead_ms:lead_ms + speech_ms] - duck_db
                 bgm_outro = bgm[lead_ms + speech_ms:].fade_out(lead_ms) - duck_db
                 bed = bgm_intro + bgm_main + bgm_outro
                 padded = (
                     AudioSegment.silent(duration=lead_ms)
                     + combined
                     + AudioSegment.silent(duration=lead_ms)
                 )
                 padded.overlay(bed).export(final_path, format="wav")
             else:
                 combined.export(final_path, format="wav")
             return final_path

         # Decoding and mixing are CPU/disk heavy; keep them off the event loop.
         final_path = await asyncio.to_thread(combine_audio)
         print(f"[DEBUG] Audio generation complete: {final_path}")
         return final_path
 # Module-level PodcastService instance, constructed once when this module is imported.
 podcast_service = PodcastService()

services/quiz_service.py CHANGED Viewed

@@ -1,8 +1,9 @@
 import json
 import logging
 import os
 import tempfile
-from typing import List, Dict, Optional, Any
 import openai
 from core.config import settings
@@ -22,12 +23,17 @@ class QuizService:
         difficulty: str = "medium",
         topic: Optional[str] = None,
         language: str = "English",
-        count_mode: str = "STANDARD"
     ) -> List[Dict[str, Any]]:
         """
         Generates a quiz from either an S3 PDF or direct text input.
         """
         try:
             # Map count mode to actual numbers
             counts = {
                 "FEWER": "5-10",
@@ -39,23 +45,37 @@ class QuizService:
             system_prompt = get_quiz_system_prompt(language).replace("{NUM_QUESTIONS}", num_range)
             if file_key:
-                # Download PDF from S3
                 tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".pdf")
                 tmp_path = tmp.name
                 tmp.close()
                 try:
-                    s3_service.s3_client.download_file(
                         settings.AWS_S3_BUCKET,
                         file_key,
                         tmp_path
                     )
-                    with open(tmp_path, "rb") as f:
-                        uploaded_file = self.openai_client.files.create(
-                            file=f,
-                            purpose="assistants"
-                        )
                     user_message = f"Analyze the PDF and create {num_range} questions. Difficulty: {difficulty}."
                     if topic:
@@ -75,21 +95,33 @@ class QuizService:
                         }
                     ]
-                    response = self.openai_client.chat.completions.create(
                         model="gpt-4o-mini",
                         messages=messages,
                         response_format={"type": "json_object"},
                         temperature=0.7
                     )
-                    self.openai_client.files.delete(uploaded_file.id)
                     raw_content = response.choices[0].message.content
                 finally:
                     if os.path.exists(tmp_path):
-                        os.remove(tmp_path)
             elif text_input:
                 user_message = f"Analyze the text and create {num_range} questions. Difficulty: {difficulty}."
                 if topic:
                     user_message += f" Topic: {topic}."
@@ -99,7 +131,10 @@ class QuizService:
                     {"role": "system", "content": system_prompt},
                     {"role": "user", "content": user_message}
                 ]
-                response = self.openai_client.chat.completions.create(
                     model="gpt-4o-mini",
                     messages=messages,
                     response_format={"type": "json_object"},
@@ -110,6 +145,9 @@ class QuizService:
             else:
                 raise ValueError("Either file_key or text_input must be provided")
             data = json.loads(raw_content)
             # The prompt asks for {"quizzes": [...]}
             return data.get("quizzes", [])

 import json
 import logging
 import os
+import asyncio
 import tempfile
+from typing import List, Dict, Optional, Any, Callable
 import openai
 from core.config import settings
         difficulty: str = "medium",
         topic: Optional[str] = None,
         language: str = "English",
+        count_mode: str = "STANDARD",
+        progress_callback: Optional[Callable[[int, str], None]] = None
     ) -> List[Dict[str, Any]]:
         """
         Generates a quiz from either an S3 PDF or direct text input.
+        Uses asyncio.to_thread for all blocking I/O operations.
         """
         try:
+            if progress_callback:
+                progress_callback(5, "Preparing quiz generation...")
             # Map count mode to actual numbers
             counts = {
                 "FEWER": "5-10",
             system_prompt = get_quiz_system_prompt(language).replace("{NUM_QUESTIONS}", num_range)
             if file_key:
+                if progress_callback:
+                    progress_callback(15, "Downloading file from S3...")
+                # Download PDF from S3 (non-blocking)
                 tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".pdf")
                 tmp_path = tmp.name
                 tmp.close()
                 try:
+                    await asyncio.to_thread(
+                        s3_service.s3_client.download_file,
                         settings.AWS_S3_BUCKET,
                         file_key,
                         tmp_path
                     )
+                    if progress_callback:
+                        progress_callback(30, "Uploading to OpenAI...")
+                    # Upload to OpenAI (non-blocking)
+                    def upload_to_openai():
+                        with open(tmp_path, "rb") as f:
+                            return self.openai_client.files.create(
+                                file=f,
+                                purpose="assistants"
+                            )
+                    uploaded_file = await asyncio.to_thread(upload_to_openai)
+                    if progress_callback:
+                        progress_callback(45, "Generating quiz questions...")
                     user_message = f"Analyze the PDF and create {num_range} questions. Difficulty: {difficulty}."
                     if topic:
                         }
                     ]
+                    # Call OpenAI API (non-blocking)
+                    response = await asyncio.to_thread(
+                        self.openai_client.chat.completions.create,
                         model="gpt-4o-mini",
                         messages=messages,
                         response_format={"type": "json_object"},
                         temperature=0.7
                     )
+                    if progress_callback:
+                        progress_callback(75, "Cleaning up...")
+                    # Clean up (non-blocking)
+                    await asyncio.to_thread(
+                        self.openai_client.files.delete,
+                        uploaded_file.id
+                    )
                     raw_content = response.choices[0].message.content
                 finally:
                     if os.path.exists(tmp_path):
+                        await asyncio.to_thread(os.remove, tmp_path)
             elif text_input:
+                if progress_callback:
+                    progress_callback(20, "Generating quiz questions...")
                 user_message = f"Analyze the text and create {num_range} questions. Difficulty: {difficulty}."
                 if topic:
                     user_message += f" Topic: {topic}."
                     {"role": "system", "content": system_prompt},
                     {"role": "user", "content": user_message}
                 ]
+                # Call OpenAI API (non-blocking)
+                response = await asyncio.to_thread(
+                    self.openai_client.chat.completions.create,
                     model="gpt-4o-mini",
                     messages=messages,
                     response_format={"type": "json_object"},
             else:
                 raise ValueError("Either file_key or text_input must be provided")
+            if progress_callback:
+                progress_callback(85, "Parsing results...")
             data = json.loads(raw_content)
             # The prompt asks for {"quizzes": [...]}
             return data.get("quizzes", [])

services/report_service.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import json
 import logging
 import os
 import tempfile
 from typing import List, Dict, Optional, Any
 import openai
@@ -23,28 +24,34 @@ class ReportService:
     ) -> List[Dict[str, str]]:
         """
         Generates 4 AI-suggested report formats based on the content.
         """
         try:
             system_prompt = get_report_suggestion_prompt(language)
             if file_key:
-                # Download PDF from S3
                 tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".pdf")
                 tmp_path = tmp.name
                 tmp.close()
                 try:
-                    s3_service.s3_client.download_file(
                         settings.AWS_S3_BUCKET,
                         file_key,
                         tmp_path
                     )
-                    with open(tmp_path, "rb") as f:
-                        uploaded_file = self.openai_client.files.create(
-                            file=f,
-                            purpose="assistants"
-                        )
                     messages = [
                         {"role": "system", "content": system_prompt},
@@ -59,26 +66,36 @@ class ReportService:
                         }
                     ]
-                    response = self.openai_client.chat.completions.create(
                         model="gpt-4o-mini",
                         messages=messages,
                         response_format={"type": "json_object"},
                         temperature=0.7
                     )
-                    self.openai_client.files.delete(uploaded_file.id)
                     raw_content = response.choices[0].message.content
                 finally:
                     if os.path.exists(tmp_path):
-                        os.remove(tmp_path)
             elif text_input:
                 messages = [
                     {"role": "system", "content": system_prompt},
                     {"role": "user", "content": f"Analyze this content:\n\n{text_input}"}
                 ]
-                response = self.openai_client.chat.completions.create(
                     model="gpt-4o-mini",
                     messages=messages,
                     response_format={"type": "json_object"},
@@ -106,6 +123,7 @@ class ReportService:
     ) -> str:
         """
         Generates a full report based on the selected format.
         """
         try:
             base_prompt = get_report_prompt(format_key, custom_prompt or "", language)
@@ -115,7 +133,7 @@ class ReportService:
                 system_prompt = (
                     "あなたは日本語でレポートを作成するAIアシスタントです。すべての回答は日本語で書いてください。\n\n"
                     f"{base_prompt}\n\n"
-                    "重要: レポート全体を日本語で書いてください。回答はマークダウン形式で、適切な見出し、箇条書き、構造を使用して読みやすくフォーマットしてください。"
                 )
             else:
                 system_prompt = (
@@ -125,23 +143,28 @@ class ReportService:
                 )
             if file_key:
-                # Download PDF from S3
                 tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".pdf")
                 tmp_path = tmp.name
                 tmp.close()
                 try:
-                    s3_service.s3_client.download_file(
                         settings.AWS_S3_BUCKET,
                         file_key,
                         tmp_path
                     )
-                    with open(tmp_path, "rb") as f:
-                        uploaded_file = self.openai_client.files.create(
-                            file=f,
-                            purpose="assistants"
-                        )
                     messages = [
                         {"role": "system", "content": system_prompt},
@@ -156,25 +179,35 @@ class ReportService:
                         }
                     ]
-                    response = self.openai_client.chat.completions.create(
                         model="gpt-4o-mini",
                         messages=messages,
                         temperature=0.7
                     )
-                    self.openai_client.files.delete(uploaded_file.id)
                     return response.choices[0].message.content
                 finally:
                     if os.path.exists(tmp_path):
-                        os.remove(tmp_path)
             elif text_input:
                 messages = [
                     {"role": "system", "content": system_prompt},
                     {"role": "user", "content": f"Please analyze the following content and generate a report based on it:\n\n{text_input}"}
                 ]
-                response = self.openai_client.chat.completions.create(
                     model="gpt-4o-mini",
                     messages=messages,
                     temperature=0.7

 import json
 import logging
 import os
+import asyncio
 import tempfile
 from typing import List, Dict, Optional, Any
 import openai
     ) -> List[Dict[str, str]]:
         """
         Generates 4 AI-suggested report formats based on the content.
+        Uses asyncio.to_thread for all blocking I/O operations.
         """
         try:
             system_prompt = get_report_suggestion_prompt(language)
             if file_key:
+                # Download PDF from S3 (non-blocking)
                 tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".pdf")
                 tmp_path = tmp.name
                 tmp.close()
                 try:
+                    await asyncio.to_thread(
+                        s3_service.s3_client.download_file,
                         settings.AWS_S3_BUCKET,
                         file_key,
                         tmp_path
                     )
+                    # Upload to OpenAI (non-blocking)
+                    def upload_to_openai():
+                        with open(tmp_path, "rb") as f:
+                            return self.openai_client.files.create(
+                                file=f,
+                                purpose="assistants"
+                            )
+                    uploaded_file = await asyncio.to_thread(upload_to_openai)
                     messages = [
                         {"role": "system", "content": system_prompt},
                         }
                     ]
+                    # Call OpenAI (non-blocking)
+                    response = await asyncio.to_thread(
+                        self.openai_client.chat.completions.create,
                         model="gpt-4o-mini",
                         messages=messages,
                         response_format={"type": "json_object"},
                         temperature=0.7
                     )
+                    # Clean up OpenAI file (non-blocking)
+                    await asyncio.to_thread(
+                        self.openai_client.files.delete,
+                        uploaded_file.id
+                    )
                     raw_content = response.choices[0].message.content
                 finally:
                     if os.path.exists(tmp_path):
+                        await asyncio.to_thread(os.remove, tmp_path)
             elif text_input:
                 messages = [
                     {"role": "system", "content": system_prompt},
                     {"role": "user", "content": f"Analyze this content:\n\n{text_input}"}
                 ]
+                # Call OpenAI (non-blocking)
+                response = await asyncio.to_thread(
+                    self.openai_client.chat.completions.create,
                     model="gpt-4o-mini",
                     messages=messages,
                     response_format={"type": "json_object"},
     ) -> str:
         """
         Generates a full report based on the selected format.
+        Uses asyncio.to_thread for all blocking I/O operations.
         """
         try:
             base_prompt = get_report_prompt(format_key, custom_prompt or "", language)
                 system_prompt = (
                     "あなたは日本語でレポートを作成するAIアシスタントです。すべての回答は日本語で書いてください。\n\n"
                     f"{base_prompt}\n\n"
+                    "重要: レポート全体を日本語で書いてください。回答はマークダウン形式で、適切な見出し、箇条書き、構造を使用して読みやすくフォーマットしてください。"
                 )
             else:
                 system_prompt = (
                 )
             if file_key:
+                # Download PDF from S3 (non-blocking)
                 tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".pdf")
                 tmp_path = tmp.name
                 tmp.close()
                 try:
+                    await asyncio.to_thread(
+                        s3_service.s3_client.download_file,
                         settings.AWS_S3_BUCKET,
                         file_key,
                         tmp_path
                     )
+                    # Upload to OpenAI (non-blocking)
+                    def upload_to_openai():
+                        with open(tmp_path, "rb") as f:
+                            return self.openai_client.files.create(
+                                file=f,
+                                purpose="assistants"
+                            )
+                    uploaded_file = await asyncio.to_thread(upload_to_openai)
                     messages = [
                         {"role": "system", "content": system_prompt},
                         }
                     ]
+                    # Call OpenAI (non-blocking)
+                    response = await asyncio.to_thread(
+                        self.openai_client.chat.completions.create,
                         model="gpt-4o-mini",
                         messages=messages,
                         temperature=0.7
                     )
+                    # Clean up OpenAI (non-blocking)
+                    await asyncio.to_thread(
+                        self.openai_client.files.delete,
+                        uploaded_file.id
+                    )
                     return response.choices[0].message.content
                 finally:
                     if os.path.exists(tmp_path):
+                        await asyncio.to_thread(os.remove, tmp_path)
             elif text_input:
                 messages = [
                     {"role": "system", "content": system_prompt},
                     {"role": "user", "content": f"Please analyze the following content and generate a report based on it:\n\n{text_input}"}
                 ]
+                # Call OpenAI (non-blocking)
+                response = await asyncio.to_thread(
+                    self.openai_client.chat.completions.create,
                     model="gpt-4o-mini",
                     messages=messages,
                     temperature=0.7