Spaces:

Hammad712
/

Video-Rag

Runtime error

App Files Files Community

Hammad712 committed on Mar 19

Commit

3e1a2e1

verified ·

1 Parent(s): 77de2d6

Update main.py

Browse files

Files changed (1) hide show

main.py +447 -61

main.py CHANGED Viewed

@@ -1,16 +1,26 @@
-from fastapi import FastAPI, HTTPException, Body, Query, File, UploadFile, Form
 from fastapi.middleware.cors import CORSMiddleware
-from pydantic import BaseModel
 from typing import List, Optional, Dict, Any, Union
 import uuid
 import os
 from dotenv import load_dotenv
 # Load environment variables
 load_dotenv()
-# Import necessary libraries
-from langchain_community.embeddings import HuggingFaceBgeEmbeddings
 from langchain_community.vectorstores import FAISS
 from langchain.chains import ConversationalRetrievalChain
 from langchain_core.prompts import PromptTemplate, ChatPromptTemplate
@@ -20,8 +30,26 @@ from langchain_groq import ChatGroq
 from google import genai
 from google.genai import types
 # Initialize FastAPI app
-app = FastAPI(title="RAG System API", description="An API for question answering based on YouTube video content or uploaded video files")
 # Configure CORS
 app.add_middleware(
@@ -38,15 +66,169 @@ class TranscriptionRequest(BaseModel):
 class QueryRequest(BaseModel):
     query: str
-    session_id: Optional[str] = None
 class QueryResponse(BaseModel):
     answer: str
     session_id: str
     source_documents: Optional[List[str]] = None
-# Global variables
-sessions = {}
 # Initialize Google API client
 def init_google_client():
@@ -59,7 +241,7 @@ def init_google_client():
 def get_llm():
     """
     Returns the language model instance (LLM) using ChatGroq API.
-    The LLM used is Llama 3.1 with a versatile 70 billion parameters model.
     """
     api_key = os.getenv("GROQ_API_KEY", "")
     if not api_key:
@@ -78,7 +260,7 @@ def get_embeddings():
     model_name = "BAAI/bge-small-en"
     model_kwargs = {"device": "cpu"}
     encode_kwargs = {"normalize_embeddings": True}
-    embeddings = HuggingFaceBgeEmbeddings(
         model_name=model_name, model_kwargs=model_kwargs, encode_kwargs=encode_kwargs
     )
     return embeddings
@@ -125,7 +307,7 @@ def create_chain(retriever):
     return chain
 # Process transcription and prepare RAG system
-def process_transcription(transcription):
     # Process the transcription
     text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=20)
     all_splits = text_splitter.split_text(transcription)
@@ -138,17 +320,77 @@ def process_transcription(transcription):
     # Create a session ID
     session_id = str(uuid.uuid4())
-    # Store session data
     sessions[session_id] = {
         "retriever": retriever,
-        "chat_history": [],
-        "transcription": transcription
     }
     return session_id
 @app.post("/transcribe", response_model=Dict[str, str])
-async def transcribe_video(request: TranscriptionRequest):
     """
     Transcribe a YouTube video and prepare the RAG system
     """
@@ -173,7 +415,14 @@ async def transcribe_video(request: TranscriptionRequest):
         transcription = response.candidates[0].content.parts[0].text
         # Process transcription and get session ID
-        session_id = process_transcription(transcription)
         return {"session_id": session_id, "message": "YouTube video transcribed and RAG system prepared"}
@@ -181,14 +430,21 @@ async def transcribe_video(request: TranscriptionRequest):
         raise HTTPException(status_code=500, detail=f"Error transcribing video: {str(e)}")
 @app.post("/upload", response_model=Dict[str, str])
-async def upload_video(file: UploadFile = File(...), prompt: str = Form("Transcribe the Video. Write all the things described in the video")):
     """
     Upload a video file (max 20MB), transcribe it and prepare the RAG system
     """
     try:
         # Check file size (20MB limit)
         contents = await file.read()
-        if len(contents) > 20 * 1024 * 1024:  # 20MB in bytes
             raise HTTPException(status_code=400, detail="File size exceeds 20MB limit")
         # Check file type
@@ -215,7 +471,19 @@ async def upload_video(file: UploadFile = File(...), prompt: str = Form("Transcr
         transcription = response.candidates[0].content.parts[0].text
         # Process transcription and get session ID
-        session_id = process_transcription(transcription)
         return {"session_id": session_id, "message": "Uploaded video transcribed and RAG system prepared"}
@@ -225,18 +493,73 @@ async def upload_video(file: UploadFile = File(...), prompt: str = Form("Transcr
         # Reset file pointer
         await file.seek(0)
 @app.post("/query", response_model=QueryResponse)
-async def query_system(request: QueryRequest):
     """
     Query the RAG system with a question
     """
     try:
         session_id = request.session_id
-        # Create a new session if none provided
         if not session_id or session_id not in sessions:
             raise HTTPException(status_code=404, detail="Session not found. Please transcribe a video first.")
         # Get session data
         session = sessions[session_id]
         retriever = session["retriever"]
@@ -245,11 +568,17 @@ async def query_system(request: QueryRequest):
         # Create chain
         chain = create_chain(retriever)
         # Query the chain
-        result = chain({"question": request.query, "chat_history": chat_history})
         # Update chat history
-        chat_history.append((request.query, result["answer"]))
         # Prepare source documents
         source_docs = [doc.page_content[:100] + "..." for doc in result.get("source_documents", [])]
@@ -263,31 +592,107 @@ async def query_system(request: QueryRequest):
     except Exception as e:
         raise HTTPException(status_code=500, detail=f"Error querying system: {str(e)}")
 @app.get("/sessions/{session_id}", response_model=Dict[str, Any])
-async def get_session_info(session_id: str):
     """
     Get information about a specific session
     """
-    if session_id not in sessions:
         raise HTTPException(status_code=404, detail="Session not found")
-    session = sessions[session_id]
     return {
         "session_id": session_id,
-        "chat_history_length": len(session["chat_history"]),
-        "transcription_preview": session["transcription"][:200] + "..."
     }
 @app.delete("/sessions/{session_id}")
-async def delete_session(session_id: str):
     """
     Delete a session
     """
-    if session_id not in sessions:
         raise HTTPException(status_code=404, detail="Session not found")
-    del sessions[session_id]
     return {"message": f"Session {session_id} deleted successfully"}
 @app.get("/")
@@ -298,43 +703,24 @@ async def root():
     return {
         "message": "Video Transcription and QA API",
         "endpoints": {
             "/transcribe": "Transcribe YouTube videos",
             "/upload": "Upload and transcribe video files (max 20MB)",
             "/query": "Query the RAG system",
             "/sessions/{session_id}": "Get session information",
         }
     }
-@app.route('/transcribe-audio', methods=['POST'])
-def transcribe_audio():
-    if 'audio' not in request.files:
-        return jsonify({"error": "No audio file provided"}), 400
-    audio_file = request.files['audio']
-    # Save the uploaded file temporarily
-    temp_path = os.path.join(os.path.dirname(__file__), "temp_audio.m4a")
-    audio_file.save(temp_path)
-    try:
-        # Use Groq client to transcribe the audio
-        with open(temp_path, "rb") as file:
-            transcription = client.audio.transcriptions.create(
-                file=(temp_path, file.read()),
-                model="whisper-large-v3",
-                response_format="verbose_json",
-            )
-        # Return the transcription result
-        return jsonify({"transcription": transcription.text})
-    except Exception as e:
-        return jsonify({"error": str(e)}), 500
-    finally:
-        # Clean up the temporary file
-        if os.path.exists(temp_path):
-            os.remove(temp_path)
 if __name__ == "__main__":
     import uvicorn
     uvicorn.run(app, host="0.0.0.0", port=8000)

+from fastapi import FastAPI, HTTPException, Depends, File, UploadFile, Form, Response, BackgroundTasks
 from fastapi.middleware.cors import CORSMiddleware
+from fastapi.security import OAuth2PasswordBearer, OAuth2PasswordRequestForm
+from fastapi.responses import StreamingResponse
+from pydantic import BaseModel, Field, EmailStr
 from typing import List, Optional, Dict, Any, Union
 import uuid
 import os
+import io
+import shutil
+from datetime import datetime, timedelta
 from dotenv import load_dotenv
+import hashlib
+import jwt
+from passlib.context import CryptContext
+from pymongo import MongoClient
+from langchain_mongodb.chat_message_histories import MongoDBChatMessageHistory
 # Load environment variables
 load_dotenv()
+# Import necessary libraries - updating deprecated imports
+from langchain_huggingface import HuggingFaceEmbeddings
 from langchain_community.vectorstores import FAISS
 from langchain.chains import ConversationalRetrievalChain
 from langchain_core.prompts import PromptTemplate, ChatPromptTemplate
 from google import genai
 from google.genai import types
+# MongoDB Configuration
+# Connection string and database name come from the environment, with
+# local-development fallbacks when the variables are unset.
+MONGO_URI = os.getenv("MONGO_URI", "mongodb://localhost:27017")
+DATABASE_NAME = os.getenv("MONGO_DB_NAME", "rag_system")
+CHAT_COLLECTION = "chat_history"
+USER_COLLECTION = "users"
+VIDEO_COLLECTION = "videos"
+# Security
+# NOTE(review): when SECRET_KEY is unset, tokens are signed with the public
+# placeholder below and could be forged by anyone — confirm every deployment
+# sets the variable.
+SECRET_KEY = os.getenv("SECRET_KEY", "your_secret_key_here")
+ALGORITHM = "HS256"
+ACCESS_TOKEN_EXPIRE_MINUTES = 30
+# Password hashing
+pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto")
+# OAuth2 scheme
+# tokenUrl names the login route declared with @app.post("/token").
+oauth2_scheme = OAuth2PasswordBearer(tokenUrl="token")
 # Initialize FastAPI app
+app = FastAPI(title="RAG System API", description="An API for question answering based on video content with user authentication")
 # Configure CORS
 app.add_middleware(
 class QueryRequest(BaseModel):
+    # Request body for POST /query: the question and the session it targets.
     query: str
+    # Required: query_system answers 404 for IDs missing from `sessions`.
+    session_id: str
 class QueryResponse(BaseModel):
+    # Response body for POST /query.
     answer: str
     session_id: str
+    # Presumably the truncated chunk previews built in query_system — the
+    # return statement is not visible here, so confirm.
     source_documents: Optional[List[str]] = None
+class User(BaseModel):
+    # Public user fields; this is the response model of /register, so it
+    # deliberately carries no password material.
+    username: str
+    email: EmailStr
+    full_name: Optional[str] = None
+class UserInDB(User):
+    # Shape get_user() builds from a document of the users collection.
+    hashed_password: str
+class UserCreate(User):
+    # Registration payload; register_user hashes this value and drops the
+    # plain text before inserting the document.
+    password: str
+class Token(BaseModel):
+    # Response model of POST /token.
+    access_token: str
+    # The login route always returns "bearer" here.
+    token_type: str
+class TokenData(BaseModel):
+    # Holds the "sub" claim that get_current_user reads from a decoded JWT.
+    username: Optional[str] = None
+class VideoData(BaseModel):
+    # Mirrors the keys of the document process_transcription inserts into the
+    # videos collection.
+    # NOTE(review): no visible code instantiates this model; the insert builds
+    # a plain dict by hand — confirm whether it is still needed.
+    video_id: str
+    user_id: str
+    title: str
+    source_type: str  # "youtube" or "upload"
+    source_url: Optional[str] = None
+    # Naive UTC timestamp (datetime.utcnow carries no tzinfo).
+    created_at: datetime = Field(default_factory=datetime.utcnow)
+    transcription: str
+    size: Optional[int] = None
+# MongoDB connection and chat management
+class MongoDB:
+    def __init__(self):
+        self.client = MongoClient(MONGO_URI)
+        self.db = self.client[DATABASE_NAME]
+        self.users = self.db[USER_COLLECTION]
+        self.videos = self.db[VIDEO_COLLECTION]
+        # Ensure indexes
+        self.users.create_index("username", unique=True)
+        self.users.create_index("email", unique=True)
+        self.videos.create_index("video_id", unique=True)
+        self.videos.create_index("user_id")
+    def close(self):
+        self.client.close()
+# Chat Management Class
+class ChatManagement:
+    def __init__(self, cluster_url, database_name, collection_name):
+        self.connection_string = cluster_url
+        self.database_name = database_name
+        self.collection_name = collection_name
+        self.chat_sessions = {}  # Dictionary to store chat history objects for each session
+    def create_new_chat(self):
+        # Generate a unique chat ID
+        chat_id = str(uuid.uuid4())
+        # Initialize MongoDBChatMessageHistory for the chat session
+        chat_message_history = MongoDBChatMessageHistory(
+            session_id=chat_id,
+            connection_string=self.connection_string,
+            database_name=self.database_name,
+            collection_name=self.collection_name
+        )
+        # Store the chat_message_history object in the session dictionary
+        self.chat_sessions[chat_id] = chat_message_history
+        return chat_id
+    def get_chat_history(self, chat_id):
+        # Check if the chat session is already in memory
+        if chat_id in self.chat_sessions:
+            return self.chat_sessions[chat_id]
+        # If not in memory, try to fetch from the database
+        chat_message_history = MongoDBChatMessageHistory(
+            session_id=chat_id,
+            connection_string=self.connection_string,
+            database_name=self.database_name,
+            collection_name=self.collection_name
+        )
+        if chat_message_history.messages:  # Check if the session exists in the database
+            self.chat_sessions[chat_id] = chat_message_history
+            return chat_message_history
+        return None  # Chat session not found
+    def initialize_chat_history(self, chat_id):
+        # If the chat history already exists, return it
+        if chat_id in self.chat_sessions:
+            return self.chat_sessions[chat_id]
+        # Otherwise, create a new chat history
+        chat_message_history = MongoDBChatMessageHistory(
+            session_id=chat_id,
+            connection_string=self.connection_string,
+            database_name=self.database_name,
+            collection_name=self.collection_name
+        )
+        # Save the new chat session to the session dictionary
+        self.chat_sessions[chat_id] = chat_message_history
+        return chat_message_history
+# Global variables and instances
+# Both objects are created at import time; MongoDB() also issues its
+# create_index calls here.
+mongodb = MongoDB()
+chat_manager = ChatManagement(MONGO_URI, DATABASE_NAME, CHAT_COLLECTION)
+# Retrievers live only in this process: they are not persisted, so /query
+# reports 404 for sessions created before a restart.
+sessions = {}  # In-memory session storage for retrievers
+# Video directory for temporary storage
+# NOTE: shutdown_event removes this whole directory when the app stops.
+VIDEOS_DIR = "temp_videos"
+os.makedirs(VIDEOS_DIR, exist_ok=True)
+# Security functions
+def verify_password(plain_password, hashed_password):
+    return pwd_context.verify(plain_password, hashed_password)
+def get_password_hash(password):
+    return pwd_context.hash(password)
+def create_access_token(data: dict, expires_delta: Optional[timedelta] = None):
+    to_encode = data.copy()
+    if expires_delta:
+        expire = datetime.utcnow() + expires_delta
+    else:
+        expire = datetime.utcnow() + timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES)
+    to_encode.update({"exp": expire})
+    encoded_jwt = jwt.encode(to_encode, SECRET_KEY, algorithm=ALGORITHM)
+    return encoded_jwt
+def get_user(username: str):
+    user_data = mongodb.users.find_one({"username": username})
+    if user_data:
+        return UserInDB(**user_data)
+    return None
+def authenticate_user(username: str, password: str):
+    user = get_user(username)
+    if not user:
+        return False
+    if not verify_password(password, user.hashed_password):
+        return False
+    return user
+async def get_current_user(token: str = Depends(oauth2_scheme)):
+    credentials_exception = HTTPException(
+        status_code=401,
+        detail="Could not validate credentials",
+        headers={"WWW-Authenticate": "Bearer"},
+    )
+    try:
+        payload = jwt.decode(token, SECRET_KEY, algorithms=[ALGORITHM])
+        username: str = payload.get("sub")
+        if username is None:
+            raise credentials_exception
+        token_data = TokenData(username=username)
+    except jwt.PyJWTError:
+        raise credentials_exception
+    user = get_user(username=token_data.username)
+    if user is None:
+        raise credentials_exception
+    return user
 # Initialize Google API client
 def init_google_client():
 def get_llm():
     """
     Returns the language model instance (LLM) using ChatGroq API.
+    The LLM used is Llama 3.3 with a versatile 70 billion parameters model.
     """
     api_key = os.getenv("GROQ_API_KEY", "")
     if not api_key:
     model_name = "BAAI/bge-small-en"
     model_kwargs = {"device": "cpu"}
     encode_kwargs = {"normalize_embeddings": True}
+    embeddings = HuggingFaceEmbeddings(
         model_name=model_name, model_kwargs=model_kwargs, encode_kwargs=encode_kwargs
     )
     return embeddings
     return chain
 # Process transcription and prepare RAG system
+def process_transcription(transcription, user_id, title, source_type, source_url=None, file_size=None):
     # Process the transcription
     text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=20)
     all_splits = text_splitter.split_text(transcription)
     # Create a session ID
     session_id = str(uuid.uuid4())
+    # Store video data in MongoDB
+    video_data = {
+        "video_id": session_id,
+        "user_id": user_id,
+        "title": title,
+        "source_type": source_type,
+        "source_url": source_url,
+        "created_at": datetime.utcnow(),
+        "transcription": transcription,
+        "size": file_size
+    }
+    mongodb.videos.insert_one(video_data)
+    # Store session data in memory
     sessions[session_id] = {
         "retriever": retriever,
+        "chat_history": chat_manager.initialize_chat_history(session_id)
     }
     return session_id
+# Save video to disk (background task)
+def save_video_file(video_id, file_path, contents):
+    os.makedirs(os.path.dirname(file_path), exist_ok=True)
+    with open(file_path, "wb") as f:
+        f.write(contents)
+# Auth endpoints
+@app.post("/register", response_model=User)
+async def register_user(user: UserCreate):
+    # Check if username already exists
+    if mongodb.users.find_one({"username": user.username}):
+        raise HTTPException(status_code=400, detail="Username already registered")
+    # Check if email already exists
+    if mongodb.users.find_one({"email": user.email}):
+        raise HTTPException(status_code=400, detail="Email already registered")
+    # Create user
+    hashed_password = get_password_hash(user.password)
+    user_dict = user.dict()
+    del user_dict["password"]
+    user_dict["hashed_password"] = hashed_password
+    # Insert user
+    mongodb.users.insert_one(user_dict)
+    return User(**user_dict)
+@app.post("/token", response_model=Token)
+async def login_for_access_token(form_data: OAuth2PasswordRequestForm = Depends()):
+    user = authenticate_user(form_data.username, form_data.password)
+    if not user:
+        raise HTTPException(
+            status_code=401,
+            detail="Incorrect username or password",
+            headers={"WWW-Authenticate": "Bearer"},
+        )
+    access_token_expires = timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES)
+    access_token = create_access_token(
+        data={"sub": user.username}, expires_delta=access_token_expires
+    )
+    return {"access_token": access_token, "token_type": "bearer"}
+# Video processing endpoints
 @app.post("/transcribe", response_model=Dict[str, str])
+async def transcribe_video(
+    request: TranscriptionRequest,
+    current_user: User = Depends(get_current_user)
+):
     """
     Transcribe a YouTube video and prepare the RAG system
     """
         transcription = response.candidates[0].content.parts[0].text
         # Process transcription and get session ID
+        video_title = f"YouTube Video - {datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S')}"
+        session_id = process_transcription(
+            transcription,
+            current_user.username,
+            video_title,
+            "youtube",
+            request.youtube_url
+        )
         return {"session_id": session_id, "message": "YouTube video transcribed and RAG system prepared"}
         raise HTTPException(status_code=500, detail=f"Error transcribing video: {str(e)}")
 @app.post("/upload", response_model=Dict[str, str])
+async def upload_video(
+    background_tasks: BackgroundTasks,
+    title: str = Form(...),
+    file: UploadFile = File(...),
+    prompt: str = Form("Transcribe the Video. Write all the things described in the video"),
+    current_user: User = Depends(get_current_user)
+):
     """
     Upload a video file (max 20MB), transcribe it and prepare the RAG system
     """
     try:
         # Check file size (20MB limit)
         contents = await file.read()
+        file_size = len(contents)
+        if file_size > 20 * 1024 * 1024:  # 20MB in bytes
             raise HTTPException(status_code=400, detail="File size exceeds 20MB limit")
         # Check file type
         transcription = response.candidates[0].content.parts[0].text
         # Process transcription and get session ID
+        session_id = process_transcription(
+            transcription,
+            current_user.username,
+            title,
+            "upload",
+            None,
+            file_size
+        )
+        # Save video file to disk
+        file_extension = os.path.splitext(file.filename)[1]
+        file_path = os.path.join(VIDEOS_DIR, f"{session_id}{file_extension}")
+        background_tasks.add_task(save_video_file, session_id, file_path, contents)
         return {"session_id": session_id, "message": "Uploaded video transcribed and RAG system prepared"}
         # Reset file pointer
         await file.seek(0)
+@app.get("/download/{video_id}")
+async def download_video(
+    video_id: str,
+    current_user: User = Depends(get_current_user)
+):
+    """
+    Download a previously uploaded video
+    """
+    # Check if video exists in database
+    video_data = mongodb.videos.find_one({"video_id": video_id})
+    if not video_data:
+        raise HTTPException(status_code=404, detail="Video not found")
+    # Check if user has access to this video
+    if video_data["user_id"] != current_user.username:
+        raise HTTPException(status_code=403, detail="Not authorized to access this video")
+    # For YouTube videos, we don't have the actual file
+    if video_data["source_type"] == "youtube":
+        return {"message": "This is a YouTube video. Please use the original URL to access the video.", "url": video_data["source_url"]}
+    # For uploaded videos, check if file exists
+    # Look for any file with the video_id as the base name
+    video_files = [f for f in os.listdir(VIDEOS_DIR) if f.startswith(video_id)]
+    if not video_files:
+        raise HTTPException(status_code=404, detail="Video file not found")
+    file_path = os.path.join(VIDEOS_DIR, video_files[0])
+    # Determine file extension and MIME type
+    file_extension = os.path.splitext(video_files[0])[1]
+    mime_type = f"video/{file_extension[1:]}" if file_extension else "video/mp4"
+    # Stream the file
+    def iterfile():
+        with open(file_path, "rb") as f:
+            while chunk := f.read(8192):
+                yield chunk
+    return StreamingResponse(
+        iterfile(),
+        media_type=mime_type,
+        headers={"Content-Disposition": f"attachment; filename={video_data['title']}{file_extension}"}
+    )
 @app.post("/query", response_model=QueryResponse)
+async def query_system(
+    request: QueryRequest,
+    current_user: User = Depends(get_current_user)
+):
     """
     Query the RAG system with a question
     """
     try:
         session_id = request.session_id
+        # Check if session exists
         if not session_id or session_id not in sessions:
             raise HTTPException(status_code=404, detail="Session not found. Please transcribe a video first.")
+        # Check if user has access to this session
+        video_data = mongodb.videos.find_one({"video_id": session_id})
+        if not video_data or video_data["user_id"] != current_user.username:
+            raise HTTPException(status_code=403, detail="Not authorized to access this session")
         # Get session data
         session = sessions[session_id]
         retriever = session["retriever"]
         # Create chain
         chain = create_chain(retriever)
+        # Get chat history from MongoDB in LangChain format
+        messages = chat_history.messages
+        langchain_chat_history = [(messages[i].content, messages[i+1].content)
+                                for i in range(0, len(messages)-1, 2) if i+1 < len(messages)]
         # Query the chain
+        result = chain.invoke({"question": request.query, "chat_history": langchain_chat_history})
         # Update chat history
+        chat_history.add_user_message(request.query)
+        chat_history.add_ai_message(result["answer"])
         # Prepare source documents
         source_docs = [doc.page_content[:100] + "..." for doc in result.get("source_documents", [])]
     except Exception as e:
         raise HTTPException(status_code=500, detail=f"Error querying system: {str(e)}")
+@app.get("/sessions", response_model=List[Dict[str, Any]])
+async def get_user_sessions(current_user: User = Depends(get_current_user)):
+    """
+    Get all video sessions for the current user
+    """
+    user_videos = list(mongodb.videos.find({"user_id": current_user.username}))
+    # Format response
+    sessions_list = []
+    for video in user_videos:
+        sessions_list.append({
+            "session_id": video["video_id"],
+            "title": video["title"],
+            "source_type": video["source_type"],
+            "created_at": video["created_at"],
+            "transcription_preview": video["transcription"][:200] + "..." if len(video["transcription"]) > 200 else video["transcription"]
+        })
+    return sessions_list
 @app.get("/sessions/{session_id}", response_model=Dict[str, Any])
+async def get_session_info(
+    session_id: str,
+    current_user: User = Depends(get_current_user)
+):
     """
     Get information about a specific session
     """
+    # Check if session exists in database
+    video_data = mongodb.videos.find_one({"video_id": session_id})
+    if not video_data:
         raise HTTPException(status_code=404, detail="Session not found")
+    # Check if user has access to this session
+    if video_data["user_id"] != current_user.username:
+        raise HTTPException(status_code=403, detail="Not authorized to access this session")
+    # Get chat history
+    chat_history_obj = chat_manager.get_chat_history(session_id)
+    chat_messages = []
+    if chat_history_obj:
+        messages = chat_history_obj.messages
+        for i in range(0, len(messages), 2):
+            if i+1 < len(messages):
+                chat_messages.append({
+                    "question": messages[i].content,
+                    "answer": messages[i+1].content
+                })
     return {
         "session_id": session_id,
+        "title": video_data["title"],
+        "source_type": video_data["source_type"],
+        "source_url": video_data.get("source_url"),
+        "created_at": video_data["created_at"],
+        "transcription_preview": video_data["transcription"][:200] + "..." if len(video_data["transcription"]) > 200 else video_data["transcription"],
+        "full_transcription": video_data["transcription"],
+        "chat_history": chat_messages
     }
 @app.delete("/sessions/{session_id}")
+async def delete_session(
+    session_id: str,
+    current_user: User = Depends(get_current_user)
+):
     """
     Delete a session
     """
+    # Check if session exists in database
+    video_data = mongodb.videos.find_one({"video_id": session_id})
+    if not video_data:
         raise HTTPException(status_code=404, detail="Session not found")
+    # Check if user has access to this session
+    if video_data["user_id"] != current_user.username:
+        raise HTTPException(status_code=403, detail="Not authorized to access this session")
+    # Delete from MongoDB
+    mongodb.videos.delete_one({"video_id": session_id})
+    # Delete chat history
+    chat_history = chat_manager.get_chat_history(session_id)
+    if chat_history:
+        # This will delete all messages with this session_id from MongoDB
+        mongodb.db[CHAT_COLLECTION].delete_many({"session_id": session_id})
+    # Remove from in-memory sessions
+    if session_id in sessions:
+        del sessions[session_id]
+    # Delete video file if it exists
+    video_files = [f for f in os.listdir(VIDEOS_DIR) if f.startswith(session_id)]
+    for file in video_files:
+        try:
+            os.remove(os.path.join(VIDEOS_DIR, file))
+        except:
+            pass
     return {"message": f"Session {session_id} deleted successfully"}
 @app.get("/")
     return {
         "message": "Video Transcription and QA API",
         "endpoints": {
+            "/register": "Register a new user",
+            "/token": "Login and get access token",
             "/transcribe": "Transcribe YouTube videos",
             "/upload": "Upload and transcribe video files (max 20MB)",
+            "/download/{video_id}": "Download an uploaded video",
             "/query": "Query the RAG system",
+            "/sessions": "List all user sessions",
             "/sessions/{session_id}": "Get session information",
         }
     }
+@app.on_event("shutdown")
+def shutdown_event():
+    # Runs once when the application stops: release the Mongo client first,
+    # then drop the stored uploads.
+    mongodb.close()
+    # Clean up temporary files
+    # NOTE(review): this deletes every uploaded video while the matching
+    # documents stay in MongoDB, so /download will answer 404 for them after a
+    # restart — confirm this is intended.
+    shutil.rmtree(VIDEOS_DIR, ignore_errors=True)
 if __name__ == "__main__":
+    # Script entry point: serve the app on all interfaces, port 8000.
     import uvicorn
+    os.environ["TOKENIZERS_PARALLELISM"] = "false"  # Fix for the tokenizers warning
     uvicorn.run(app, host="0.0.0.0", port=8000)