Spaces:

dataera2013
/

midterm

Sleeping

App Files Community

Nagesh Muralidhar committed on Feb 24, 2025

Commit

4972b4c

1 Parent(s): 6c71315

midterm-submission

Browse files

Files changed (2) hide show

podcraft/src/pages/PodcastForm.tsx +19 -12
server/main.py +21 -11

podcraft/src/pages/PodcastForm.tsx CHANGED Viewed

@@ -44,7 +44,10 @@ const PodcastForm: React.FC = () => {
   useEffect(() => {
     const fetchPodcastAndContext = async () => {
       try {
-        if (!id) return;
         // Fetch podcast details
         const response = await fetch(`${API_URL}/api/audio-list`);
@@ -69,7 +72,7 @@ const PodcastForm: React.FC = () => {
           const category = categoryWithExt.replace('.mp3', '');
           return {
-            id: index + 1,
             title: descriptionPart.replace(/_/g, ' ').replace(/^\w/, c => c.toUpperCase()),
             description: `A debate exploring ${queryPart.replace(/_/g, ' ')}`,
             audio_file: `${API_URL}${file.path}`,
@@ -79,15 +82,18 @@ const PodcastForm: React.FC = () => {
         });
         const selectedPodcast = podcastList.find(p => p.id === parseInt(id));
-        if (selectedPodcast) {
-          setPodcast(selectedPodcast);
-          // Fetch podcast context
-          const contextResponse = await fetch(`${API_URL}/api/podcast/${id}/context`);
-          if (contextResponse.ok) {
-            const contextData: PodcastContext = await contextResponse.json();
-            setPodcastContext(contextData);
-          }
         }
       } catch (err) {
         console.error('Error fetching podcast:', err);
@@ -124,7 +130,8 @@ const PodcastForm: React.FC = () => {
       });
       if (!response.ok) {
-        throw new Error(`Server error: ${response.status}`);
       }
       const data = await response.json();

   useEffect(() => {
     const fetchPodcastAndContext = async () => {
       try {
+        if (!id) {
+          setError("No podcast ID provided");
+          return;
+        }
         // Fetch podcast details
         const response = await fetch(`${API_URL}/api/audio-list`);
           const category = categoryWithExt.replace('.mp3', '');
           return {
+            id: index + 1, // Use 1-based index for consistency
             title: descriptionPart.replace(/_/g, ' ').replace(/^\w/, c => c.toUpperCase()),
             description: `A debate exploring ${queryPart.replace(/_/g, ' ')}`,
             audio_file: `${API_URL}${file.path}`,
         });
         const selectedPodcast = podcastList.find(p => p.id === parseInt(id));
+        if (!selectedPodcast) {
+          throw new Error(`Podcast with ID ${id} not found`);
+        }
+        setPodcast(selectedPodcast);
+        // Fetch podcast context
+        const contextResponse = await fetch(`${API_URL}/api/podcast/${id}/context`);
+        if (contextResponse.ok) {
+          const contextData: PodcastContext = await contextResponse.json();
+          setPodcastContext(contextData);
+        } else {
+          console.warn(`Could not fetch context for podcast ${id}`);
         }
       } catch (err) {
         console.error('Error fetching podcast:', err);
       });
       if (!response.ok) {
+        const errorData = await response.text();
+        throw new Error(`Server error: ${response.status} ${errorData}`);
       }
       const data = await response.json();

server/main.py CHANGED Viewed

@@ -333,7 +333,7 @@ async def get_podcast_context(podcast_id: str):
         logger.error(f"Error in get_podcast_context: {str(e)}", exc_info=True)
         raise HTTPException(status_code=500, detail=str(e))
-@api_router.post("/podcast-chat/{podcast_id}", response_model=PodcastChatResponse)
 async def podcast_chat(podcast_id: str, request: PodcastChatRequest):
     """Handle chat messages for a specific podcast."""
     try:
@@ -342,34 +342,39 @@ async def podcast_chat(podcast_id: str, request: PodcastChatRequest):
         # Path to transcripts file
         transcripts_file = os.path.join(os.path.dirname(__file__), "transcripts", "podcasts.json")
-        # Check if transcripts file exists and initialize if needed
         if not os.path.exists(transcripts_file):
-            logger.warning("Transcripts file not found, initializing empty file")
-            with open(transcripts_file, 'w') as f:
-                json.dump([], f)
-            raise HTTPException(status_code=404, detail="No transcript available for this podcast yet")
         # Read transcripts
         try:
             with open(transcripts_file, 'r') as f:
                 transcripts = json.load(f)
         except json.JSONDecodeError as e:
-            logger.error(f"Error reading transcripts file: {str(e)}")
-            raise HTTPException(status_code=500, detail="Error reading podcast transcript")
         # Convert podcast_id to zero-based index
         try:
             podcast_index = int(podcast_id) - 1
             if podcast_index < 0 or podcast_index >= len(transcripts):
                 raise ValueError(f"Invalid podcast ID: {podcast_id}")
         except ValueError as e:
             raise HTTPException(status_code=404, detail=str(e))
         # Get podcast transcript
         try:
             podcast_transcript = transcripts[podcast_index].get("podcastScript")
             if not podcast_transcript:
                 raise HTTPException(status_code=404, detail="No transcript content found for this podcast")
         except (IndexError, KeyError) as e:
             logger.error(f"Error accessing podcast transcript: {str(e)}")
             raise HTTPException(status_code=404, detail="Transcript not found for this podcast")
@@ -383,8 +388,10 @@ async def podcast_chat(podcast_id: str, request: PodcastChatRequest):
         # Use split_text for strings instead of split_documents
         chunks = text_splitter.split_text(podcast_transcript)
         if not chunks:
             raise HTTPException(status_code=404, detail="No content chunks found in transcript")
         # Initialize embedding model
@@ -400,6 +407,7 @@ async def podcast_chat(podcast_id: str, request: PodcastChatRequest):
             location=":memory:",  # Use in-memory storage
             collection_name=collection_name
         )
         # Configure the retriever with search parameters
         qdrant_retriever = vectorstore.as_retriever(
@@ -428,7 +436,8 @@ async def podcast_chat(podcast_id: str, request: PodcastChatRequest):
         # Add logging for the retrieved documents and final prompt
         def get_context_and_log(input_dict):
-            context = format_docs(qdrant_retriever.get_relevant_documents(input_dict["question"]))
             logger.info("Retrieved context from podcast:")
             logger.info("-" * 50)
             logger.info(f"Context:\n{context}")
@@ -447,11 +456,12 @@ async def podcast_chat(podcast_id: str, request: PodcastChatRequest):
         # Get response
         response = chain.invoke({"question": request.message})
         return PodcastChatResponse(response=response.content)
-    except HTTPException as he:
-        raise he
     except Exception as e:
         logger.error(f"Error in podcast chat: {str(e)}", exc_info=True)
         raise HTTPException(status_code=500, detail=str(e))

         logger.error(f"Error in get_podcast_context: {str(e)}", exc_info=True)
         raise HTTPException(status_code=500, detail=str(e))
+@app.post("/podcast-chat/{podcast_id}", response_model=PodcastChatResponse)
 async def podcast_chat(podcast_id: str, request: PodcastChatRequest):
     """Handle chat messages for a specific podcast."""
     try:
         # Path to transcripts file
         transcripts_file = os.path.join(os.path.dirname(__file__), "transcripts", "podcasts.json")
+        # Check if transcripts file exists
         if not os.path.exists(transcripts_file):
+            logger.error("Transcripts file not found")
+            raise HTTPException(status_code=404, detail="Transcripts file not found")
         # Read transcripts
         try:
             with open(transcripts_file, 'r') as f:
                 transcripts = json.load(f)
+                logger.info(f"Loaded {len(transcripts)} transcripts")
         except json.JSONDecodeError as e:
+            logger.error(f"Error decoding transcripts file: {str(e)}")
+            raise HTTPException(status_code=500, detail="Error reading transcripts file")
         # Convert podcast_id to zero-based index
         try:
             podcast_index = int(podcast_id) - 1
             if podcast_index < 0 or podcast_index >= len(transcripts):
+                logger.error(f"Invalid podcast index: {podcast_index} (total transcripts: {len(transcripts)})")
                 raise ValueError(f"Invalid podcast ID: {podcast_id}")
         except ValueError as e:
+            logger.error(f"Error converting podcast ID: {str(e)}")
             raise HTTPException(status_code=404, detail=str(e))
         # Get podcast transcript
         try:
             podcast_transcript = transcripts[podcast_index].get("podcastScript")
             if not podcast_transcript:
+                logger.error(f"No transcript content found for podcast {podcast_id}")
                 raise HTTPException(status_code=404, detail="No transcript content found for this podcast")
+            logger.info(f"Found transcript for podcast {podcast_id}")
+            logger.debug(f"Transcript content: {podcast_transcript[:200]}...")  # Log first 200 chars
         except (IndexError, KeyError) as e:
             logger.error(f"Error accessing podcast transcript: {str(e)}")
             raise HTTPException(status_code=404, detail="Transcript not found for this podcast")
         # Use split_text for strings instead of split_documents
         chunks = text_splitter.split_text(podcast_transcript)
+        logger.info(f"Split transcript into {len(chunks)} chunks")
         if not chunks:
+            logger.error("No content chunks found in transcript")
             raise HTTPException(status_code=404, detail="No content chunks found in transcript")
         # Initialize embedding model
             location=":memory:",  # Use in-memory storage
             collection_name=collection_name
         )
+        logger.info(f"Created vector store for podcast {podcast_id}")
         # Configure the retriever with search parameters
         qdrant_retriever = vectorstore.as_retriever(
         # Add logging for the retrieved documents and final prompt
         def get_context_and_log(input_dict):
+            retrieved_docs = qdrant_retriever.get_relevant_documents(input_dict["question"])
+            context = format_docs(retrieved_docs)
             logger.info("Retrieved context from podcast:")
             logger.info("-" * 50)
             logger.info(f"Context:\n{context}")
         # Get response
         response = chain.invoke({"question": request.message})
+        logger.info(f"Generated response: {response.content}")
         return PodcastChatResponse(response=response.content)
+    except HTTPException:
+        raise
     except Exception as e:
         logger.error(f"Error in podcast chat: {str(e)}", exc_info=True)
         raise HTTPException(status_code=500, detail=str(e))