Spaces:
Sleeping
Sleeping
Nagesh Muralidhar
committed on
Commit
·
4972b4c
1
Parent(s):
6c71315
midterm-submission
Browse files
- podcraft/src/pages/PodcastForm.tsx +19 -12
- server/main.py +21 -11
podcraft/src/pages/PodcastForm.tsx
CHANGED
|
@@ -44,7 +44,10 @@ const PodcastForm: React.FC = () => {
|
|
| 44 |
useEffect(() => {
|
| 45 |
const fetchPodcastAndContext = async () => {
|
| 46 |
try {
|
| 47 |
-
if (!id)
|
|
|
|
|
|
|
|
|
|
| 48 |
|
| 49 |
// Fetch podcast details
|
| 50 |
const response = await fetch(`${API_URL}/api/audio-list`);
|
|
@@ -69,7 +72,7 @@ const PodcastForm: React.FC = () => {
|
|
| 69 |
const category = categoryWithExt.replace('.mp3', '');
|
| 70 |
|
| 71 |
return {
|
| 72 |
-
id: index + 1,
|
| 73 |
title: descriptionPart.replace(/_/g, ' ').replace(/^\w/, c => c.toUpperCase()),
|
| 74 |
description: `A debate exploring ${queryPart.replace(/_/g, ' ')}`,
|
| 75 |
audio_file: `${API_URL}${file.path}`,
|
|
@@ -79,15 +82,18 @@ const PodcastForm: React.FC = () => {
|
|
| 79 |
});
|
| 80 |
|
| 81 |
const selectedPodcast = podcastList.find(p => p.id === parseInt(id));
|
| 82 |
-
if (selectedPodcast) {
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
|
|
|
|
|
|
|
|
|
| 91 |
}
|
| 92 |
} catch (err) {
|
| 93 |
console.error('Error fetching podcast:', err);
|
|
@@ -124,7 +130,8 @@ const PodcastForm: React.FC = () => {
|
|
| 124 |
});
|
| 125 |
|
| 126 |
if (!response.ok) {
|
| 127 |
-
|
|
|
|
| 128 |
}
|
| 129 |
|
| 130 |
const data = await response.json();
|
|
|
|
| 44 |
useEffect(() => {
|
| 45 |
const fetchPodcastAndContext = async () => {
|
| 46 |
try {
|
| 47 |
+
if (!id) {
|
| 48 |
+
setError("No podcast ID provided");
|
| 49 |
+
return;
|
| 50 |
+
}
|
| 51 |
|
| 52 |
// Fetch podcast details
|
| 53 |
const response = await fetch(`${API_URL}/api/audio-list`);
|
|
|
|
| 72 |
const category = categoryWithExt.replace('.mp3', '');
|
| 73 |
|
| 74 |
return {
|
| 75 |
+
id: index + 1, // Use 1-based index for consistency
|
| 76 |
title: descriptionPart.replace(/_/g, ' ').replace(/^\w/, c => c.toUpperCase()),
|
| 77 |
description: `A debate exploring ${queryPart.replace(/_/g, ' ')}`,
|
| 78 |
audio_file: `${API_URL}${file.path}`,
|
|
|
|
| 82 |
});
|
| 83 |
|
| 84 |
const selectedPodcast = podcastList.find(p => p.id === parseInt(id));
|
| 85 |
+
if (!selectedPodcast) {
|
| 86 |
+
throw new Error(`Podcast with ID ${id} not found`);
|
| 87 |
+
}
|
| 88 |
+
setPodcast(selectedPodcast);
|
| 89 |
+
|
| 90 |
+
// Fetch podcast context
|
| 91 |
+
const contextResponse = await fetch(`${API_URL}/api/podcast/${id}/context`);
|
| 92 |
+
if (contextResponse.ok) {
|
| 93 |
+
const contextData: PodcastContext = await contextResponse.json();
|
| 94 |
+
setPodcastContext(contextData);
|
| 95 |
+
} else {
|
| 96 |
+
console.warn(`Could not fetch context for podcast ${id}`);
|
| 97 |
}
|
| 98 |
} catch (err) {
|
| 99 |
console.error('Error fetching podcast:', err);
|
|
|
|
| 130 |
});
|
| 131 |
|
| 132 |
if (!response.ok) {
|
| 133 |
+
const errorData = await response.text();
|
| 134 |
+
throw new Error(`Server error: ${response.status} ${errorData}`);
|
| 135 |
}
|
| 136 |
|
| 137 |
const data = await response.json();
|
server/main.py
CHANGED
|
@@ -333,7 +333,7 @@ async def get_podcast_context(podcast_id: str):
|
|
| 333 |
logger.error(f"Error in get_podcast_context: {str(e)}", exc_info=True)
|
| 334 |
raise HTTPException(status_code=500, detail=str(e))
|
| 335 |
|
| 336 |
-
@
|
| 337 |
async def podcast_chat(podcast_id: str, request: PodcastChatRequest):
|
| 338 |
"""Handle chat messages for a specific podcast."""
|
| 339 |
try:
|
|
@@ -342,34 +342,39 @@ async def podcast_chat(podcast_id: str, request: PodcastChatRequest):
|
|
| 342 |
# Path to transcripts file
|
| 343 |
transcripts_file = os.path.join(os.path.dirname(__file__), "transcripts", "podcasts.json")
|
| 344 |
|
| 345 |
-
# Check if transcripts file exists
|
| 346 |
if not os.path.exists(transcripts_file):
|
| 347 |
-
logger.
|
| 348 |
-
|
| 349 |
-
json.dump([], f)
|
| 350 |
-
raise HTTPException(status_code=404, detail="No transcript available for this podcast yet")
|
| 351 |
|
| 352 |
# Read transcripts
|
| 353 |
try:
|
| 354 |
with open(transcripts_file, 'r') as f:
|
| 355 |
transcripts = json.load(f)
|
|
|
|
| 356 |
except json.JSONDecodeError as e:
|
| 357 |
-
logger.error(f"Error
|
| 358 |
-
raise HTTPException(status_code=500, detail="Error reading
|
| 359 |
|
| 360 |
# Convert podcast_id to zero-based index
|
| 361 |
try:
|
| 362 |
podcast_index = int(podcast_id) - 1
|
| 363 |
if podcast_index < 0 or podcast_index >= len(transcripts):
|
|
|
|
| 364 |
raise ValueError(f"Invalid podcast ID: {podcast_id}")
|
| 365 |
except ValueError as e:
|
|
|
|
| 366 |
raise HTTPException(status_code=404, detail=str(e))
|
| 367 |
|
| 368 |
# Get podcast transcript
|
| 369 |
try:
|
| 370 |
podcast_transcript = transcripts[podcast_index].get("podcastScript")
|
| 371 |
if not podcast_transcript:
|
|
|
|
| 372 |
raise HTTPException(status_code=404, detail="No transcript content found for this podcast")
|
|
|
|
|
|
|
|
|
|
| 373 |
except (IndexError, KeyError) as e:
|
| 374 |
logger.error(f"Error accessing podcast transcript: {str(e)}")
|
| 375 |
raise HTTPException(status_code=404, detail="Transcript not found for this podcast")
|
|
@@ -383,8 +388,10 @@ async def podcast_chat(podcast_id: str, request: PodcastChatRequest):
|
|
| 383 |
|
| 384 |
# Use split_text for strings instead of split_documents
|
| 385 |
chunks = text_splitter.split_text(podcast_transcript)
|
|
|
|
| 386 |
|
| 387 |
if not chunks:
|
|
|
|
| 388 |
raise HTTPException(status_code=404, detail="No content chunks found in transcript")
|
| 389 |
|
| 390 |
# Initialize embedding model
|
|
@@ -400,6 +407,7 @@ async def podcast_chat(podcast_id: str, request: PodcastChatRequest):
|
|
| 400 |
location=":memory:", # Use in-memory storage
|
| 401 |
collection_name=collection_name
|
| 402 |
)
|
|
|
|
| 403 |
|
| 404 |
# Configure the retriever with search parameters
|
| 405 |
qdrant_retriever = vectorstore.as_retriever(
|
|
@@ -428,7 +436,8 @@ async def podcast_chat(podcast_id: str, request: PodcastChatRequest):
|
|
| 428 |
|
| 429 |
# Add logging for the retrieved documents and final prompt
|
| 430 |
def get_context_and_log(input_dict):
|
| 431 |
-
|
|
|
|
| 432 |
logger.info("Retrieved context from podcast:")
|
| 433 |
logger.info("-" * 50)
|
| 434 |
logger.info(f"Context:\n{context}")
|
|
@@ -447,11 +456,12 @@ async def podcast_chat(podcast_id: str, request: PodcastChatRequest):
|
|
| 447 |
|
| 448 |
# Get response
|
| 449 |
response = chain.invoke({"question": request.message})
|
|
|
|
| 450 |
|
| 451 |
return PodcastChatResponse(response=response.content)
|
| 452 |
|
| 453 |
-
except HTTPException
|
| 454 |
-
raise
|
| 455 |
except Exception as e:
|
| 456 |
logger.error(f"Error in podcast chat: {str(e)}", exc_info=True)
|
| 457 |
raise HTTPException(status_code=500, detail=str(e))
|
|
|
|
| 333 |
logger.error(f"Error in get_podcast_context: {str(e)}", exc_info=True)
|
| 334 |
raise HTTPException(status_code=500, detail=str(e))
|
| 335 |
|
| 336 |
+
@app.post("/podcast-chat/{podcast_id}", response_model=PodcastChatResponse)
|
| 337 |
async def podcast_chat(podcast_id: str, request: PodcastChatRequest):
|
| 338 |
"""Handle chat messages for a specific podcast."""
|
| 339 |
try:
|
|
|
|
| 342 |
# Path to transcripts file
|
| 343 |
transcripts_file = os.path.join(os.path.dirname(__file__), "transcripts", "podcasts.json")
|
| 344 |
|
| 345 |
+
# Check if transcripts file exists
|
| 346 |
if not os.path.exists(transcripts_file):
|
| 347 |
+
logger.error("Transcripts file not found")
|
| 348 |
+
raise HTTPException(status_code=404, detail="Transcripts file not found")
|
|
|
|
|
|
|
| 349 |
|
| 350 |
# Read transcripts
|
| 351 |
try:
|
| 352 |
with open(transcripts_file, 'r') as f:
|
| 353 |
transcripts = json.load(f)
|
| 354 |
+
logger.info(f"Loaded {len(transcripts)} transcripts")
|
| 355 |
except json.JSONDecodeError as e:
|
| 356 |
+
logger.error(f"Error decoding transcripts file: {str(e)}")
|
| 357 |
+
raise HTTPException(status_code=500, detail="Error reading transcripts file")
|
| 358 |
|
| 359 |
# Convert podcast_id to zero-based index
|
| 360 |
try:
|
| 361 |
podcast_index = int(podcast_id) - 1
|
| 362 |
if podcast_index < 0 or podcast_index >= len(transcripts):
|
| 363 |
+
logger.error(f"Invalid podcast index: {podcast_index} (total transcripts: {len(transcripts)})")
|
| 364 |
raise ValueError(f"Invalid podcast ID: {podcast_id}")
|
| 365 |
except ValueError as e:
|
| 366 |
+
logger.error(f"Error converting podcast ID: {str(e)}")
|
| 367 |
raise HTTPException(status_code=404, detail=str(e))
|
| 368 |
|
| 369 |
# Get podcast transcript
|
| 370 |
try:
|
| 371 |
podcast_transcript = transcripts[podcast_index].get("podcastScript")
|
| 372 |
if not podcast_transcript:
|
| 373 |
+
logger.error(f"No transcript content found for podcast {podcast_id}")
|
| 374 |
raise HTTPException(status_code=404, detail="No transcript content found for this podcast")
|
| 375 |
+
|
| 376 |
+
logger.info(f"Found transcript for podcast {podcast_id}")
|
| 377 |
+
logger.debug(f"Transcript content: {podcast_transcript[:200]}...") # Log first 200 chars
|
| 378 |
except (IndexError, KeyError) as e:
|
| 379 |
logger.error(f"Error accessing podcast transcript: {str(e)}")
|
| 380 |
raise HTTPException(status_code=404, detail="Transcript not found for this podcast")
|
|
|
|
| 388 |
|
| 389 |
# Use split_text for strings instead of split_documents
|
| 390 |
chunks = text_splitter.split_text(podcast_transcript)
|
| 391 |
+
logger.info(f"Split transcript into {len(chunks)} chunks")
|
| 392 |
|
| 393 |
if not chunks:
|
| 394 |
+
logger.error("No content chunks found in transcript")
|
| 395 |
raise HTTPException(status_code=404, detail="No content chunks found in transcript")
|
| 396 |
|
| 397 |
# Initialize embedding model
|
|
|
|
| 407 |
location=":memory:", # Use in-memory storage
|
| 408 |
collection_name=collection_name
|
| 409 |
)
|
| 410 |
+
logger.info(f"Created vector store for podcast {podcast_id}")
|
| 411 |
|
| 412 |
# Configure the retriever with search parameters
|
| 413 |
qdrant_retriever = vectorstore.as_retriever(
|
|
|
|
| 436 |
|
| 437 |
# Add logging for the retrieved documents and final prompt
|
| 438 |
def get_context_and_log(input_dict):
|
| 439 |
+
retrieved_docs = qdrant_retriever.get_relevant_documents(input_dict["question"])
|
| 440 |
+
context = format_docs(retrieved_docs)
|
| 441 |
logger.info("Retrieved context from podcast:")
|
| 442 |
logger.info("-" * 50)
|
| 443 |
logger.info(f"Context:\n{context}")
|
|
|
|
| 456 |
|
| 457 |
# Get response
|
| 458 |
response = chain.invoke({"question": request.message})
|
| 459 |
+
logger.info(f"Generated response: {response.content}")
|
| 460 |
|
| 461 |
return PodcastChatResponse(response=response.content)
|
| 462 |
|
| 463 |
+
except HTTPException:
|
| 464 |
+
raise
|
| 465 |
except Exception as e:
|
| 466 |
logger.error(f"Error in podcast chat: {str(e)}", exc_info=True)
|
| 467 |
raise HTTPException(status_code=500, detail=str(e))
|