Spaces:
Sleeping
Sleeping
Nagesh Muralidhar
committed on
Commit
·
bd04115
1
Parent(s):
1aeaa53
midterm-submission
Browse files
- server/agents.py +8 -0
- server/main.py +33 -5
- server/workflow.py +12 -2
server/agents.py
CHANGED
|
@@ -15,6 +15,7 @@ import numpy as np
|
|
| 15 |
from langchain.schema import SystemMessage, HumanMessage, AIMessage
|
| 16 |
from langchain.output_parsers import PydanticOutputParser
|
| 17 |
from pydantic import BaseModel, Field
|
|
|
|
| 18 |
|
| 19 |
# Configure logging
|
| 20 |
logging.basicConfig(
|
|
@@ -382,6 +383,13 @@ class PodcastProducerAgent:
|
|
| 382 |
|
| 383 |
podcast_logger.info(f"Successfully saved audio file: {filepath}")
|
| 384 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 385 |
return {
|
| 386 |
"type": "podcast",
|
| 387 |
"content": script_response.content,
|
|
|
|
| 15 |
from langchain.schema import SystemMessage, HumanMessage, AIMessage
|
| 16 |
from langchain.output_parsers import PydanticOutputParser
|
| 17 |
from pydantic import BaseModel, Field
|
| 18 |
+
from workflow import save_transcript
|
| 19 |
|
| 20 |
# Configure logging
|
| 21 |
logging.basicConfig(
|
|
|
|
| 383 |
|
| 384 |
podcast_logger.info(f"Successfully saved audio file: {filepath}")
|
| 385 |
|
| 386 |
+
# Save the transcript
|
| 387 |
+
try:
|
| 388 |
+
save_transcript(script_response.content, user_query)
|
| 389 |
+
podcast_logger.info("Successfully saved transcript")
|
| 390 |
+
except Exception as e:
|
| 391 |
+
podcast_logger.error(f"Error saving transcript: {str(e)}")
|
| 392 |
+
|
| 393 |
return {
|
| 394 |
"type": "podcast",
|
| 395 |
"content": script_response.content,
|
server/main.py
CHANGED
|
@@ -82,6 +82,16 @@ os.makedirs(audio_dir, exist_ok=True)
|
|
| 82 |
context_dir = os.path.join(os.path.dirname(__file__), "context_storage")
|
| 83 |
os.makedirs(context_dir, exist_ok=True)
|
| 84 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 85 |
# API Routes
|
| 86 |
@api_router.post("/chat")
|
| 87 |
async def chat(message: ChatMessage):
|
|
@@ -332,13 +342,20 @@ async def podcast_chat(podcast_id: str, request: PodcastChatRequest):
|
|
| 332 |
# Path to transcripts file
|
| 333 |
transcripts_file = os.path.join(os.path.dirname(__file__), "transcripts", "podcasts.json")
|
| 334 |
|
| 335 |
-
# Check if transcripts file exists
|
| 336 |
if not os.path.exists(transcripts_file):
|
| 337 |
-
|
|
|
|
|
|
|
|
|
|
| 338 |
|
| 339 |
# Read transcripts
|
| 340 |
-
|
| 341 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 342 |
|
| 343 |
# Convert podcast_id to zero-based index
|
| 344 |
try:
|
|
@@ -349,7 +366,13 @@ async def podcast_chat(podcast_id: str, request: PodcastChatRequest):
|
|
| 349 |
raise HTTPException(status_code=404, detail=str(e))
|
| 350 |
|
| 351 |
# Get podcast transcript
|
| 352 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 353 |
|
| 354 |
# Split text into chunks
|
| 355 |
text_splitter = RecursiveCharacterTextSplitter(
|
|
@@ -361,6 +384,9 @@ async def podcast_chat(podcast_id: str, request: PodcastChatRequest):
|
|
| 361 |
# Use split_text for strings instead of split_documents
|
| 362 |
chunks = text_splitter.split_text(podcast_transcript)
|
| 363 |
|
|
|
|
|
|
|
|
|
|
| 364 |
# Initialize embedding model
|
| 365 |
embedding_model = OpenAIEmbeddings(model="text-embedding-3-small")
|
| 366 |
|
|
@@ -424,6 +450,8 @@ async def podcast_chat(podcast_id: str, request: PodcastChatRequest):
|
|
| 424 |
|
| 425 |
return PodcastChatResponse(response=response.content)
|
| 426 |
|
|
|
|
|
|
|
| 427 |
except Exception as e:
|
| 428 |
logger.error(f"Error in podcast chat: {str(e)}", exc_info=True)
|
| 429 |
raise HTTPException(status_code=500, detail=str(e))
|
|
|
|
| 82 |
context_dir = os.path.join(os.path.dirname(__file__), "context_storage")
|
| 83 |
os.makedirs(context_dir, exist_ok=True)
|
| 84 |
|
| 85 |
+
# Add transcripts directory
|
| 86 |
+
transcripts_dir = os.path.join(os.path.dirname(__file__), "transcripts")
|
| 87 |
+
os.makedirs(transcripts_dir, exist_ok=True)
|
| 88 |
+
|
| 89 |
+
# Initialize empty transcripts file if it doesn't exist
|
| 90 |
+
transcripts_file = os.path.join(transcripts_dir, "podcasts.json")
|
| 91 |
+
if not os.path.exists(transcripts_file):
|
| 92 |
+
with open(transcripts_file, 'w') as f:
|
| 93 |
+
json.dump([], f)
|
| 94 |
+
|
| 95 |
# API Routes
|
| 96 |
@api_router.post("/chat")
|
| 97 |
async def chat(message: ChatMessage):
|
|
|
|
| 342 |
# Path to transcripts file
|
| 343 |
transcripts_file = os.path.join(os.path.dirname(__file__), "transcripts", "podcasts.json")
|
| 344 |
|
| 345 |
+
# Check if transcripts file exists and initialize if needed
|
| 346 |
if not os.path.exists(transcripts_file):
|
| 347 |
+
logger.warning("Transcripts file not found, initializing empty file")
|
| 348 |
+
with open(transcripts_file, 'w') as f:
|
| 349 |
+
json.dump([], f)
|
| 350 |
+
raise HTTPException(status_code=404, detail="No transcript available for this podcast yet")
|
| 351 |
|
| 352 |
# Read transcripts
|
| 353 |
+
try:
|
| 354 |
+
with open(transcripts_file, 'r') as f:
|
| 355 |
+
transcripts = json.load(f)
|
| 356 |
+
except json.JSONDecodeError as e:
|
| 357 |
+
logger.error(f"Error reading transcripts file: {str(e)}")
|
| 358 |
+
raise HTTPException(status_code=500, detail="Error reading podcast transcript")
|
| 359 |
|
| 360 |
# Convert podcast_id to zero-based index
|
| 361 |
try:
|
|
|
|
| 366 |
raise HTTPException(status_code=404, detail=str(e))
|
| 367 |
|
| 368 |
# Get podcast transcript
|
| 369 |
+
try:
|
| 370 |
+
podcast_transcript = transcripts[podcast_index].get("podcastScript")
|
| 371 |
+
if not podcast_transcript:
|
| 372 |
+
raise HTTPException(status_code=404, detail="No transcript content found for this podcast")
|
| 373 |
+
except (IndexError, KeyError) as e:
|
| 374 |
+
logger.error(f"Error accessing podcast transcript: {str(e)}")
|
| 375 |
+
raise HTTPException(status_code=404, detail="Transcript not found for this podcast")
|
| 376 |
|
| 377 |
# Split text into chunks
|
| 378 |
text_splitter = RecursiveCharacterTextSplitter(
|
|
|
|
| 384 |
# Use split_text for strings instead of split_documents
|
| 385 |
chunks = text_splitter.split_text(podcast_transcript)
|
| 386 |
|
| 387 |
+
if not chunks:
|
| 388 |
+
raise HTTPException(status_code=404, detail="No content chunks found in transcript")
|
| 389 |
+
|
| 390 |
# Initialize embedding model
|
| 391 |
embedding_model = OpenAIEmbeddings(model="text-embedding-3-small")
|
| 392 |
|
|
|
|
| 450 |
|
| 451 |
return PodcastChatResponse(response=response.content)
|
| 452 |
|
| 453 |
+
except HTTPException as he:
|
| 454 |
+
raise he
|
| 455 |
except Exception as e:
|
| 456 |
logger.error(f"Error in podcast chat: {str(e)}", exc_info=True)
|
| 457 |
raise HTTPException(status_code=500, detail=str(e))
|
server/workflow.py
CHANGED
|
@@ -26,8 +26,13 @@ def save_transcript(podcast_script: str, user_query: str) -> None:
|
|
| 26 |
try:
|
| 27 |
# Load existing transcripts
|
| 28 |
if os.path.exists(TRANSCRIPTS_FILE):
|
| 29 |
-
|
| 30 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
else:
|
| 32 |
transcripts = []
|
| 33 |
|
|
@@ -40,6 +45,11 @@ def save_transcript(podcast_script: str, user_query: str) -> None:
|
|
| 40 |
|
| 41 |
except Exception as e:
|
| 42 |
print(f"Error saving transcript: {str(e)}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
|
| 44 |
class AgentState(TypedDict):
|
| 45 |
messages: List[Dict[str, Any]]
|
|
|
|
| 26 |
try:
|
| 27 |
# Load existing transcripts
|
| 28 |
if os.path.exists(TRANSCRIPTS_FILE):
|
| 29 |
+
try:
|
| 30 |
+
with open(TRANSCRIPTS_FILE, 'r') as f:
|
| 31 |
+
transcripts = json.load(f)
|
| 32 |
+
if not isinstance(transcripts, list):
|
| 33 |
+
transcripts = []
|
| 34 |
+
except json.JSONDecodeError:
|
| 35 |
+
transcripts = []
|
| 36 |
else:
|
| 37 |
transcripts = []
|
| 38 |
|
|
|
|
| 45 |
|
| 46 |
except Exception as e:
|
| 47 |
print(f"Error saving transcript: {str(e)}")
|
| 48 |
+
# Create directory if it doesn't exist
|
| 49 |
+
os.makedirs(os.path.dirname(TRANSCRIPTS_FILE), exist_ok=True)
|
| 50 |
+
# Try to save just this transcript
|
| 51 |
+
with open(TRANSCRIPTS_FILE, 'w') as f:
|
| 52 |
+
json.dump([transcript], f, indent=2)
|
| 53 |
|
| 54 |
class AgentState(TypedDict):
|
| 55 |
messages: List[Dict[str, Any]]
|