Final_Assignment_Template

Sleeping

App Files Community

sabonzo committed on Apr 25, 2025

Commit

58ca220

verified ·

1 Parent(s): a445487

Update app.py

Browse files

Files changed (1) hide show

app.py +20 -22

app.py CHANGED Viewed

@@ -12,7 +12,7 @@ import chess.engine # For chess engine interaction
 import base64 # For encoding images for multimodal models
 import logging # For better debugging
 import subprocess # To check for stockfish
 # Langchain specific imports
 from langchain_openai import ChatOpenAI, OpenAIEmbeddings # Or other LLM providers
 from langchain.agents import AgentExecutor, create_openai_tools_agent # Or other agent types
@@ -82,38 +82,36 @@ def transcribe_audio(file_path: str) -> str:
         # Ensure OPENAI_API_KEY is available
         if not os.getenv("OPENAI_API_KEY"):
              return "ERROR: OPENAI_API_KEY not set. Cannot transcribe audio."
-        # Use the ChatOpenAI client to access the underlying OpenAI client
-        llm_client = ChatOpenAI(model="gpt-4o", temperature=0).client # Need client for audio API
         with open(file_path, "rb") as audio_file:
-             # Use the transcription API directly
-            transcript = llm_client.audio.transcriptions.create(
                 model="whisper-1",
                 file=audio_file,
-                response_format="text"
             )
         logging.info(f"Transcription successful for {file_path}")
-        if isinstance(transcript, str):
-             return transcript
         else:
-             # Handle potential object response if format changes in future/different library versions
-             logging.warning(f"Unexpected transcript format type for {file_path}: {type(transcript)}. Attempting to extract text.")
-             try:
-                 # Common patterns: object with 'text' attribute, or dict with 'text' key
-                 if hasattr(transcript, 'text'):
-                     return transcript.text
-                 elif isinstance(transcript, dict) and 'text' in transcript:
-                     return transcript['text']
-                 else:
-                     # Fallback: convert to string, might contain useful info
-                     return str(transcript)
-             except Exception as extraction_err:
-                  logging.error(f"Could not extract text from unexpected transcript format: {extraction_err}")
-                  return "ERROR: Unexpected transcription format received and text extraction failed."
     except Exception as e:
         logging.error(f"Error during audio transcription for {file_path}: {e}")
         if "Invalid file format" in str(e) or "Unsupported file type" in str(e):
             return f"ERROR: Unsupported audio file format at {file_path}. Please ensure it's a format supported by Whisper (e.g., mp3, wav, m4a)."
         return f"ERROR: Could not transcribe audio file {file_path}. Details: {str(e)}"

 import base64 # For encoding images for multimodal models
 import logging # For better debugging
 import subprocess # To check for stockfish
+from openai import OpenAI
 # Langchain specific imports
 from langchain_openai import ChatOpenAI, OpenAIEmbeddings # Or other LLM providers
 from langchain.agents import AgentExecutor, create_openai_tools_agent # Or other agent types
         # Ensure OPENAI_API_KEY is available
         if not os.getenv("OPENAI_API_KEY"):
              return "ERROR: OPENAI_API_KEY not set. Cannot transcribe audio."
+        # === CHANGE HERE: Instantiate the base OpenAI client directly ===
+        client = OpenAI()
+        # === END CHANGE ===
         with open(file_path, "rb") as audio_file:
+             # Use the transcription API directly via the base client
+            transcript_response = client.audio.transcriptions.create(
                 model="whisper-1",
                 file=audio_file,
+                response_format="text" # Request text directly
             )
         logging.info(f"Transcription successful for {file_path}")
+        # The response should now be the text string directly when using response_format="text"
+        if isinstance(transcript_response, str):
+             return transcript_response
         else:
+             # Handle unexpected response format (less likely now but safe)
+             logging.warning(f"Whisper returned unexpected format: {type(transcript_response)}. Attempting conversion.")
+             return str(transcript_response) # Fallback
     except Exception as e:
+        # Keep existing specific error handling
         logging.error(f"Error during audio transcription for {file_path}: {e}")
         if "Invalid file format" in str(e) or "Unsupported file type" in str(e):
             return f"ERROR: Unsupported audio file format at {file_path}. Please ensure it's a format supported by Whisper (e.g., mp3, wav, m4a)."
+        # Add check for authentication errors
+        if "authentication" in str(e).lower() or "api key" in str(e).lower():
+            return f"ERROR: Authentication error during transcription. Check OPENAI_API_KEY. Details: {str(e)}"
         return f"ERROR: Could not transcribe audio file {file_path}. Details: {str(e)}"