sabonzo committed on
Commit
58ca220
·
verified ·
1 Parent(s): a445487

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -22
app.py CHANGED
@@ -12,7 +12,7 @@ import chess.engine # For chess engine interaction
12
  import base64 # For encoding images for multimodal models
13
  import logging # For better debugging
14
  import subprocess # To check for stockfish
15
-
16
  # Langchain specific imports
17
  from langchain_openai import ChatOpenAI, OpenAIEmbeddings # Or other LLM providers
18
  from langchain.agents import AgentExecutor, create_openai_tools_agent # Or other agent types
@@ -82,38 +82,36 @@ def transcribe_audio(file_path: str) -> str:
82
  # Ensure OPENAI_API_KEY is available
83
  if not os.getenv("OPENAI_API_KEY"):
84
  return "ERROR: OPENAI_API_KEY not set. Cannot transcribe audio."
85
- # Use the ChatOpenAI client to access the underlying OpenAI client
86
- llm_client = ChatOpenAI(model="gpt-4o", temperature=0).client # Need client for audio API
 
 
 
87
  with open(file_path, "rb") as audio_file:
88
- # Use the transcription API directly
89
- transcript = llm_client.audio.transcriptions.create(
90
  model="whisper-1",
91
  file=audio_file,
92
- response_format="text"
93
  )
94
  logging.info(f"Transcription successful for {file_path}")
95
- if isinstance(transcript, str):
96
- return transcript
 
 
97
  else:
98
- # Handle potential object response if format changes in future/different library versions
99
- logging.warning(f"Unexpected transcript format type for {file_path}: {type(transcript)}. Attempting to extract text.")
100
- try:
101
- # Common patterns: object with 'text' attribute, or dict with 'text' key
102
- if hasattr(transcript, 'text'):
103
- return transcript.text
104
- elif isinstance(transcript, dict) and 'text' in transcript:
105
- return transcript['text']
106
- else:
107
- # Fallback: convert to string, might contain useful info
108
- return str(transcript)
109
- except Exception as extraction_err:
110
- logging.error(f"Could not extract text from unexpected transcript format: {extraction_err}")
111
- return "ERROR: Unexpected transcription format received and text extraction failed."
112
 
113
  except Exception as e:
 
114
  logging.error(f"Error during audio transcription for {file_path}: {e}")
115
  if "Invalid file format" in str(e) or "Unsupported file type" in str(e):
116
  return f"ERROR: Unsupported audio file format at {file_path}. Please ensure it's a format supported by Whisper (e.g., mp3, wav, m4a)."
 
 
 
117
  return f"ERROR: Could not transcribe audio file {file_path}. Details: {str(e)}"
118
 
119
 
 
12
  import base64 # For encoding images for multimodal models
13
  import logging # For better debugging
14
  import subprocess # To check for stockfish
15
+ from openai import OpenAI
16
  # Langchain specific imports
17
  from langchain_openai import ChatOpenAI, OpenAIEmbeddings # Or other LLM providers
18
  from langchain.agents import AgentExecutor, create_openai_tools_agent # Or other agent types
 
82
  # Ensure OPENAI_API_KEY is available
83
  if not os.getenv("OPENAI_API_KEY"):
84
  return "ERROR: OPENAI_API_KEY not set. Cannot transcribe audio."
85
+
86
+ # === CHANGE HERE: Instantiate the base OpenAI client directly ===
87
+ client = OpenAI()
88
+ # === END CHANGE ===
89
+
90
  with open(file_path, "rb") as audio_file:
91
+ # Use the transcription API directly via the base client
92
+ transcript_response = client.audio.transcriptions.create(
93
  model="whisper-1",
94
  file=audio_file,
95
+ response_format="text" # Request text directly
96
  )
97
  logging.info(f"Transcription successful for {file_path}")
98
+
99
+ # The response should now be the text string directly when using response_format="text"
100
+ if isinstance(transcript_response, str):
101
+ return transcript_response
102
  else:
103
+ # Handle unexpected response format (less likely now but safe)
104
+ logging.warning(f"Whisper returned unexpected format: {type(transcript_response)}. Attempting conversion.")
105
+ return str(transcript_response) # Fallback
 
 
 
 
 
 
 
 
 
 
 
106
 
107
  except Exception as e:
108
+ # Keep existing specific error handling
109
  logging.error(f"Error during audio transcription for {file_path}: {e}")
110
  if "Invalid file format" in str(e) or "Unsupported file type" in str(e):
111
  return f"ERROR: Unsupported audio file format at {file_path}. Please ensure it's a format supported by Whisper (e.g., mp3, wav, m4a)."
112
+ # Add check for authentication errors
113
+ if "authentication" in str(e).lower() or "api key" in str(e).lower():
114
+ return f"ERROR: Authentication error during transcription. Check OPENAI_API_KEY. Details: {str(e)}"
115
  return f"ERROR: Could not transcribe audio file {file_path}. Details: {str(e)}"
116
 
117