Final_Assignment_Template

Sleeping

App Files Files Community

philincloud committed on May 27, 2025

Commit

830a3ef

verified ·

1 Parent(s): 66b821b

Update langgraph_agent.py

Browse files

Files changed (1) hide show

langgraph_agent.py +24 -6

langgraph_agent.py CHANGED Viewed

@@ -91,7 +91,7 @@ HF_INFERENCE_CLIENT = None
 if HF_API_TOKEN:
     HF_INFERENCE_CLIENT = InferenceClient(token=HF_API_TOKEN)
 else:
-    print("WARNING: HF_API_TOKEN not set. Image tools will not function.")
 @tool
 def read_file_content(file_path: str) -> Dict[str, str]:
@@ -115,8 +115,8 @@ def read_file_content(file_path: str) -> Dict[str, str]:
             # Indicate that it's an image and needs to be described by a specific tool
             return {"file_type": "image", "file_name": file_path, "file_content": f"Image file '{file_path}' detected. Use 'describe_image' tool to get a textual description."}
         elif file_extension == ".mp3":
-            # Indicate that it's an audio file and the LLM should process it natively
-            return {"file_type": "audio", "file_name": file_path, "file_content": f"Audio file '{file_path}' detected. The LLM should process this natively."}
         else:
             return {"file_type": "unsupported", "file_name": file_path, "file_content": f"Unsupported file type: {file_extension}. Only .txt, .py, .xlsx, .jpeg, .jpg, .png, .mp3 files are recognized."}
     except FileNotFoundError:
@@ -159,6 +159,24 @@ def describe_image(image_path: str) -> Dict[str, str]:
     except Exception as e:
         return {"error": f"Error describing image {image_path}: {str(e)}"}
 API_KEY = os.getenv("GEMINI_API_KEY")
 HF_SPACE_TOKEN = os.getenv("HF_SPACE_TOKEN")
@@ -170,8 +188,8 @@ tools = [
     wiki_search, web_search, arvix_search,
     read_file_content,
     python_interpreter,
-    describe_image,  # Added new tool
-    # transcribe_audio, # Removed as per user request
 ]
@@ -184,7 +202,7 @@ def build_graph(provider: str = "gemini"):
     """Build the LangGraph agent with chosen LLM (default: Gemini)."""
     if provider == "gemini":
         llm = ChatGoogleGenerativeAI(
-        model= "gemini-1.5-flash-preview-05-20", # This model is capable of native audio processing
         temperature=1.0,
         max_retries=2,
         api_key=GEMINI_API_KEY,

 if HF_API_TOKEN:
     HF_INFERENCE_CLIENT = InferenceClient(token=HF_API_TOKEN)
 else:
+    print("WARNING: HF_API_TOKEN not set. Image and Audio tools will not function.")
 @tool
 def read_file_content(file_path: str) -> Dict[str, str]:
             # Indicate that it's an image and needs to be described by a specific tool
             return {"file_type": "image", "file_name": file_path, "file_content": f"Image file '{file_path}' detected. Use 'describe_image' tool to get a textual description."}
         elif file_extension == ".mp3":
+            # Indicate that it's an audio file and needs to be transcribed by a specific tool
+            return {"file_type": "audio", "file_name": file_path, "file_content": f"Audio file '{file_path}' detected. Use 'transcribe_audio' tool to get the text transcription."}
         else:
             return {"file_type": "unsupported", "file_name": file_path, "file_content": f"Unsupported file type: {file_extension}. Only .txt, .py, .xlsx, .jpeg, .jpg, .png, .mp3 files are recognized."}
     except FileNotFoundError:
     except Exception as e:
         return {"error": f"Error describing image {image_path}: {str(e)}"}
+@tool
+def transcribe_audio(audio_path: str) -> Dict[str, str]:
+    """
+    Transcribes an audio file (e.g., MP3) to text using an automatic speech recognition model
+    from the Hugging Face Inference API. Requires HF_API_TOKEN environment variable to be set.
+    """
+    if not HF_INFERENCE_CLIENT:
+        return {"error": "Hugging Face API token not configured for audio transcription. Cannot use this tool."}
+    try:
+        with open(audio_path, "rb") as f:
+            audio_bytes = f.read()
+        transcription = HF_INFERENCE_CLIENT.automatic_speech_recognition(audio_bytes)
+        return {"audio_transcription": transcription, "audio_path": audio_path}
+    except FileNotFoundError:
+        return {"error": f"Audio file not found: {audio_path}. Please ensure the file exists."}
+    except Exception as e:
+        return {"error": f"Error transcribing audio {audio_path}: {str(e)}"}
 API_KEY = os.getenv("GEMINI_API_KEY")
 HF_SPACE_TOKEN = os.getenv("HF_SPACE_TOKEN")
     wiki_search, web_search, arvix_search,
     read_file_content,
     python_interpreter,
+    describe_image,
+    transcribe_audio, # Re-added tool
 ]
     """Build the LangGraph agent with chosen LLM (default: Gemini)."""
     if provider == "gemini":
         llm = ChatGoogleGenerativeAI(
+        model= "gemini-1.5-flash-preview-05-20",
         temperature=1.0,
         max_retries=2,
         api_key=GEMINI_API_KEY,