gabejavitt committed on
Commit
cbea565
Β·
verified Β·
1 Parent(s): 4961f0a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +138 -39
app.py CHANGED
@@ -21,6 +21,8 @@ from transformers import pipeline
21
  from youtube_transcript_api import YouTubeTranscriptApi
22
  from bs4 import BeautifulSoup
23
  import requests
 
 
24
 
25
  # LangChain & LangGraph
26
  from langgraph.graph.message import add_messages
@@ -32,7 +34,6 @@ from langchain_groq import ChatGroq
32
  from langchain_google_genai import ChatGoogleGenerativeAI
33
  from langchain_community.llms import HuggingFaceHub
34
 
35
-
36
  # RAG
37
  from langchain_text_splitters import RecursiveCharacterTextSplitter
38
  from langchain_community.vectorstores import FAISS
@@ -470,6 +471,85 @@ def audio_transcription_tool(file_path: str) -> str:
470
  return f"Transcription error: {str(e)}"
471
 
472
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
473
class YoutubeInput(BaseModel):
    """Argument schema for the YouTube-transcript tool."""
    # Full YouTube video URL; the video ID is extracted from it downstream.
    video_url: str = Field(description="YouTube URL")
475
 
@@ -491,8 +571,6 @@ def get_youtube_transcript(video_url: str) -> str:
491
  if not video_id:
492
  return f"Error: Could not extract video ID."
493
 
494
- from youtube_transcript_api import YouTubeTranscriptApi
495
-
496
  # FIXED: Use get_transcript instead of list_transcripts
497
  transcript_list = YouTubeTranscriptApi.get_transcript(video_id, languages=['en'])
498
 
@@ -614,6 +692,7 @@ defined_tools = [
614
 
615
  # Specialized
616
  audio_transcription_tool,
 
617
  get_youtube_transcript,
618
  scrape_and_retrieve,
619
 
@@ -722,7 +801,7 @@ def parse_tool_call_from_string(content: str, tools: List) -> List[ToolCall]:
722
 
723
 
724
  # =============================================================================
725
- # CONDITIONAL EDGE FUNCTION (FIXED)
726
  # =============================================================================
727
  def should_continue(state: AgentState):
728
  """Decide next step with robust logic."""
@@ -770,7 +849,6 @@ def should_continue(state: AgentState):
770
 
771
  # 5. Default: continue to agent
772
  print(f"πŸ”„ Default β†’ continuing to agent")
773
- return "agent"
774
 
775
 
776
  # =============================================================================
@@ -1150,14 +1228,35 @@ REMEMBER: One tool per turn. No reasoning without tools. Exact answer format.
1150
  print(f"🎯 NEW QUESTION")
1151
  print(f"{'='*70}")
1152
  print(f"Q: {question[:200]}{'...' if len(question) > 200 else ''}")
 
 
1153
  print(f"{'='*70}\n")
1154
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1155
  graph_input = {
1156
  "messages": [
1157
  SystemMessage(content=self.system_prompt),
1158
- HumanMessage(content=question + (f"\n\n[FILE ATTACHED: {file_path}]" if file_path else ""))
1159
  ],
1160
- "file_path": file_path, # Add this to the graph state,
1161
  "turn": 0,
1162
  "has_plan": False,
1163
  "consecutive_errors": 0,
@@ -1287,22 +1386,23 @@ except Exception as e:
1287
  traceback.print_exc()
1288
  agent = None
1289
 
1290
- # ====================================================
1291
- # --- (Original Template Code - Mock Questions Version) ---
1292
- def run_and_submit_all( profile: gr.OAuthProfile | None):
 
1293
  """
1294
  Fetches all questions, runs the BasicAgent on them, submits all answers,
1295
  and displays the results.
1296
  """
1297
- # --- Determine HF Space Runtime URL and Repo URL ---
1298
- space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
1299
 
1300
  if profile:
1301
- username= f"{profile.username}"
1302
  print(f"User logged in: {username}")
1303
  else:
1304
  print("User not logged in.")
1305
  return "Please Login to Hugging Face with the button.", None
 
1306
  # Use the globally instantiated agent
1307
  global agent
1308
 
@@ -1317,13 +1417,6 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
1317
  questions_url = f"{api_url}/questions"
1318
  submit_url = f"{api_url}/submit"
1319
 
1320
- # 1. Instantiate Agent ( modify this part to create your agent)
1321
- #try:
1322
- # agent = BasicAgent()
1323
- #except Exception as e:
1324
- # print(f"Error instantiating agent: {e}")
1325
- # return f"Error initializing agent: {e}", None
1326
- # In the case of an app running as a hugging Face space, this link points toward your codebase ( usefull for others so please keep it public)
1327
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
1328
  print(agent_code)
1329
 
@@ -1357,11 +1450,10 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
1357
  task_id = item.get("task_id")
1358
  question_text = item.get("question")
1359
 
1360
- # 🌟 Initialize file variables for the current question
1361
  local_file_path = None
1362
- file_info = ""
1363
 
1364
- # 🌟 CRITICAL: Check if 'file_path' exists in the item dictionary
1365
  if item.get("file_path"):
1366
  file_path_from_api = item["file_path"]
1367
  file_download_url = f"{DEFAULT_API_URL}/files/{task_id}"
@@ -1370,9 +1462,11 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
1370
  original_filename = file_path_from_api.split('/')[-1]
1371
 
1372
  # Set the path where the file will be saved locally
1373
- local_file_path = os.path.join("/tmp", original_filename)
1374
 
1375
- # --- (Add streaming update here for file download status) ---
 
 
1376
 
1377
  try:
1378
  file_response = requests.get(file_download_url, timeout=15)
@@ -1380,30 +1474,37 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
1380
 
1381
  # Save the raw bytes content to the local file path
1382
  with open(local_file_path, 'wb') as f:
1383
- f.write(file_response.content)
1384
-
1385
- print(f"βœ… Downloaded file to: {local_file_path}")
1386
 
1387
- # Set the context string to be passed to the agent
1388
- file_info = f"\n\n[FILE ATTACHED: {local_file_path}]"
1389
 
 
 
 
 
 
1390
  except requests.exceptions.RequestException as e:
1391
  error_message = f"[FILE DOWNLOAD ERROR: Could not fetch file: {e}]"
1392
  print(f"⚠️ {error_message}")
1393
- # Still provide the error message as context to the agent
1394
- file_info = f"\n\n{error_message}"
 
 
 
1395
 
1396
  if not task_id or question_text is None:
1397
  print(f"Skipping item with missing task_id or question: {item}")
1398
  continue
 
1399
  try:
1400
- question_with_context = question_text + file_info
1401
- #submitted_answer = agent(question_with_context)
1402
- submitted_answer = agent(question_text, local_file_path if item.get("file_path") else None)
1403
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
1404
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
1405
  except Exception as e:
1406
  print(f"Error running agent on task {task_id}: {e}")
 
1407
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
1408
 
1409
  if not answers_payload:
@@ -1480,7 +1581,6 @@ with gr.Blocks() as demo:
1480
  run_button = gr.Button("Run Evaluation & Submit All Answers")
1481
 
1482
  status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
1483
- # Removed max_rows=10 from DataFrame constructor
1484
  results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
1485
 
1486
  run_button.click(
@@ -1490,9 +1590,8 @@ with gr.Blocks() as demo:
1490
 
1491
  if __name__ == "__main__":
1492
  print("\n" + "-"*30 + " App Starting " + "-"*30)
1493
- # Check for SPACE_HOST and SPACE_ID at startup for information
1494
  space_host_startup = os.getenv("SPACE_HOST")
1495
- space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
1496
 
1497
  if space_host_startup:
1498
  print(f"βœ… SPACE_HOST found: {space_host_startup}")
@@ -1500,7 +1599,7 @@ if __name__ == "__main__":
1500
  else:
1501
  print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
1502
 
1503
- if space_id_startup: # Print repo URLs if SPACE_ID is found
1504
  print(f"βœ… SPACE_ID found: {space_id_startup}")
1505
  print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
1506
  print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
 
21
  from youtube_transcript_api import YouTubeTranscriptApi
22
  from bs4 import BeautifulSoup
23
  import requests
24
+ from PIL import Image
25
+ import base64
26
 
27
  # LangChain & LangGraph
28
  from langgraph.graph.message import add_messages
 
34
  from langchain_google_genai import ChatGoogleGenerativeAI
35
  from langchain_community.llms import HuggingFaceHub
36
 
 
37
  # RAG
38
  from langchain_text_splitters import RecursiveCharacterTextSplitter
39
  from langchain_community.vectorstores import FAISS
 
471
  return f"Transcription error: {str(e)}"
472
 
473
 
474
class ImageAnalysisInput(BaseModel):
    """Argument schema for the analyze_image tool."""
    # Path to the image on disk, as given in the [FILE ATTACHED: ...] marker.
    file_path: str = Field(description="Image file path")
    # Natural-language instruction describing what to extract from the image.
    query: str = Field(description="What to analyze in the image")
477
+
478
@tool(args_schema=ImageAnalysisInput)
def analyze_image(file_path: str, query: str) -> str:
    """
    Analyzes images using Google Gemini Vision API.
    Use for: chess positions, diagrams, charts, photos, screenshots.
    Provide the EXACT file path from [FILE ATTACHED: ...] in the question.

    Returns the model's analysis as a string, or an "Error: ..." /
    "Image analysis error: ..." string on failure (never raises).
    """
    if not file_path or not query:
        return "Error: file_path and query required."

    print(f"🖼️ Analyzing image: {file_path}")
    print(f"   Query: {query[:100]}...")

    # Try the project-level file resolver first ...
    image_path = find_file(file_path)

    # ... then fall back to the literal path (covers files saved under /tmp).
    if not image_path and os.path.exists(file_path):
        image_path = Path(file_path)

    if not image_path or not image_path.exists():
        return f"Error: Image not found at '{file_path}'. Check [FILE ATTACHED: ...] in question for correct path."

    print(f"✓ Found image at: {image_path}")

    try:
        GOOGLE_API_KEY = os.getenv("GEMINI_API_KEY")
        if not GOOGLE_API_KEY:
            return "Error: GEMINI_API_KEY not set."

        # Load the image so we can re-encode it as base64 JPEG for the API.
        img = Image.open(image_path)
        print(f"   Image size: {img.size}, mode: {img.mode}")

        # FIX: JPEG cannot encode an alpha channel or palette modes. The old
        # check (`mode not in ['RGB', 'RGBA']`) let RGBA images through, and
        # img.save(..., format="JPEG") then raised
        # "OSError: cannot write mode RGBA as JPEG" for any PNG with alpha.
        # Flatten everything that is not already RGB.
        if img.mode != 'RGB':
            img = img.convert('RGB')

        # Encode as base64 JPEG for the data-URI payload.
        buffered = io.BytesIO()
        img.save(buffered, format="JPEG")
        img_base64 = base64.b64encode(buffered.getvalue()).decode()

        print(f"   Encoded image: {len(img_base64)} bytes")

        # Use Gemini Vision (temperature 0 for deterministic extraction).
        vision_llm = ChatGoogleGenerativeAI(
            model="gemini-2.0-flash-exp",
            google_api_key=GOOGLE_API_KEY,
            temperature=0
        )

        # Multimodal message: text part + inline data-URI image part.
        message = HumanMessage(
            content=[
                {"type": "text", "text": query},
                {
                    "type": "image_url",
                    "image_url": f"data:image/jpeg;base64,{img_base64}"
                }
            ]
        )

        print(f"   Sending to Gemini Vision...")
        response = vision_llm.invoke([message])
        print(f"✓ Got response: {len(response.content)} chars")

        return f"Image Analysis:\n{truncate_if_needed(response.content)}"

    except Exception as e:
        # Return (not raise) the error so the agent loop can react to it.
        error_msg = f"Image analysis error: {str(e)}"
        print(f"❌ {error_msg}")
        print(traceback.format_exc())
        return error_msg
551
+
552
+
553
  class YoutubeInput(BaseModel):
554
  video_url: str = Field(description="YouTube URL")
555
 
 
571
  if not video_id:
572
  return f"Error: Could not extract video ID."
573
 
 
 
574
  # FIXED: Use get_transcript instead of list_transcripts
575
  transcript_list = YouTubeTranscriptApi.get_transcript(video_id, languages=['en'])
576
 
 
692
 
693
  # Specialized
694
  audio_transcription_tool,
695
+ analyze_image, # NEW: Image analysis tool
696
  get_youtube_transcript,
697
  scrape_and_retrieve,
698
 
 
801
 
802
 
803
  # =============================================================================
804
+ # CONDITIONAL EDGE FUNCTION
805
  # =============================================================================
806
  def should_continue(state: AgentState):
807
  """Decide next step with robust logic."""
 
849
 
850
  # 5. Default: continue to agent
851
  print(f"πŸ”„ Default β†’ continuing to agent")
 
852
 
853
 
854
  # =============================================================================
 
1228
  print(f"🎯 NEW QUESTION")
1229
  print(f"{'='*70}")
1230
  print(f"Q: {question[:200]}{'...' if len(question) > 200 else ''}")
1231
+ if file_path:
1232
+ print(f"πŸ“Ž File attached: {file_path}")
1233
  print(f"{'='*70}\n")
1234
 
1235
+ # Enhanced question context with file information
1236
+ question_text = question
1237
+ if file_path:
1238
+ file_ext = Path(file_path).suffix.lower()
1239
+ file_type = "unknown"
1240
+
1241
+ if file_ext in ['.jpg', '.jpeg', '.png', '.gif', '.bmp']:
1242
+ file_type = "image"
1243
+ elif file_ext in ['.mp3', '.wav', '.m4a', '.flac']:
1244
+ file_type = "audio"
1245
+ elif file_ext in ['.csv', '.xlsx', '.xls']:
1246
+ file_type = "data"
1247
+ elif file_ext in ['.txt', '.pdf', '.doc', '.docx']:
1248
+ file_type = "document"
1249
+
1250
+ question_text += f"\n\n[FILE ATTACHED: {file_path}]"
1251
+ question_text += f"\n[FILE TYPE: {file_type}]"
1252
+ question_text += f"\nIMPORTANT: Use the appropriate tool to access this file first!"
1253
+
1254
  graph_input = {
1255
  "messages": [
1256
  SystemMessage(content=self.system_prompt),
1257
+ HumanMessage(content=question_text)
1258
  ],
1259
+ "file_path": file_path,
1260
  "turn": 0,
1261
  "has_plan": False,
1262
  "consecutive_errors": 0,
 
1386
  traceback.print_exc()
1387
  agent = None
1388
 
1389
+ # =============================================================================
1390
+ # RUN AND SUBMIT FUNCTION
1391
+ # =============================================================================
1392
+ def run_and_submit_all(profile: gr.OAuthProfile | None):
1393
  """
1394
  Fetches all questions, runs the BasicAgent on them, submits all answers,
1395
  and displays the results.
1396
  """
1397
+ space_id = os.getenv("SPACE_ID")
 
1398
 
1399
  if profile:
1400
+ username = f"{profile.username}"
1401
  print(f"User logged in: {username}")
1402
  else:
1403
  print("User not logged in.")
1404
  return "Please Login to Hugging Face with the button.", None
1405
+
1406
  # Use the globally instantiated agent
1407
  global agent
1408
 
 
1417
  questions_url = f"{api_url}/questions"
1418
  submit_url = f"{api_url}/submit"
1419
 
 
 
 
 
 
 
 
1420
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
1421
  print(agent_code)
1422
 
 
1450
  task_id = item.get("task_id")
1451
  question_text = item.get("question")
1452
 
1453
+ # Initialize file variables for the current question
1454
  local_file_path = None
 
1455
 
1456
+ # Check if 'file_path' exists in the item dictionary
1457
  if item.get("file_path"):
1458
  file_path_from_api = item["file_path"]
1459
  file_download_url = f"{DEFAULT_API_URL}/files/{task_id}"
 
1462
  original_filename = file_path_from_api.split('/')[-1]
1463
 
1464
  # Set the path where the file will be saved locally
1465
+ local_file_path = os.path.join("/tmp", original_filename)
1466
 
1467
+ print(f"πŸ“₯ Downloading file for task {task_id}...")
1468
+ print(f" URL: {file_download_url}")
1469
+ print(f" Saving to: {local_file_path}")
1470
 
1471
  try:
1472
  file_response = requests.get(file_download_url, timeout=15)
 
1474
 
1475
  # Save the raw bytes content to the local file path
1476
  with open(local_file_path, 'wb') as f:
1477
+ f.write(file_response.content)
 
 
1478
 
1479
+ file_size = os.path.getsize(local_file_path)
1480
+ print(f"βœ… Downloaded file: {original_filename} ({file_size} bytes)")
1481
 
1482
+ # Verify file exists and is readable
1483
+ if not os.path.exists(local_file_path):
1484
+ print(f"⚠️ Warning: File saved but cannot be found at {local_file_path}")
1485
+ local_file_path = None
1486
+
1487
  except requests.exceptions.RequestException as e:
1488
  error_message = f"[FILE DOWNLOAD ERROR: Could not fetch file: {e}]"
1489
  print(f"⚠️ {error_message}")
1490
+ local_file_path = None
1491
+ except Exception as e:
1492
+ error_message = f"[FILE SAVE ERROR: {e}]"
1493
+ print(f"⚠️ {error_message}")
1494
+ local_file_path = None
1495
 
1496
  if not task_id or question_text is None:
1497
  print(f"Skipping item with missing task_id or question: {item}")
1498
  continue
1499
+
1500
  try:
1501
+ # Pass file_path to agent
1502
+ submitted_answer = agent(question_text, local_file_path)
 
1503
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
1504
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
1505
  except Exception as e:
1506
  print(f"Error running agent on task {task_id}: {e}")
1507
+ print(traceback.format_exc())
1508
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
1509
 
1510
  if not answers_payload:
 
1581
  run_button = gr.Button("Run Evaluation & Submit All Answers")
1582
 
1583
  status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
 
1584
  results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
1585
 
1586
  run_button.click(
 
1590
 
1591
  if __name__ == "__main__":
1592
  print("\n" + "-"*30 + " App Starting " + "-"*30)
 
1593
  space_host_startup = os.getenv("SPACE_HOST")
1594
+ space_id_startup = os.getenv("SPACE_ID")
1595
 
1596
  if space_host_startup:
1597
  print(f"βœ… SPACE_HOST found: {space_host_startup}")
 
1599
  else:
1600
  print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
1601
 
1602
+ if space_id_startup:
1603
  print(f"βœ… SPACE_ID found: {space_id_startup}")
1604
  print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
1605
  print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")