Spaces:

gabejavitt
/

agentCourse

Sleeping

App Files Files Community

gabejavitt committed on Feb 10

Commit

f6e496f

verified ·

1 Parent(s): d2a3d72

Update app.py

Browse files

Files changed (1) hide show

app.py +273 -146

app.py CHANGED Viewed

@@ -49,6 +49,7 @@ from langgraph.graph import START, END, StateGraph
 from langchain_groq import ChatGroq
 from langchain_google_genai import ChatGoogleGenerativeAI
 # RAG
 from langchain_text_splitters import RecursiveCharacterTextSplitter
 from langchain_community.vectorstores import FAISS
@@ -763,7 +764,7 @@ class SearchInput(BaseModel):
 @tool(args_schema=SearchInput)
 @retry_with_backoff(max_retries=3)
 def search_tool(query: str) -> str:
-    """Web search with caching"""
     start_time = time.time()
     try:
@@ -785,7 +786,15 @@ def search_tool(query: str) -> str:
         print(f"🔍 Searching: {query}")
         search = DuckDuckGoSearchRun()
         result = search.run(query)
         if not result or len(result) < 50:
@@ -1212,110 +1221,122 @@ class ChessAnalysisInput(BaseModel):
     description: str = Field(description="Context about position", default="")
 @tool(args_schema=ChessAnalysisInput)
-def analyze_chess_position(image_path: str, description: str = "") -> str:
     """
-    Analyze chess position using Stockfish.
-    Requires stockfish binary installed.
     """
     start_time = time.time()
     try:
-        print(f"♟️ Analyzing chess: {image_path}")
-        # Find image
-        chess_image = find_file(image_path)
-        if not chess_image and os.path.exists(image_path):
-            chess_image = Path(image_path)
-        if not chess_image or not chess_image.exists():
-            raise FileNotFoundError(f"Chess image not found: {image_path}")
-        # Extract FEN using Gemini Vision
         GOOGLE_API_KEY = os.getenv("GEMINI_API_KEY")
         if not GOOGLE_API_KEY:
             raise ValueError("GEMINI_API_KEY not set")
-        img = Image.open(chess_image)
-        if img.mode not in ['RGB', 'RGBA']:
-            img = img.convert('RGB')
-        buffered = io.BytesIO()
-        img.save(buffered, format="JPEG")
-        img_base64 = base64.b64encode(buffered.getvalue()).decode()
-        vision_llm = ChatGoogleGenerativeAI(
             model="gemini-2.5-flash",
             google_api_key=GOOGLE_API_KEY,
             temperature=0
         )
-        fen_prompt = """Analyze this chess board and provide FEN notation.
-Return ONLY the FEN string, nothing else.
-Format: piece_placement active_color castling en_passant halfmove fullmove"""
         message = HumanMessage(
             content=[
-                {"type": "text", "text": fen_prompt},
-                {"type": "image_url", "image_url": f"data:image/jpeg;base64,{img_base64}"}
             ]
         )
-        response = vision_llm.invoke([message])
         fen = response.content.strip()
-        # Clean FEN
-        for line in fen.split('\n'):
-            line = line.strip().replace('```', '').replace('fen', '')
-            if '/' in line and ' ' in line:
-                fen = line
-                break
         print(f"✓ FEN: {fen}")
         # Analyze with Stockfish
         try:
-            import chess
-            from stockfish import Stockfish
-        except ImportError:
-            raise ImportError("Need: pip install python-chess stockfish")
-        # Find Stockfish binary
-        stockfish_paths = [
-            "/usr/games/stockfish",
-            "/usr/local/bin/stockfish",
-            "/usr/bin/stockfish",
-            "stockfish"
-        ]
-        stockfish_path = None
-        for path in stockfish_paths:
-            if os.path.exists(path):
-                stockfish_path = path
-                break
-        if not stockfish_path:
-            raise FileNotFoundError("Stockfish binary not found. Install: apt-get install stockfish")
-        stockfish = Stockfish(path=stockfish_path, depth=20)
-        stockfish.set_fen_position(fen)
-        best_move_uci = stockfish.get_best_move()
-        if not best_move_uci:
-            raise ValueError("No legal move found")
-        # Convert to SAN
-        board = chess.Board(fen)
-        uci_move = chess.Move.from_uci(best_move_uci)
-        san_move = board.san(uci_move)
-        print(f"✓ Best move: {san_move}")
         telemetry.record_call("analyze_chess_position", time.time() - start_time, True)
-        return san_move
     except Exception as e:
         telemetry.record_call("analyze_chess_position", time.time() - start_time, False)
-        raise ToolError("analyze_chess_position", e, "Check if stockfish installed")
 class ImageAnalysisInput(BaseModel):
     file_path: str = Field(description="Image file path")
@@ -1636,80 +1657,149 @@ class ScrapeInput(BaseModel):
 @tool(args_schema=ScrapeInput)
 @retry_with_backoff(max_retries=3)
 def scrape_and_retrieve(url: str, query: str) -> str:
-    """Fetch and search full webpage with RAG"""
     start_time = time.time()
     try:
-        # Input validation
-        is_valid, msg = validate_tool_inputs("scrape_and_retrieve", {"url": url})
         if not is_valid:
             raise ValueError(msg)
-        if not rag_manager.is_ready():
-            rag_manager.initialize()
-        if not rag_manager.is_ready():
-            raise RuntimeError("RAG not available")
         print(f"🌐 Scraping: {url}")
-        print(f"   Looking for: {query[:100]}...")
-        headers = {
-            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
-        }
-        response = requests.get(url, headers=headers, timeout=20)
-        response.raise_for_status()
-        soup = BeautifulSoup(response.text, 'html.parser')
-        # Remove noise
-        for tag in soup(["script", "style", "nav", "footer", "aside", "header", "iframe"]):
-            tag.extract()
-        main = soup.find('main') or soup.find('article') or soup.find('div', class_='mw-parser-output') or soup.body
-        if not main:
-            raise ValueError("Could not find main content")
-        text = main.get_text(separator='\n', strip=True)
-        lines = [l.strip() for l in text.splitlines() if l.strip()]
-        text = '\n'.join(lines)
-        if len(text) < 50:
-            raise ValueError(f"Content too short ({len(text)} chars)")
         print(f"✓ Extracted {len(text)} characters")
         # RAG retrieval
-        chunks = rag_manager.text_splitter.split_text(text)
-        print(f"✓ Created {len(chunks)} chunks")
-        docs = [Document(page_content=c, metadata={"source": url}) for c in chunks]
-        db = FAISS.from_documents(docs, rag_manager.embeddings)
-        retriever = db.as_retriever(search_kwargs={"k": 5})
-        retrieved = retriever.invoke(query)
-        # Clean up memory
-        del db
-        del retriever
-        import gc
-        gc.collect()
-        if not retrieved:
-            return f"No info found for: '{query}'. Try different query."
-        print(f"✓ Found {len(retrieved)} relevant chunks")
-        context = "\n\n---\n\n".join([f"[Section {i+1}]\n{d.page_content}" for i, d in enumerate(retrieved)])
         telemetry.record_call("scrape_and_retrieve", time.time() - start_time, True)
-        return truncate_if_needed(f"From {url}:\n\n{context}")
-    except requests.Timeout:
-        telemetry.record_call("scrape_and_retrieve", time.time() - start_time, False)
-        raise ToolError("scrape_and_retrieve", TimeoutError("Request timed out"), "Check URL or try later")
     except Exception as e:
         telemetry.record_call("scrape_and_retrieve", time.time() - start_time, False)
         raise ToolError("scrape_and_retrieve", e)
@@ -1746,34 +1836,37 @@ def analyze_video(file_path: str, query: str) -> str:
         if not GOOGLE_API_KEY:
             raise ValueError("GEMINI_API_KEY not set")
-        # Upload video to Gemini
-        print(f"   Uploading video to Gemini...")
-        import google.generativeai as genai
-        genai.configure(api_key=GOOGLE_API_KEY)
-        video_file = genai.upload_file(path=str(video_path))
-        print(f"   Waiting for processing...")
-        while video_file.state.name == "PROCESSING":
-            time.sleep(2)
-            video_file = genai.get_file(video_file.name)
-        if video_file.state.name == "FAILED":
-            raise RuntimeError("Video processing failed")
-        # Analyze with Gemini
         print(f"   Analyzing with Gemini...")
-        model = genai.GenerativeModel("gemini-2.0-flash-exp")
-        response = model.generate_content([
-            video_file,
-            query
-        ])
-        result = response.text
-        # Clean up
-        genai.delete_file(video_file.name)
         print(f"✓ Analysis complete: {len(result)} chars")
@@ -1810,6 +1903,7 @@ defined_tools = [
     create_plan,
     reflect_on_progress,
     validate_answer,
     # Core tools
     search_tool,
@@ -2071,16 +2165,17 @@ Turn 5: final_answer_tool("3")
 REMEMBER: wikipedia_search() wants just the SUBJECT NAME!
 ═══════════════════════════════════════════════════════════════
-**YOUTUBE VIDEO QUESTIONS** (Network restrictions):
-⚠️ YouTube URLs may be blocked on HuggingFace Spaces!
-If question mentions YouTube AND a local .mp4 file exists:
-→ Use analyze_image tool on the local video file instead
-→ Or use audio_transcription_tool for audio content
 Example:
-Q: "In video https://youtube.com/..., what happens?"
-[FILE: task_123.mp4]
-✅ CORRECT: analyze_image("files/task_123.mp4", "what happens in video")
 ❌ WRONG: get_youtube_transcript("https://youtube.com/...")
@@ -2200,6 +2295,36 @@ REMEMBER: One tool per turn. No reasoning without tools. Exact answer format.
         # Start with Groq
         self.llm_with_tools = self.groq_llm
         self.current_llm = "groq"
         # Build agent graph
         def agent_node(state: AgentState):
@@ -2208,6 +2333,8 @@ REMEMBER: One tool per turn. No reasoning without tools. Exact answer format.
             print(f"\n{'='*70}")
             print(f"🤖 AGENT TURN {current_turn}/{config.MAX_TURNS}")
             print('='*70)
             if current_turn > config.MAX_TURNS:
                 return {

 from langchain_groq import ChatGroq
 from langchain_google_genai import ChatGoogleGenerativeAI
 # RAG
 from langchain_text_splitters import RecursiveCharacterTextSplitter
 from langchain_community.vectorstores import FAISS
 @tool(args_schema=SearchInput)
 @retry_with_backoff(max_retries=3)
 def search_tool(query: str) -> str:
+    """Web search with caching and language filtering"""
     start_time = time.time()
     try:
         print(f"🔍 Searching: {query}")
+        # DuckDuckGo doesn't support these params directly,
+        # but we can filter by adding language hints
+        # For English results, add hint to query
         search = DuckDuckGoSearchRun()
+        # Add language hint to force English results
+        if not any(keyword in query.lower() for keyword in ['lang:', 'region:']):
+            query = f"{query} lang:en"
         result = search.run(query)
         if not result or len(result) < 50:
     description: str = Field(description="Context about position", default="")
 @tool(args_schema=ChessAnalysisInput)
+def analyze_chess_position(file_path: str = "", description: str = "", image_path: str = "") -> str:
     """
+    Find the best move for the chess position shown in an image.
+
+    The image is sent to Gemini Vision to obtain a FEN string, the FEN is
+    validated with python-chess, and Stockfish (depth 20) picks the move.
+
+    Args:
+        file_path: Name or path of the board image.
+        description: Optional context about the position. Accepted because
+            ChessAnalysisInput declares this field; currently unused.
+        image_path: Alias for file_path (the parameter name this tool used
+            before it was renamed).
+
+    Returns:
+        "<SAN move> (<side> to move, from FEN: <fen>)".
+
+    Raises:
+        ToolError: Wraps any failure (missing image or API key, invalid
+            FEN, Stockfish not installed, no move available).
     """
     start_time = time.time()
     try:
+        # Local imports: `import chess` alone does not load the engine submodule.
+        import mimetypes
+        import chess
+        import chess.engine
+        # NOTE(review): ChessAnalysisInput presumably still names this field
+        # `image_path` (the old signature did) - confirm against the schema.
+        file_path = file_path or image_path
+        if not file_path:
+            raise ValueError("No image path provided")
+        print(f"♟️ Analyzing chess: {file_path}")
+        # Find file
+        image_file = find_file(file_path)
+        if not image_file and os.path.exists(file_path):
+            image_file = Path(file_path)
+        if not image_file or not image_file.exists():
+            raise FileNotFoundError(f"Image not found: {file_path}")
         GOOGLE_API_KEY = os.getenv("GEMINI_API_KEY")
         if not GOOGLE_API_KEY:
             raise ValueError("GEMINI_API_KEY not set")
+        # Read image as base64 and label it with its real MIME type
+        # (falls back to PNG when the extension is unknown).
+        mime_type = mimetypes.guess_type(str(image_file))[0] or "image/png"
+        with open(image_file, "rb") as f:
+            image_data = base64.b64encode(f.read()).decode("utf-8")
+        # Use Gemini to extract FEN
+        llm = ChatGoogleGenerativeAI(
             model="gemini-2.5-flash",
             google_api_key=GOOGLE_API_KEY,
             temperature=0
         )
         message = HumanMessage(
             content=[
+                {
+                    "type": "text",
+                    "text": """Analyze this chess position and provide the FEN notation.
+CRITICAL: The FEN string MUST include whose turn it is:
+- If White to move: end with "w - - 0 1"
+- If Black to move: end with "b - - 0 1"
+Look at the board carefully to determine whose turn it is based on:
+1. Any text in the image indicating whose turn
+2. The position context
+3. If unclear, look at piece positions
+Respond with ONLY the FEN string, nothing else."""
+                },
+                {
+                    "type": "image_url",
+                    "image_url": {
+                        "url": f"data:{mime_type};base64,{image_data}"
+                    }
+                }
             ]
         )
+        response = llm.invoke([message])
         fen = response.content.strip()
+        # The model may wrap the answer in a code fence or add extra lines;
+        # keep the first line that looks like a piece-placement string.
+        for line in fen.splitlines():
+            line = line.replace("`", "").strip()
+            if "/" in line:
+                fen = line
+                break
         print(f"✓ FEN: {fen}")
+        # FEN format: position w/b castling en-passant halfmove fullmove
+        # Default to White to move when only the placement field came back.
+        if len(fen.split()) < 2:
+            fen = f"{fen} w - - 0 1"
         # Analyze with Stockfish
         try:
+            board = chess.Board(fen)
+        except ValueError as e:
+            raise ValueError(f"Invalid FEN from Gemini: {fen}. Error: {e}") from e
+        # Read the side to move from the parsed board rather than re-splitting the FEN.
+        turn_text = "White" if board.turn == chess.WHITE else "Black"
+        print(f"✓ Turn: {turn_text}")
+        # Configure Stockfish
+        stockfish_path = "/usr/games/stockfish"
+        if not os.path.exists(stockfish_path):
+            raise FileNotFoundError("Stockfish not found at /usr/games/stockfish")
+        # The context manager shuts the engine process down even if analysis raises.
+        with chess.engine.SimpleEngine.popen_uci(stockfish_path) as engine:
+            result = engine.analyse(board, chess.engine.Limit(depth=20))
+        # "pv" may be missing when there is nothing to play (e.g. mate or stalemate).
+        principal_variation = result.get("pv")
+        if not principal_variation:
+            raise ValueError("No legal move found")
+        best_move = principal_variation[0]
+        # Convert to algebraic notation
+        move_san = board.san(best_move)
+        print(f"✓ Best move: {move_san}")
         telemetry.record_call("analyze_chess_position", time.time() - start_time, True)
+        return f"{move_san} ({turn_text} to move, from FEN: {fen})"
     except Exception as e:
         telemetry.record_call("analyze_chess_position", time.time() - start_time, False)
+        raise ToolError("analyze_chess_position", e, "Check image quality and Stockfish installation")
 class ImageAnalysisInput(BaseModel):
     file_path: str = Field(description="Image file path")
 @tool(args_schema=ScrapeInput)
 @retry_with_backoff(max_retries=3)
 def scrape_and_retrieve(url: str, query: str) -> str:
+    """
+    Scrape a webpage and return the sections most relevant to a query.
+
+    The page is fetched, stripped of script/style/nav/header/footer tags,
+    split into chunks, indexed in a temporary FAISS store and searched for
+    the five chunks closest to the query. A 404 on a wikipedia.org URL
+    triggers fallbacks: alternative URL forms first, then an opensearch
+    lookup on the last path segment.
+
+    Args:
+        url: Page to scrape.
+        query: What to look for in the page text.
+
+    Returns:
+        "From <url>:" followed by numbered sections (truncated if needed),
+        or a "No info found" message when retrieval returns nothing.
+
+    Raises:
+        ToolError: On any failure (invalid input, HTTP error, too little
+            content, RAG unavailable).
+    """
     start_time = time.time()
     try:
+        # Local imports keep the block independent of the file's import list.
+        import gc
+        import re
+        from urllib.parse import unquote
+        is_valid, msg = validate_tool_inputs("scrape_and_retrieve", {"url": url, "query": query})
         if not is_valid:
             raise ValueError(msg)
+        # Embeddings are needed further down; initialise them lazily.
+        if not rag_manager.is_ready():
+            rag_manager.initialize()
+        if not rag_manager.is_ready():
+            raise RuntimeError("RAG not available")
         print(f"🌐 Scraping: {url}")
+        print(f"   Looking for: {query[:50]}...")
+        headers = {
+            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
+        }
+        # ===== TRY PRIMARY URL =====
+        try:
+            response = requests.get(url, timeout=15, headers=headers)
+            response.raise_for_status()
+        except requests.exceptions.HTTPError as e:
+            # Only a 404 on Wikipedia has fallbacks; anything else propagates.
+            if e.response is None or e.response.status_code != 404:
+                raise
+            print(f"   ❌ 404 error, trying fallbacks...")
+            if "wikipedia.org" not in url:
+                raise
+            # ===== FALLBACK 1: Try alternative URL formats =====
+            fallback_urls = []
+            # Example: Wikipedia:Featured_articles/2016_November
+            # Try: Wikipedia:Featured_articles#2016
+            # The year and the base URL come from the same match so they
+            # cannot disagree about where the year suffix starts.
+            year_match = re.search(r'/(20\d{2})', url) if "_" in url else None
+            if year_match:
+                base_url = url[:year_match.start()]
+                # Try anchor link format
+                fallback_urls.append(f"{base_url}#{year_match.group(1)}")
+                # Try without year suffix
+                fallback_urls.append(base_url)
+            # Try with underscores replaced by spaces (URL encoded)
+            if "_" in url:
+                fallback_urls.append(url.replace("_", "%20"))
+            for fallback_url in fallback_urls:
+                try:
+                    print(f"   Trying fallback: {fallback_url}")
+                    response = requests.get(fallback_url, timeout=15, headers=headers)
+                    response.raise_for_status()
+                except requests.exceptions.RequestException:
+                    # This candidate failed; move on to the next one.
+                    continue
+                url = fallback_url  # Update URL for later
+                print(f"   ✓ Fallback succeeded!")
+                break
+            else:
+                # ===== FALLBACK 2: Use Wikipedia search =====
+                print(f"   All URL fallbacks failed, trying Wikipedia search...")
+                # Decode the last path segment into plain search terms.
+                search_terms = unquote(url.split('/')[-1]).replace('_', ' ')
+                # Passing params lets requests URL-encode the search terms.
+                search_response = requests.get(
+                    "https://en.wikipedia.org/w/api.php",
+                    params={
+                        "action": "opensearch",
+                        "search": search_terms,
+                        "limit": 1,
+                        "format": "json",
+                    },
+                    timeout=10,
+                    headers=headers,
+                )
+                search_response.raise_for_status()
+                search_data = search_response.json()
+                # opensearch replies [term, titles, descriptions, urls].
+                if len(search_data) > 3 and search_data[3]:
+                    wiki_url = search_data[3][0]
+                    print(f"   ✓ Found via search: {wiki_url}")
+                    response = requests.get(wiki_url, timeout=15, headers=headers)
+                    response.raise_for_status()
+                    url = wiki_url
+                else:
+                    raise ToolError(
+                        "scrape_and_retrieve",
+                        Exception(f"404 and all fallbacks failed for {url}"),
+                        "Try using wikipedia_search tool to find the correct article first"
+                    )
+        # ===== END FALLBACKS =====
+        # Parse content
+        soup = BeautifulSoup(response.content, 'html.parser')
+        # Remove unwanted elements
+        for element in soup(['script', 'style', 'nav', 'header', 'footer']):
+            element.decompose()
+        text = soup.get_text(separator='\n', strip=True)
+        if len(text) < 100:
+            raise ValueError(f"Insufficient content extracted from {url}")
         print(f"✓ Extracted {len(text)} characters")
         # RAG retrieval
+        docs = [Document(page_content=text, metadata={"source": url})]
+        text_splitter = RecursiveCharacterTextSplitter(
+            chunk_size=Config.CHUNK_SIZE,
+            chunk_overlap=Config.CHUNK_OVERLAP
+        )
+        chunks = text_splitter.split_documents(docs)
+        print(f"✓ Created {len(chunks)} chunks")
+        # Search for relevant chunks; release the index even if retrieval fails.
+        vectorstore = FAISS.from_documents(chunks, rag_manager.embeddings)
+        try:
+            retriever = vectorstore.as_retriever(search_kwargs={"k": 5})
+            relevant_docs = retriever.invoke(query)
+        finally:
+            del vectorstore
+            gc.collect()
+        print(f"✓ Found {len(relevant_docs)} relevant chunks")
+        # Format results
+        if relevant_docs:
+            sections = [
+                f"[Section {i}]\n{doc.page_content.strip()}"
+                for i, doc in enumerate(relevant_docs, 1)
+            ]
+            result = truncate_if_needed(f"From {url}:\n\n" + "\n\n".join(sections))
+        else:
+            result = f"No info found for: '{query}'. Try different query."
         telemetry.record_call("scrape_and_retrieve", time.time() - start_time, True)
+        return result
+    except ToolError:
+        # Raised above with a tool-specific hint; wrapping it again in a
+        # generic ToolError would bury that hint.
+        telemetry.record_call("scrape_and_retrieve", time.time() - start_time, False)
+        raise
     except Exception as e:
         telemetry.record_call("scrape_and_retrieve", time.time() - start_time, False)
         raise ToolError("scrape_and_retrieve", e)
         if not GOOGLE_API_KEY:
             raise ValueError("GEMINI_API_KEY not set")
+        # Read video as base64
+        print(f"   Reading video file...")
+        with open(video_path, "rb") as f:
+            video_data = base64.b64encode(f.read()).decode("utf-8")
+        # Use Gemini via LangChain
         print(f"   Analyzing with Gemini...")
+        llm = ChatGoogleGenerativeAI(
+            model="gemini-2.5-flash",
+            google_api_key=GOOGLE_API_KEY,
+            temperature=0
+        )
+        # Create message with video
+        message = HumanMessage(
+            content=[
+                {
+                    "type": "text",
+                    "text": query
+                },
+                {
+                    "type": "video_url",
+                    "video_url": {
+                        "url": f"data:video/mp4;base64,{video_data}"
+                    }
+                }
+            ]
+        )
+        response = llm.invoke([message])
+        result = response.content
         print(f"✓ Analysis complete: {len(result)} chars")
     create_plan,
     reflect_on_progress,
     validate_answer,
+    analyze_data_file,
     # Core tools
     search_tool,
 REMEMBER: wikipedia_search() wants just the SUBJECT NAME!
 ═══════════════════════════════════════════════════════════════
+**YOUTUBE VIDEO HANDLING:**
+⚠️ YouTube URLs are BLOCKED on HuggingFace Spaces!
+IF question mentions YouTube URL AND local video file exists:
+→ Use analyze_video tool on the local .mp4 file instead
+→ The local file contains the same video content
 Example:
+Question: "In video https://youtube.com/watch?v=abc, how many birds?"
+File: files/task_123.mp4
+✅ CORRECT: analyze_video("files/task_123.mp4", "count bird species")
 ❌ WRONG: get_youtube_transcript("https://youtube.com/...")
         # Start with Groq
         self.llm_with_tools = self.groq_llm
         self.current_llm = "groq"
+        def prune_context_if_needed(state: AgentState) -> AgentState:
+            """
+            Prune conversation history if it's getting too long.
+
+            Keeps the leading system message (when there is one) plus the
+            most recent messages, so the total stays at or below MAX_MESSAGES.
+            The state's "messages" entry is replaced in place and the same
+            state object is returned.
+            """
+            MAX_MESSAGES = 20  # Adjust based on your needs
+            messages = state.get("messages", [])
+            if len(messages) <= MAX_MESSAGES:
+                return state
+            print(f"⚠️ Context pruning: {len(messages)} messages → {MAX_MESSAGES}")
+            # Always keep system message (if it exists)
+            head = []
+            if isinstance(messages[0], SystemMessage):
+                head = [messages[0]]
+                messages = messages[1:]
+            # Keep only recent messages
+            recent_messages = messages[-(MAX_MESSAGES - 1):]
+            # A tool result whose originating tool call was cut off is an
+            # orphan; chat APIs typically reject such a history, so drop
+            # any tool results at the start of the kept window.
+            while recent_messages and getattr(recent_messages[0], "type", None) == "tool":
+                recent_messages = recent_messages[1:]
+            state["messages"] = head + recent_messages
+            return state
         # Build agent graph
         def agent_node(state: AgentState):
             print(f"\n{'='*70}")
             print(f"🤖 AGENT TURN {current_turn}/{config.MAX_TURNS}")
             print('='*70)
+            state = prune_context_if_needed(state)
             if current_turn > config.MAX_TURNS:
                 return {