"""Streamlit front-end for the visa-data crawler.

Collects a starting URL and crawl depth, calls the Flask ``/scrape``
endpoint, and renders the returned link map plus accumulated LLM token
usage.
"""

import os
from typing import Any, Dict

import requests
import streamlit as st
from dotenv import load_dotenv

from models import LinkNode, Status

load_dotenv()


def display_map(link_map: Dict[str, Any]) -> None:
    """Display the entire link map in collapsible Streamlit expanders.

    Each raw dict is validated into a ``LinkNode``; entries that fail
    validation are reported and skipped. Nodes are rendered shallowest
    first and tagged with a status banner (relevant / irrelevant /
    failed / unknown).

    Args:
        link_map: Mapping of page href -> serialized LinkNode dict, as
            returned by the scrape API.
    """
    st.header("🌐 Full Exploration Map")
    if not link_map:
        st.info("The exploration map is empty.")
        return

    # Validate raw dicts into typed nodes; surface (but skip) bad entries
    # instead of letting one malformed node break the whole page.
    validated_map: Dict[str, LinkNode] = {}
    for href, dict_node in link_map.items():
        try:
            validated_map[href] = LinkNode.model_validate(dict_node)
        except Exception as e:  # pydantic ValidationError or any malformed payload
            st.error(f"Failed to validate data for {href}. Skipping. Error: {e}")

    # Shallowest pages first so the crawl reads top-down.
    sorted_nodes = sorted(validated_map.items(), key=lambda item: item[1].depth)
    for href, node in sorted_nodes:
        st.divider()
        st.subheader(f"📄 [{href}]({href})")
        if node.parent:
            st.caption(f"Found on: {node.parent}")

        status = node.overview.status
        if status == Status.RELEVANT:
            st.success("**Status: RELEVANT** ✅")
        elif status == Status.IRRELEVANT:
            st.warning("**Status: IRRELEVANT** ⚠️ - Page deemed not relevant to search criteria.")
        elif status == Status.FAILED:
            st.error("**Status: FAILED** ❌ - Could not scrape or analyze this page.")
        else:
            st.info("**Status: UNKNOWN** 🟡")

        st.markdown("**📝 Summary**")
        st.info(node.overview.summary)

        with st.expander("View Full Extracted Data and Found Links"):
            st.markdown("##### 📋 Full Extracted Data")
            overview_data = node.overview.model_dump()
            # Fixed display order; empty/missing fields are omitted.
            display_order = ['details', 'required_docs', 'price', 'SLA']
            items_to_display = [
                (key.replace('_', ' ').capitalize(), str(value))
                for key in display_order
                if (value := overview_data.get(key))
            ]
            for i, (title, value) in enumerate(items_to_display):
                st.markdown(f"**{title}**")
                st.markdown(value)
                if i < len(items_to_display) - 1:
                    st.markdown("---")  # separator between fields, not after the last

            st.markdown("##### 🔗 Links Found on This Page")
            if node.child:
                st.write(f"Found **{len(node.child)}** link(s):")
                links_text = "\n".join(f"- {link}" for link in node.child)
                # Unique key per page so Streamlit doesn't collide widget state.
                st.text_area("Links", links_text, height=150, key=f"links_{href}")
            else:
                st.write("No valid links were found on this page.")


def main() -> None:
    """Streamlit entry point: collect config, call the scrape API, render results."""
    st.title("🤖 Browser Agent: Visa Data Extractor (Streamlit Demo)")
    st.markdown(
        "Enter an API Key and a URL to start a recursive web crawl "
        "for structured visa information."
    )

    with st.sidebar:
        st.header("Configuration")
        default_url = "https://www.netherlandsworldwide.nl/visa-the-netherlands/visa-application-form"
        url = st.text_input("Starting URL (e.g., website.com)", default_url)
        max_depth = st.slider("Max Exploration Depth", min_value=1, max_value=5, value=1)
        st.markdown(
            """
            **Note:** Depth 1 is fast. Depth 2 or 3 can be **very slow** and consume many tokens.
            """
        )

    # --- Main Execution ---
    if not st.button("Start Exploration and Extraction"):
        return
    if not url:
        st.error("Please enter a valid Starting URL.")
        return

    base_uri = os.getenv("BASE_URI", "http://localhost:5000")
    with st.spinner(f"Crawling {url} up to depth {max_depth}... (This may take a while)"):
        try:
            result = requests.post(
                f"{base_uri}/scrape",
                headers={"Content-Type": "application/json"},
                json={"url": url, "max_depth": max_depth},
                # Deep crawls are slow, but never hang the UI forever.
                timeout=600,
            )
        except requests.exceptions.ConnectionError:
            st.error(
                f"Connection Error: Could not connect to the Flask API at {base_uri}. "
                "Please ensure your Flask app is running (e.g., `flask run`)."
            )
            return
        except Exception as e:
            st.exception(f"An unexpected error occurred during the crawl: {e}")
            return

    if result.status_code != 200:
        st.error(f"Exploration failed with status {result.status_code}: {result.text}")
        return

    data = result.json()
    display_map(data.get("link_map", {}))

    st.subheader("💰 Accumulated Token Usage (All LLM Calls)")
    token_usage = data.get("token_usage", {"input": 0, "output": 0, "total": 0})
    st.write(f"**Input Tokens:** {token_usage.get('input', 0)}")
    st.write(f"**Output Tokens:** {token_usage.get('output', 0)}")
    st.write(f"**Total Tokens:** {token_usage.get('total', 0)}")


if __name__ == "__main__":
    main()