import streamlit as st
from typing import Dict, Any
import requests
from models import LinkNode, Status
from typing import Dict, Any
import os
from dotenv import load_dotenv
load_dotenv()
def display_map(link_map: Dict[str, Any]) -> None:
    """
    Display the entire link map in collapsible Streamlit expanders.

    Each entry of ``link_map`` is validated into a ``LinkNode``; entries that
    fail validation are reported with ``st.error`` and skipped. Valid nodes are
    rendered shallowest-first with a status banner, a summary, and an expander
    holding the extracted fields plus the links found on that page.

    Args:
        link_map: Mapping of href -> serialized LinkNode dict (e.g. the
            ``link_map`` payload returned by the /scrape endpoint).
    """
    st.header("π Full Exploration Map")
    if not link_map:
        st.info("The exploration map is empty.")
        return

    # Validate raw dicts into LinkNode models; surface and skip bad entries.
    validated_map = {}
    for href, dict_node in link_map.items():
        try:
            validated_map[href] = LinkNode.model_validate(dict_node)
        except Exception as e:
            st.error(f"Failed to validate data for {href}. Skipping. Error: {e}")

    # Render shallow pages before deeper ones.
    sorted_map = sorted(validated_map.items(), key=lambda item: item[1].depth)
    for href, node in sorted_map:
        st.divider()
        st.subheader(f"π [{href}]({href})")
        if node.parent:
            st.caption(f"Found on: {node.parent}")

        status = node.overview.status
        # FIX: the RELEVANT message literal was split across two source lines,
        # embedding a stray newline and indentation into the user-facing text.
        # Also dropped needless f-prefixes on constant strings.
        if status == Status.RELEVANT:
            st.success("**Status: RELEVANT** β")
        elif status == Status.IRRELEVANT:
            st.warning("**Status: IRRELEVANT** β οΈ - Page deemed not relevant to search criteria.")
        elif status == Status.FAILED:
            st.error("**Status: FAILED** β - Could not scrape or analyze this page.")
        else:
            st.info("**Status: UNKNOWN** π‘")

        st.markdown("**π Summary**")
        st.info(node.overview.summary)

        with st.expander("View Full Extracted Data and Found Links"):
            st.markdown("##### π Full Extracted Data")
            overview_data = node.overview.model_dump()
            # Show only non-empty fields, in a fixed reader-friendly order.
            display_order = ['details', 'required_docs', 'price', 'SLA']
            items_to_display = [
                (key.replace('_', ' ').capitalize(), str(overview_data[key]))
                for key in display_order
                if overview_data.get(key)
            ]
            for i, (title, value) in enumerate(items_to_display):
                st.markdown(f"**{title}**")
                st.markdown(value)
                if i < len(items_to_display) - 1:
                    st.markdown("---")  # separator between fields

            st.markdown("##### π Links Found on This Page")
            if node.child:
                st.write(f"Found **{len(node.child)}** link(s):")
                links_text = "\n".join(f"- {link}" for link in node.child)
                # Unique widget key per href avoids Streamlit duplicate-key errors.
                st.text_area("Links", links_text, height=150, key=f"links_{href}")
            else:
                st.write("No valid links were found on this page.")
def main() -> None:
    """
    Streamlit entry point.

    Collects a starting URL and max crawl depth from the sidebar, POSTs them to
    the Flask /scrape endpoint (base URI from the ``BASE_URI`` env var, default
    ``http://localhost:5000``), then renders the resulting link map and the
    accumulated token usage. All failures are reported in the UI; nothing is
    raised to the caller.
    """
    st.title("π€ Browser Agent: Visa Data Extractor (Streamlit Demo)")
    st.markdown("Enter an API Key and a URL to start a recursive web crawl for structured visa information.")

    with st.sidebar:
        st.header("Configuration")
        default_url = "https://www.netherlandsworldwide.nl/visa-the-netherlands/visa-application-form"
        url = st.text_input("Starting URL (e.g., website.com)", default_url)
        max_depth = st.slider("Max Exploration Depth", min_value=1, max_value=5, value=1)
        st.markdown("""
        **Note:** Depth 1 is fast. Depth 2 or 3 can be **very slow** and consume many tokens.
        """)

    # --- Main Execution ---
    if st.button("Start Exploration and Extraction"):
        print(f"starting crawl for {url} with depth {max_depth}")
        if not url:
            st.error("Please enter a valid Starting URL.")
            return

        with st.spinner(f"Crawling {url} up to depth {max_depth}... (This may take a while)"):
            BASE_URI = os.getenv("BASE_URI", "http://localhost:5000")
            print(f"{BASE_URI}/scrape")
            try:
                result = requests.post(
                    f"{BASE_URI}/scrape",
                    headers={"Content-Type": "application/json"},
                    json={
                        "url": url,
                        "max_depth": max_depth
                    }
                )
            except requests.exceptions.ConnectionError:
                st.error(f"Connection Error: Could not connect to the Flask API at {BASE_URI}. Please ensure your Flask app is running (e.g., `flask run`).")
                return
            except Exception as e:
                # FIX: st.exception expects the exception object, not a string;
                # passing a string loses the traceback rendering.
                st.exception(e)
                return

        if result.status_code != 200:
            st.error(f"Exploration failed with status {result.status_code}: {result.text}")
            return

        # FIX: a 200 response with a non-JSON body previously raised an
        # unhandled exception from result.json().
        try:
            data = result.json()
        except ValueError:
            st.error("Exploration succeeded but the API returned a non-JSON response.")
            return

        display_map(data.get("link_map", {}))

        st.subheader("π° Accumulated Token Usage (All LLM Calls)")
        token_usage = data.get("token_usage", {"input": 0, "output": 0, "total": 0})
        # FIX: use .get per key so a partial token_usage payload (key present
        # but missing fields) cannot raise KeyError.
        st.write(f"**Input Tokens:** {token_usage.get('input', 0)}")
        st.write(f"**Output Tokens:** {token_usage.get('output', 0)}")
        st.write(f"**Total Tokens:** {token_usage.get('total', 0)}")
# Script entry point: render the page when executed directly
# (e.g. via `streamlit run <this file>`).
if __name__ == "__main__":
    main()