Spaces:
Build error
Build error
request working
Browse files- src/app.py +70 -179
src/app.py
CHANGED
|
@@ -1,10 +1,26 @@
|
|
| 1 |
import streamlit as st
|
| 2 |
import asyncio
|
| 3 |
-
import
|
| 4 |
-
|
| 5 |
-
|
|
|
|
| 6 |
|
| 7 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
|
| 9 |
def get_id_from_input(val: str) -> Optional[int]:
|
| 10 |
try:
|
|
@@ -12,186 +28,67 @@ def get_id_from_input(val: str) -> Optional[int]:
|
|
| 12 |
except Exception:
|
| 13 |
return None
|
| 14 |
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
async with session.get(url) as resp:
|
| 25 |
-
html = await resp.text()
|
| 26 |
-
if delay > 0:
|
| 27 |
-
await asyncio.sleep(delay)
|
| 28 |
-
soup = BeautifulSoup(html, "html.parser")
|
| 29 |
-
if soup.string == "Non-numeric id supplied. Aborting.":
|
| 30 |
-
return None
|
| 31 |
-
if (
|
| 32 |
-
soup.p is not None
|
| 33 |
-
and soup.p.string
|
| 34 |
-
== "You have specified an ID that does not exist in the database. Please back up and try again."
|
| 35 |
-
):
|
| 36 |
-
return None
|
| 37 |
-
h2 = soup.find("h2")
|
| 38 |
-
name = h2.get_text(strip=True) if h2 and hasattr(h2, "get_text") else str(record_id)
|
| 39 |
-
institution = None
|
| 40 |
-
year = None
|
| 41 |
-
for inst in soup.find_all("div", style="line-height: 30px; text-align: center; margin-bottom: 1ex"):
|
| 42 |
-
if hasattr(inst, "find"):
|
| 43 |
-
span1 = inst.find("span")
|
| 44 |
-
if span1 and hasattr(span1, "find"):
|
| 45 |
-
span2 = span1.find("span")
|
| 46 |
-
if span2 and hasattr(span2, "text"):
|
| 47 |
-
institution = span2.text
|
| 48 |
-
if span1 and hasattr(span1, "contents") and span1.contents:
|
| 49 |
-
y = span1.contents[-1]
|
| 50 |
-
if isinstance(y, str):
|
| 51 |
-
y = y.strip()
|
| 52 |
-
if y:
|
| 53 |
-
y = y.split(",")[0].strip()
|
| 54 |
-
if y.isdigit():
|
| 55 |
-
year = int(y)
|
| 56 |
-
advisors = []
|
| 57 |
-
for a in soup.find_all(string=lambda s: s and ("Advisor" in cast(str, s) or "Promotor" in cast(str, s))):
|
| 58 |
-
# Ensure 'a' itself is treated as a string for the 'in' check
|
| 59 |
-
if "Advisor: Unknown" in str(a):
|
| 60 |
-
continue
|
| 61 |
-
next_tag = a.find_next() if hasattr(a, "find_next") else None
|
| 62 |
-
if next_tag and hasattr(next_tag, "attrs") and "href" in next_tag.attrs:
|
| 63 |
-
try:
|
| 64 |
-
advisors.append(int(next_tag.attrs["href"].split("=")[-1]))
|
| 65 |
-
except Exception:
|
| 66 |
-
pass
|
| 67 |
-
table = soup.find("table")
|
| 68 |
-
descendants = []
|
| 69 |
-
if table and hasattr(table, "find_all"):
|
| 70 |
-
for a in table.find_all("a"):
|
| 71 |
-
if hasattr(a, "attrs") and "href" in a.attrs:
|
| 72 |
-
try:
|
| 73 |
-
descendants.append(int(a.attrs["href"].split("=")[-1]))
|
| 74 |
-
except Exception:
|
| 75 |
-
pass
|
| 76 |
-
record = {
|
| 77 |
-
"id": record_id,
|
| 78 |
-
"name": name,
|
| 79 |
-
"institution": institution,
|
| 80 |
-
"year": year,
|
| 81 |
-
"advisors": advisors,
|
| 82 |
-
"descendants": descendants,
|
| 83 |
}
|
| 84 |
-
if cache is not None:
|
| 85 |
-
cache[record_id] = record
|
| 86 |
-
return record
|
| 87 |
|
| 88 |
-
async def
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
visited: Set[int],
|
| 96 |
-
delay: float = 0.0,
|
| 97 |
-
):
|
| 98 |
-
if rid in visited:
|
| 99 |
-
return
|
| 100 |
-
visited.add(rid)
|
| 101 |
-
async with semaphore:
|
| 102 |
-
rec = await fetch_record(rid, session, cache, delay)
|
| 103 |
-
if rec is not None:
|
| 104 |
-
tree[rid] = rec
|
| 105 |
-
await progress_cb(tree)
|
| 106 |
-
tasks = [
|
| 107 |
-
fetch_advisors_parallel(
|
| 108 |
-
adv, session, cache, semaphore, tree, progress_cb, visited, delay
|
| 109 |
-
)
|
| 110 |
-
for adv in rec["advisors"]
|
| 111 |
-
]
|
| 112 |
-
if tasks:
|
| 113 |
-
await asyncio.gather(*tasks)
|
| 114 |
|
| 115 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 116 |
lines = [
|
| 117 |
"digraph G {",
|
| 118 |
-
" rankdir=TB;
|
| 119 |
-
' node [shape=box, style="rounded,filled", fillcolor=lightyellow];
|
| 120 |
' edge [arrowhead=vee];'
|
| 121 |
]
|
| 122 |
-
|
| 123 |
-
# Group nodes by year
|
| 124 |
-
nodes_by_year: Dict[Optional[int], List[int]] = {}
|
| 125 |
-
for node_id, node_data in tree.items():
|
| 126 |
-
year = node_data.get("year") # Might be None
|
| 127 |
-
if year not in nodes_by_year:
|
| 128 |
-
nodes_by_year[year] = []
|
| 129 |
-
nodes_by_year[year].append(node_id)
|
| 130 |
-
|
| 131 |
# Define nodes and their labels
|
| 132 |
-
for node_id, node in
|
| 133 |
name = node.get("name", str(node_id))
|
| 134 |
year_str = f" ({node.get('year')})" if node.get('year') is not None else " (Year Unknown)"
|
| 135 |
label = f"{name}{year_str}"
|
| 136 |
-
|
| 137 |
-
tooltip = f"ID: {node_id}\nName: {name}\nYear: {node.get('year', 'N/A')}\nInstitution: {node.get('institution', 'N/A')}"
|
| 138 |
lines.append(f' "{node_id}" [label="{label}", tooltip="{tooltip}"];')
|
| 139 |
-
|
| 140 |
# Define edges
|
| 141 |
-
for node_id, node in
|
| 142 |
for adv_id in node.get("advisors", []):
|
| 143 |
-
if adv_id in
|
| 144 |
lines.append(f' "{adv_id}" -> "{node_id}";')
|
| 145 |
-
|
| 146 |
-
# Add rank constraints for years
|
| 147 |
-
# Sort years, placing None (unknown year) perhaps at the top or bottom.
|
| 148 |
-
# For this example, None years will not be part of explicit rank=same groups.
|
| 149 |
-
# Dot will place them based on connections.
|
| 150 |
-
# Consider creating a specific rank for "Unknown Year" if desired.
|
| 151 |
-
|
| 152 |
-
# Sort known years.
|
| 153 |
-
# Years are typically displayed with earlier years at the top (if rankdir=TB)
|
| 154 |
-
# or left (if rankdir=LR). Dot usually handles this naturally with directed edges.
|
| 155 |
-
# The rank=same constraint is more about aligning nodes *within* the same year.
|
| 156 |
-
sorted_known_years = sorted([y for y in nodes_by_year.keys() if y is not None])
|
| 157 |
-
|
| 158 |
-
for year in sorted_known_years:
|
| 159 |
-
nodes_in_year = nodes_by_year[year]
|
| 160 |
-
if len(nodes_in_year) > 0: # Create a rank group even for single nodes if you want to ensure year separation
|
| 161 |
-
# Invisible subgraph for ranking
|
| 162 |
-
lines.append(f" subgraph cluster_year_{year} {{") # Naming cluster helps visually if you add style
|
| 163 |
-
lines.append(" label=\"\"; // No visible label for the cluster itself")
|
| 164 |
-
lines.append(" style=invis;")
|
| 165 |
-
lines.append(" {")
|
| 166 |
-
lines.append(" rank=same;")
|
| 167 |
-
# Add nodes to the rank group
|
| 168 |
-
for node_id in nodes_in_year:
|
| 169 |
-
lines.append(f' "{node_id}";')
|
| 170 |
-
lines.append(" }")
|
| 171 |
-
lines.append(" }")
|
| 172 |
-
|
| 173 |
-
# Handle nodes with unknown years (year is None)
|
| 174 |
-
# Option 1: Let them float (current behavior if not explicitly ranked)
|
| 175 |
-
# Option 2: Group them in their own rank (e.g., at the top or bottom)
|
| 176 |
-
if None in nodes_by_year and nodes_by_year[None]:
|
| 177 |
-
lines.append(" subgraph cluster_year_unknown {")
|
| 178 |
-
lines.append(" label=\"\"; style=invis;")
|
| 179 |
-
lines.append(" {")
|
| 180 |
-
lines.append(" rank=same; // Or rank=min/max if you want them at very top/bottom")
|
| 181 |
-
for node_id in nodes_by_year[None]:
|
| 182 |
-
lines.append(f' "{node_id}";')
|
| 183 |
-
lines.append(" }")
|
| 184 |
-
lines.append(" }")
|
| 185 |
-
|
| 186 |
lines.append("}")
|
| 187 |
return "\n".join(lines)
|
| 188 |
|
| 189 |
def main():
|
| 190 |
-
st.title("Math Genealogy Ancestor Tree (
|
| 191 |
mgp_id_str = st.text_input("Enter MGP ID (integer):")
|
| 192 |
-
use_cache = st.checkbox("Use in-memory cache", value=True)
|
| 193 |
-
concurrency = st.slider("Max parallel requests", min_value=1, max_value=10, value=5)
|
| 194 |
-
delay = st.slider("Delay between requests (seconds)", min_value=0.0, max_value=2.0, value=0.2, step=0.05)
|
| 195 |
progress_placeholder = st.empty()
|
| 196 |
graph_placeholder = st.empty()
|
| 197 |
run_btn = st.button("Show Ancestor Tree")
|
|
@@ -200,28 +97,22 @@ def main():
|
|
| 200 |
if mgp_id is None:
|
| 201 |
st.error("Please enter a valid integer MGP ID.")
|
| 202 |
return
|
| 203 |
-
|
| 204 |
loop = asyncio.new_event_loop()
|
| 205 |
asyncio.set_event_loop(loop)
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
progress_placeholder.info(f"Ancestors found: {len(tree)}")
|
| 211 |
-
dot = tree_to_dot(tree)
|
| 212 |
-
graph_placeholder.graphviz_chart(dot)
|
| 213 |
async def runner():
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
visited: Set[int] = set()
|
| 218 |
-
await fetch_advisors_parallel(
|
| 219 |
-
mgp_id, session, cache, semaphore, tree, progress_cb, visited, delay
|
| 220 |
-
)
|
| 221 |
try:
|
| 222 |
loop.run_until_complete(runner())
|
| 223 |
progress_placeholder.success("Done!")
|
| 224 |
except Exception as e:
|
|
|
|
| 225 |
progress_placeholder.error(f"Error: {e}")
|
| 226 |
|
| 227 |
if __name__ == "__main__":
|
|
|
|
| 1 |
import streamlit as st
|
| 2 |
import asyncio
|
| 3 |
+
import websockets
|
| 4 |
+
import json
|
| 5 |
+
import platform
|
| 6 |
+
from typing import Dict, Any, Optional, List, Literal, TypedDict, cast
|
| 7 |
|
| 8 |
+
GGRAPHER_URI = "wss://ggrphr.davidalber.net"
|
| 9 |
+
|
| 10 |
+
class StartNodeRequest(TypedDict):
    """One entry of the ``startNodes`` list in a graph-building request."""

    # Integer record ID of the node the traversal starts from.
    recordId: int
    # Flag sent as "getAdvisors"; make_payload sets it to True.
    getAdvisors: bool
    # Flag sent as "getDescendants"; make_payload sets it to False.
    getDescendants: bool
|
| 14 |
+
|
| 15 |
+
class RequestPayload(TypedDict):
    """Top-level message serialized to JSON and sent over the WebSocket."""

    # Message discriminator; only "build-graph" is used by this file.
    kind: Literal["build-graph"]
    # Request options; the only key used here is "reportingCallback".
    options: Dict[Literal["reportingCallback"], bool]
    # Records the graph is built from.
    startNodes: List[StartNodeRequest]
|
| 19 |
+
|
| 20 |
+
class ProgressCallback(TypedDict):
    """Payload of a ``"progress"`` message, as handed to the progress callback."""

    # Counters displayed by the UI as "Queued", "Fetching" and "Done".
    queued: int
    fetching: int
    done: int
|
| 24 |
|
| 25 |
def get_id_from_input(val: str) -> Optional[int]:
|
| 26 |
try:
|
|
|
|
| 28 |
except Exception:
|
| 29 |
return None
|
| 30 |
|
| 31 |
+
def make_payload(
    record_id: int,
    *,
    get_advisors: bool = True,
    get_descendants: bool = False,
) -> RequestPayload:
    """Build the ``build-graph`` request for a single start record.

    Args:
        record_id: Integer ID of the record to start from.
        get_advisors: Value sent as ``getAdvisors`` for the start node.
            Defaults to ``True``, the value that was previously hard-coded.
        get_descendants: Value sent as ``getDescendants`` for the start node.
            Defaults to ``False``, the value that was previously hard-coded.

    Returns:
        A JSON-serializable request dict with progress reporting enabled
        (``options.reportingCallback`` is always ``True``).
    """
    return {
        "kind": "build-graph",
        "options": {"reportingCallback": True},
        "startNodes": [
            {
                "recordId": record_id,
                "getAdvisors": get_advisors,
                "getDescendants": get_descendants,
            }
        ],
    }
|
|
|
|
|
|
|
|
|
|
| 41 |
|
| 42 |
+
async def get_graph(payload: RequestPayload, progress_cb=None) -> Dict[str, Any]:
    """Request a graph over the WebSocket and return its payload.

    Sends ``payload`` as JSON to ``GGRAPHER_URI`` and reads messages until
    one with ``kind == "graph"`` arrives.

    Args:
        payload: Request to send (see ``make_payload``).
        progress_cb: Optional synchronous callable. It is invoked with the
            payload of every ``"progress"`` message received before the graph.

    Returns:
        The ``payload`` of the first ``"graph"`` message. Inside every decoded
        JSON object that has a ``"nodes"`` key, the keys of that ``nodes``
        mapping are converted to ``int``.

    Note:
        Connection and decoding errors are not handled here and propagate to
        the caller.
    """

    def intify_record_keys(d: Dict[Any, Any]) -> Dict[Any, Any]:
        # JSON object keys are always strings; turn node IDs back into ints
        # so they compare equal to the integer IDs listed in "advisors".
        if "nodes" not in d:
            return d
        ret = {k: v for k, v in d.items() if k != "nodes"}
        ret["nodes"] = {int(k): v for k, v in d["nodes"].items()}
        return ret

    async with websockets.connect(  # type: ignore[attr-defined]
        GGRAPHER_URI,
    ) as ws:
        await ws.send(json.dumps(payload))
        while True:
            response_json = await ws.recv()
            response = json.loads(response_json, object_hook=intify_record_keys)
            # Use .get so a message without "kind" is skipped like any other
            # unrecognized message instead of raising KeyError.
            kind = response.get("kind")
            response_payload = response.get("payload")
            if kind == "graph":
                return cast(Dict[str, Any], response_payload)
            if kind == "progress" and progress_cb:
                progress_cb(cast(ProgressCallback, response_payload))
            # Any other message is ignored; keep waiting for the graph.
|
| 65 |
+
|
| 66 |
+
def tree_to_dot(graph: Dict[str, Any]) -> str:
    """Render a graph payload as Graphviz DOT source.

    Args:
        graph: Mapping with an optional ``"nodes"`` entry that maps node IDs
            to node dicts. The keys read from each node are ``"name"``,
            ``"year"``, ``"institution"`` and ``"advisors"``; all of them
            are optional.

    Returns:
        DOT text for a top-to-bottom digraph with one box per node and one
        ``advisor -> student`` edge for every advisor that is itself present
        in ``nodes``.
    """

    def _esc(value: Any) -> str:
        # Backslash and double quote are special inside a DOT quoted string;
        # unescaped, a name such as 'John "Jack" Doe' produces invalid DOT.
        return str(value).replace("\\", "\\\\").replace('"', '\\"')

    def _or_na(value: Any) -> Any:
        # A key that is present but None should display as N/A, not "None".
        return "N/A" if value is None else value

    nodes = graph.get("nodes", {})
    lines = [
        "digraph G {",
        " rankdir=TB;",
        ' node [shape=box, style="rounded,filled", fillcolor=lightyellow];',
        " edge [arrowhead=vee];",
    ]

    # Define nodes and their labels
    for node_id, node in nodes.items():
        name = _esc(node.get("name", str(node_id)))
        year = node.get("year")
        year_str = f" ({year})" if year is not None else " (Year Unknown)"
        label = f"{name}{year_str}"
        institution = _esc(_or_na(node.get("institution")))
        # "\\n" emits the two characters backslash + n, which is the DOT
        # escape for a line break.
        tooltip = (
            f"ID: {node_id}\\nName: {name}\\nYear: {_or_na(year)}"
            f"\\nInstitution: {institution}"
        )
        lines.append(f' "{node_id}" [label="{label}", tooltip="{tooltip}"];')

    # Define edges; advisors that are not in the graph are skipped so no
    # implicit, unlabeled nodes are created.
    for node_id, node in nodes.items():
        for adv_id in node.get("advisors", []):
            if adv_id in nodes:
                lines.append(f' "{adv_id}" -> "{node_id}";')

    lines.append("}")
    return "\n".join(lines)
|
| 88 |
|
| 89 |
def main():
|
| 90 |
+
st.title("Math Genealogy Ancestor Tree (WebSocket API)")
|
| 91 |
mgp_id_str = st.text_input("Enter MGP ID (integer):")
|
|
|
|
|
|
|
|
|
|
| 92 |
progress_placeholder = st.empty()
|
| 93 |
graph_placeholder = st.empty()
|
| 94 |
run_btn = st.button("Show Ancestor Tree")
|
|
|
|
| 97 |
if mgp_id is None:
|
| 98 |
st.error("Please enter a valid integer MGP ID.")
|
| 99 |
return
|
| 100 |
+
payload = make_payload(mgp_id)
|
| 101 |
loop = asyncio.new_event_loop()
|
| 102 |
asyncio.set_event_loop(loop)
|
| 103 |
+
def progress_cb(progress):
|
| 104 |
+
progress_placeholder.info(
|
| 105 |
+
f"Queued: {progress['queued']} | Fetching: {progress['fetching']} | Done: {progress['done']}"
|
| 106 |
+
)
|
|
|
|
|
|
|
|
|
|
| 107 |
async def runner():
|
| 108 |
+
graph = await get_graph(payload, progress_cb)
|
| 109 |
+
dot = tree_to_dot(graph)
|
| 110 |
+
graph_placeholder.graphviz_chart(dot)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 111 |
try:
|
| 112 |
loop.run_until_complete(runner())
|
| 113 |
progress_placeholder.success("Done!")
|
| 114 |
except Exception as e:
|
| 115 |
+
print(f"Error: {e}")
|
| 116 |
progress_placeholder.error(f"Error: {e}")
|
| 117 |
|
| 118 |
if __name__ == "__main__":
|