Spaces:
Sleeping
Sleeping
| """ | |
| GENESIS-AI MCP Studio — Hugging Face Space | |
| ========================================== | |
| A one-file, production-leaning prototype that fuses: | |
| • MCP-style tool adapters (RCSB PDB, medRxiv, Raindrop, QuickChart, MeasureSpace) | |
| • Hugging Face Transformers (summarization, keyphrase extraction, NER, Q&A) | |
| • Agentic orchestration (tool-using graph with spec-like permissions) | |
| • Gradio UI for instant deployment on Hugging Face Spaces | |
| Run locally: | |
| pip install -U transformers accelerate torch gradio httpx pydantic python-dotenv rich | |
| HF_HOME=.hf_cache # optional local cache | |
| python app.py | |
| Deploy on Hugging Face Spaces: | |
| • Space type: Gradio | |
| • Add secrets in the Space Settings as environment variables (see .env keys below) | |
| .env (optional, set as secrets in HF Space): | |
| RAINDROP_TOKEN=... # for Raindrop.io adapter | |
| MEASURESPACE_API_KEY=... # weather/geocode adapter | |
| QUICKCHART_BASE=https://quickchart.io/chart | |
| Notes: | |
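| - Only the Raindrop bookmarking step is currently gated on the caller's permissions; the other adapters declare `requires` but are not checked before being called. The gating can be expanded. | |
| - The medRxiv adapter searches Crossref's public JSON API (`/works`) for robust search and does not itself restrict results to medRxiv/bioRxiv; switch to official APIs where available. | |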
| - This is a wow-piece: clean architecture + real utility out-of-the-box. | |
| """ | |
| from __future__ import annotations | |
| import os | |
| import re | |
| import json | |
| import time | |
| from dataclasses import dataclass, field | |
| from typing import Any, Dict, List, Optional, Tuple | |
| import httpx | |
| import gradio as gr | |
| from pydantic import BaseModel | |
| from rich import print as rprint | |
| # ---------------------------- | |
| # Hugging Face model helpers | |
| # ---------------------------- | |
| from transformers import pipeline | |
# Module-level cache slots for the Hugging Face pipelines. Each one stays
# None until the corresponding get_*() accessor builds the pipeline.
_SUMMARIZER = _QA = _NER = _KEYPHRASE = None
def get_summarizer():
    """Return the shared summarization pipeline, creating it on first use.

    The pipeline (``facebook/bart-large-cnn``) is cached in the module-level
    ``_SUMMARIZER`` slot, so later calls return the same object.
    """
    global _SUMMARIZER
    if _SUMMARIZER is not None:
        return _SUMMARIZER
    _SUMMARIZER = pipeline(
        "summarization",
        model="facebook/bart-large-cnn",
        device_map="auto",
    )
    return _SUMMARIZER
def get_qa():
    """Return the shared question-answering pipeline, creating it on first use.

    The pipeline (``deepset/roberta-base-squad2``) is cached in the
    module-level ``_QA`` slot, so later calls return the same object.
    """
    global _QA
    if _QA is not None:
        return _QA
    _QA = pipeline(
        "question-answering",
        model="deepset/roberta-base-squad2",
        device_map="auto",
    )
    return _QA
def get_ner():
    """Return the shared NER pipeline, creating it on first use.

    The pipeline (``dslim/bert-base-NER`` with ``aggregation_strategy="simple"``)
    is cached in the module-level ``_NER`` slot.
    """
    global _NER
    if _NER is not None:
        return _NER
    _NER = pipeline(
        "ner",
        model="dslim/bert-base-NER",
        aggregation_strategy="simple",
        device_map="auto",
    )
    return _NER
def get_keyphrase():
    """Return the keyphrase extractor, which is the NER pipeline under another name.

    No dedicated keyphrase model is loaded: the first call stores the result
    of ``get_ner()`` in ``_KEYPHRASE`` and every call returns that object.
    Swap in a dedicated model here if one is wanted.
    """
    global _KEYPHRASE
    if _KEYPHRASE is not None:
        return _KEYPHRASE
    # Entities found by NER stand in for key phrases.
    _KEYPHRASE = get_ner()
    return _KEYPHRASE
| # ---------------------------- | |
| # Minimal MCP-style abstractions | |
| # ---------------------------- | |
class Permission(BaseModel):
    """One grant: which server, what kind of access, and to which resource."""

    # Name of the backing service, e.g. "crossref", "rcsb", "raindrop".
    server: str
    # Access kind, e.g. "read" or "write".
    scope: str
    # What on that server is covered, e.g. "literature", "bookmarks".
    resource: str
class ToolResult(BaseModel):
    """Uniform return value of every tool call.

    The adapters in this file return ``ok=True`` with ``data`` set on success
    and ``ok=False`` with ``error`` set to a message on failure.
    """

    ok: bool
    # Tool-specific payload; its shape depends on the tool.
    data: Any = None
    # Human-readable failure reason; None when the call succeeded.
    error: Optional[str] = None
class Tool:
    """Base class for the MCP-style adapters.

    Subclasses assign ``name``, ``description`` and ``requires`` as class
    attributes and override ``call`` with their own keyword parameters.
    """

    # Dotted identifier such as "medrxiv.search".
    name: str
    # One-line human-readable summary of what the tool does.
    description: str
    # Permissions the tool declares it needs.
    requires: List[Permission]

    async def call(self, **kwargs) -> ToolResult:
        """Run the tool; the base implementation always raises.

        Raises:
            NotImplementedError: always, until a subclass overrides it.
        """
        raise NotImplementedError
| # ---------------------------- | |
| # Adapters (MCP-like Servers) | |
| # ---------------------------- | |
class MedRxivTool(Tool):
    """Literature-search adapter backed by the Crossref ``/works`` endpoint."""

    name = "medrxiv.search"
    description = "Search medRxiv / bioRxiv via Crossref for recent preprints."
    requires = [Permission(server="crossref", scope="read", resource="literature")]

    @staticmethod
    def _to_record(item: Dict[str, Any]) -> Dict[str, Any]:
        """Flatten one Crossref work item into the dict the pipeline consumes."""
        first_title = (item.get("title") or [""])[0]
        # Crossref abstracts can include HTML; strip tags
        plain_abstract = re.sub(r"<[^>]+>", " ", item.get("abstract") or "")
        return {
            "title": first_title,
            "authors": [person.get("family", "") for person in item.get("author", [])],
            "url": item.get("URL"),
            "venue": (item.get("container-title") or [""])[0],
            # First component of the first date-parts entry (the year).
            "date": (item.get("issued", {}).get("date-parts") or [[None]])[0][0],
            "abstract": plain_abstract.strip(),
        }

    async def call(self, query: str, max_results: int = 5) -> ToolResult:
        """Query Crossref and return up to ``max_results`` flattened records.

        Only works published from 2023-01-01 onward that carry an abstract are
        requested. Any exception (network, HTTP status, parsing) is turned
        into ``ToolResult(ok=False, error=...)`` instead of propagating.
        """
        request_params = {
            "query": query,
            "filter": "from-pub-date:2023-01-01,has-abstract:true",
            "rows": max_results,
            "select": "title,author,URL,abstract,issued,container-title",
        }
        try:
            async with httpx.AsyncClient(timeout=20) as client:
                response = await client.get("https://api.crossref.org/works", params=request_params)
                response.raise_for_status()
                works = response.json().get("message", {}).get("items", [])
                records = [self._to_record(work) for work in works]
                return ToolResult(ok=True, data=records)
        except Exception as exc:
            return ToolResult(ok=False, error=str(exc))
class RCSBPDBTool(Tool):
    """Adapter for the RCSB PDB Search API (v2) plus the Data API entry lookup."""

    name = "rcsb.structure"
    description = "Lookup PDB structures by query and return metadata."
    requires = [Permission(server="rcsb", scope="read", resource="pdb")]

    async def call(self, query: str, max_results: int = 5) -> ToolResult:
        """Search RCSB for ``query`` and return metadata for each hit.

        A four-character PDB id (digit followed by three alphanumerics) is
        matched exactly against the entry id; anything else is sent as an
        unstructured full-text search.

        Returns:
            ``ToolResult(ok=True, data=[...])`` where each element is
            ``{"pdb_id": ..., **entry_json}``; an empty list when nothing
            matched. Any exception becomes ``ToolResult(ok=False, error=...)``.
        """
        # See: https://search.rcsb.org/#search-api
        endpoint = "https://search.rcsb.org/rcsbsearch/v2/query"
        if re.fullmatch(r"[0-9][A-Za-z0-9]{3}", query):
            search = {
                "type": "terminal",
                "service": "text",
                "parameters": {
                    "attribute": "rcsb_entry_container_identifiers.entry_id",
                    "operator": "exact_match",
                    "value": query,
                },
            }
        else:
            # In the v2 API the unstructured search lives in its own
            # "full_text" service; "text" requires attribute + operator.
            search = {
                "type": "terminal",
                "service": "full_text",
                "parameters": {"value": query},
            }
        payload = {
            "query": search,
            "return_type": "entry",
            # v2 names the paging option "paginate" (v1 called it "pager").
            "request_options": {"paginate": {"start": 0, "rows": max_results}},
        }
        try:
            async with httpx.AsyncClient(timeout=20) as client:
                resp = await client.post(endpoint, json=payload)
                resp.raise_for_status()
                # A successful search with no hits comes back with an empty
                # body (HTTP 204); treat it as "no results", not as an error.
                if resp.status_code == 204 or not resp.content:
                    return ToolResult(ok=True, data=[])
                hits = resp.json().get("result_set") or []
                ids = [hit.get("identifier") for hit in hits if hit.get("identifier")]
                out = []
                for pdb_id in ids:
                    info = await client.get(f"https://data.rcsb.org/rest/v1/core/entry/{pdb_id}")
                    # Entries whose metadata cannot be fetched are skipped.
                    if info.status_code == 200:
                        out.append({"pdb_id": pdb_id, **info.json()})
                return ToolResult(ok=True, data=out)
        except Exception as e:
            return ToolResult(ok=False, error=str(e))
class RaindropTool(Tool):
    """Bookmark-saving adapter for the Raindrop.io REST API."""

    name = "raindrop.save"
    description = "Save a URL to Raindrop.io (bookmarks)."
    requires = [Permission(server="raindrop", scope="write", resource="bookmarks")]

    async def call(self, url: str, title: Optional[str] = None, tags: Optional[List[str]] = None) -> ToolResult:
        """Create one bookmark for ``url``.

        ``title`` and ``tags`` are included in the request only when truthy.
        Returns ``ok=False`` without any network traffic when the
        ``RAINDROP_TOKEN`` environment variable is unset; any exception during
        the request is also reported as ``ok=False``.
        """
        token = os.getenv("RAINDROP_TOKEN")
        if not token:
            return ToolResult(ok=False, error="RAINDROP_TOKEN not set")
        try:
            body = {"link": url}
            extras = {"title": title, "tags": tags}
            body.update({key: value for key, value in extras.items() if value})
            auth = {"Authorization": f"Bearer {token}"}
            async with httpx.AsyncClient(timeout=20) as client:
                response = await client.post(
                    "https://api.raindrop.io/rest/v1/raindrop",
                    json=body,
                    headers=auth,
                )
                response.raise_for_status()
                return ToolResult(ok=True, data=response.json())
        except Exception as exc:
            return ToolResult(ok=False, error=str(exc))
class MeasureSpaceTool(Tool):
    """Demo weather adapter: checks for an API key, then returns canned data."""

    name = "measure.weather"
    description = "Weather/geocode lookup via MeasureSpace (demo)."
    requires = [Permission(server="measurespace", scope="read", resource="weather")]

    async def call(self, location: str) -> ToolResult:
        """Return a fixed demo forecast for ``location``.

        No request is made; this is a placeholder showing where a hosted MCP
        endpoint would be wired in. Fails with ``ok=False`` when
        ``MEASURESPACE_API_KEY`` is not set.
        """
        if not os.getenv("MEASURESPACE_API_KEY"):
            return ToolResult(ok=False, error="MEASURESPACE_API_KEY not set")
        # Example stub response
        stub = {"location": location, "summary": "Sunny demo", "tempC": 28}
        return ToolResult(ok=True, data=stub)
class QuickChartTool(Tool):
    """Builds a QuickChart image URL for a simple bar chart."""

    name = "quickchart.render"
    description = "Render a chart via QuickChart and return image URL."
    requires = [Permission(server="quickchart", scope="write", resource="chart")]

    async def call(self, labels: List[str], values: List[float], title: str = "Keyphrases") -> ToolResult:
        """Return the chart URL and the Chart.js config it encodes.

        No request is made here; the caller fetches the URL. The base endpoint
        comes from ``QUICKCHART_BASE`` (default ``https://quickchart.io/chart``).

        Returns:
            ``ToolResult(ok=True, data={"url": ..., "config": ...})``.
        """
        from urllib.parse import quote

        base = os.getenv("QUICKCHART_BASE", "https://quickchart.io/chart")
        cfg = {
            "type": "bar",
            "data": {"labels": labels, "datasets": [{"label": title, "data": values}]},
            "options": {"plugins": {"legend": {"display": False}, "title": {"display": True, "text": title}}},
        }
        # The JSON must be percent-encoded: labels/titles containing "&", "#",
        # "+" or "%" would otherwise split or truncate the query string.
        encoded = quote(json.dumps(cfg, separators=(",", ":")), safe="")
        joiner = "&" if "?" in base else "?"
        url = f"{base}{joiner}c={encoded}"
        return ToolResult(ok=True, data={"url": url, "config": cfg})
| # ---------------------------- | |
| # Agent Orchestrator | |
| # ---------------------------- | |
@dataclass
class AgentContext:
    """Inputs for one agent run.

    Declared as a dataclass so it can be built with keyword arguments and so
    the ``field(default_factory=...)`` defaults produce a fresh list per
    instance; without the decorator the class accepts no constructor arguments.
    """

    # Free-text research query passed to the literature search.
    query: str
    # Descriptive goals for the run.
    goals: List[str] = field(default_factory=list)
    # Grants the caller holds; the pipeline consults these before bookmarking.
    permissions: List["Permission"] = field(default_factory=list)
class GenesisAgent:
    """Owns one instance of each adapter and runs the research pipeline."""

    # Character cap applied to each article blob before summarization.
    _MAX_INPUT_CHARS = 3000

    def __init__(self):
        self.medrxiv = MedRxivTool()
        self.rcsb = RCSBPDBTool()
        self.raindrop = RaindropTool()
        self.weather = MeasureSpaceTool()
        self.chart = QuickChartTool()

    async def run_pipeline(self, ctx: AgentContext) -> Dict[str, Any]:
        """Main pipeline:
        1) Literature search (medRxiv via Crossref)
        2) Summarize abstracts with HF
        3) Extract key entities/phrases
        4) Optional: save links to Raindrop
        5) Build a bar chart of salient keyphrases

        Returns:
            ``{"error": ...}`` when the literature search fails; otherwise a
            dict with keys ``query``, ``articles``, ``summaries``,
            ``keyphrases`` (list of ``(phrase, count)``), ``chart`` and
            ``bookmarks``.
        """
        # 1) Literature
        lit = await self.medrxiv.call(query=ctx.query, max_results=6)
        if not lit.ok:
            return {"error": f"Literature search failed: {lit.error}"}
        articles = lit.data or []

        # 2) Summarize, 3) count entities
        summaries = self._summarize(articles)
        top = self._top_entities(summaries)
        # The chart needs at least one bar, so fall back to a placeholder.
        labels = [k for k, _ in top] or ["No phrases"]
        values = [v for _, v in top] or [1]

        # 4) Optional bookmark first three
        saved = await self._bookmark(articles[:3], ctx)

        # 5) Chart
        chart = await self.chart.call(labels=labels, values=values, title="Key Entities Across Summaries")
        return {
            "query": ctx.query,
            "articles": articles,
            "summaries": summaries,
            "keyphrases": top,
            "chart": chart.data if chart.ok else {"error": chart.error},
            "bookmarks": saved,
        }

    def _summarize(self, articles: List[Dict[str, Any]]) -> List[str]:
        """Summarize each article's title/venue/abstract blob with the HF pipeline."""
        if not articles:
            # Nothing to do; avoid loading the model at all.
            return []
        summarizer = get_summarizer()
        summaries = []
        for art in articles:
            blob = f"Title: {art['title']}\nVenue: {art['venue']} ({art['date']})\nAbstract: {art['abstract']}"
            # Simple truncation instead of chunking, for brevity.
            blob = blob[: self._MAX_INPUT_CHARS]
            result = summarizer(blob, max_length=200, min_length=80, do_sample=False)
            summaries.append(result[0]["summary_text"])
        return summaries

    def _top_entities(self, summaries: List[str], limit: int = 10) -> List[Tuple[str, int]]:
        """Count NER entities across summaries and return the ``limit`` most frequent."""
        if not summaries:
            return []
        ner = get_keyphrase()
        counts: Dict[str, int] = {}
        for summary in summaries:
            for ent in ner(summary):
                # Drop WordPiece continuation markers ("##") left in the text,
                # then skip anything that ends up empty.
                phrase = (ent.get("word") or "").replace("##", "").strip()
                if not phrase:
                    continue
                counts[phrase] = counts.get(phrase, 0) + 1
        return sorted(counts.items(), key=lambda kv: kv[1], reverse=True)[:limit]

    async def _bookmark(self, articles: List[Dict[str, Any]], ctx: AgentContext) -> List[Dict[str, Any]]:
        """Save ``articles`` to Raindrop when ``ctx`` holds a raindrop write permission."""
        allowed = any(p.server == "raindrop" and p.scope == "write" for p in ctx.permissions)
        if not allowed:
            return []
        saved = []
        for art in articles:
            if not art.get("url"):
                # A record without a link cannot be bookmarked; report it as
                # failed instead of posting a null link.
                saved.append({"title": art["title"], "ok": False})
                continue
            res = await self.raindrop.call(url=art["url"], title=art["title"], tags=["genesis-ai", "medrxiv"])
            saved.append({"title": art["title"], "ok": res.ok})
        return saved
| # ---------------------------- | |
| # Gradio UI | |
| # ---------------------------- | |
# Stylesheet handed to gr.Blocks; the class names below are the ones used by
# the HTML fragments this module renders (box, heading, subtle, badge, card).
CSS = """
:root { --radius: 16px; }
.gradio-container { font-family: ui-sans-serif, system-ui; }
.box { border: 1px solid #e5e7eb; border-radius: var(--radius); padding: 16px; }
.heading { font-size: 22px; font-weight: 700; margin-bottom: 8px; }
.subtle { color: #6b7280; }
.badge { display:inline-block; padding: 2px 8px; border-radius: 999px; background: #eef2ff; margin-right:6px; }
.card { border: 1px solid #e5e7eb; border-radius: var(--radius); padding: 12px; }
"""
def render_articles(arts: List[Dict[str, Any]]) -> str:
    """Render article records as HTML cards, one per line.

    Title, venue, date and URL come from an external API, so each is
    HTML-escaped before interpolation. Missing (``None``) fields render as
    empty text. The URL becomes a link only when it uses http(s); any other
    value is shown as plain text.

    Returns:
        The joined cards, or ``"<i>No results</i>"`` when ``arts`` is empty.
    """
    from html import escape

    def clean(value: Any) -> str:
        return "" if value is None else escape(str(value), quote=True)

    cards = []
    for art in arts:
        title = clean(art.get("title"))
        venue = clean(art.get("venue"))
        date = clean(art.get("date"))
        raw_url = "" if art.get("url") is None else str(art.get("url"))
        url = clean(raw_url)
        if raw_url.lower().startswith(("http://", "https://")):
            link = f"<a href='{url}' target='_blank'>{url}</a>"
        else:
            # Refuse to build an href from schemes such as "javascript:".
            link = url
        cards.append(
            f"<div class='card'><div class='heading'>{title}</div>"
            f"<div class='subtle'>{venue} · {date}</div>"
            f"<div>{link}</div></div>"
        )
    return "\n".join(cards) or "<i>No results</i>"
def render_keyphrases(kp: List[Tuple[str, int]]) -> str:
    """Render ``(phrase, count)`` pairs as space-separated HTML badges.

    Phrases are extracted from model output over external text, so they are
    HTML-escaped before interpolation.

    Returns:
        The joined badges, or ``"<i>None</i>"`` when ``kp`` is empty.
    """
    from html import escape

    badges = [
        f"<span class='badge'>{escape(str(phrase))} × {escape(str(count))}</span>"
        for phrase, count in kp
    ]
    return " ".join(badges) or "<i>None</i>"
async def generate(query: str, save_to_raindrop: bool):
    """Run the agent for ``query`` and format its output for the UI.

    Args:
        query: Free-text research query.
        save_to_raindrop: When true, the Raindrop write permission is granted
            so the pipeline bookmarks the top results.

    Returns:
        A 4-tuple ``(articles_html, chart_url, summaries_text, keyphrases_html)``
        of plain strings. On failure the first element is an error box and the
        other three are empty strings.
    """
    from html import escape

    def error_box(message: str) -> str:
        return f"<div class='box'><b>Error:</b> {message}</div>"

    cleaned = (query or "").strip()
    if not cleaned:
        # Do not send an empty search to the literature API.
        return error_box("Please enter a research query."), "", "", ""

    perms = [
        Permission(server="crossref", scope="read", resource="literature"),
        Permission(server="quickchart", scope="write", resource="chart"),
    ]
    if save_to_raindrop:
        perms.append(Permission(server="raindrop", scope="write", resource="bookmarks"))
    agent = GenesisAgent()
    ctx = AgentContext(query=cleaned, goals=["Literature review", "Key entity map"], permissions=perms)
    out = await agent.run_pipeline(ctx)
    if "error" in out:
        # The message can contain text from a remote server; escape it.
        return error_box(escape(str(out["error"]))), "", "", ""

    arts_html = render_articles(out["articles"]) \
        + "<div class='subtle' style='margin-top:6px'>(Showing up to 6)</div>"
    chart_url = (out.get("chart") or {}).get("url") or ""
    summary_blob = "\n\n".join(f"— {s}" for s in out["summaries"])
    keyphrase_html = render_keyphrases(out["keyphrases"]) \
        + "<div class='subtle' style='margin-top:6px'>(Top 10)</div>"
    # Plain values are returned instead of gr.HTML.update(...): component
    # .update() helpers are gone in Gradio 4+, and plain strings are accepted
    # as HTML/Textbox outputs by both old and new versions.
    return arts_html, chart_url, summary_blob, keyphrase_html
# Page layout and event wiring. `demo` is the Blocks app launched at the
# bottom of the file.
with gr.Blocks(css=CSS, title="GENESIS-AI MCP Studio") as demo:
    gr.Markdown("""
# GENESIS-AI MCP Studio
A Hugging Face + MCP-inspired research agent that:
- searches recent preprints (Crossref → med/bioRxiv),
- summarizes with **BART**,
- maps key entities/phrases (NER),
- renders an instant chart (QuickChart),
- optionally saves top links to **Raindrop**.
> Swap/expand adapters to add RCSB PDB, Kube, GitHub Actions, Open Library, etc.
""")
    with gr.Row():
        query = gr.Textbox(label="Research query", placeholder="e.g., CRISPR base editing off-target detection")
    with gr.Row():
        save = gr.Checkbox(label="Bookmark top results to Raindrop.io", value=False)
        go = gr.Button("Run Agent ▶", variant="primary")
    gr.Markdown("### Results")
    with gr.Row():
        arts = gr.HTML()
    with gr.Row():
        chart = gr.Image(label="Key Entities Chart (auto-generated)", type="filepath")
    with gr.Row():
        summaries = gr.Textbox(label="Summaries", lines=12)
    with gr.Row():
        phrases = gr.HTML()

    async def _run(q, s):
        """Click handler: run the agent, then download the chart for display.

        Returns ``(articles_html, image_path_or_None, summaries, keyphrases_html)``
        in the order of the ``outputs`` list below.
        """
        import tempfile

        html, chart_url, summ, kp = await generate(q, s)
        # None (not "") is the "no image" value for a filepath Image.
        img_path = None
        if chart_url:
            # Download chart to show inline in Spaces
            try:
                # Async client so the download does not block the event loop.
                async with httpx.AsyncClient(timeout=20) as client:
                    resp = await client.get(chart_url)
                if resp.status_code == 200:
                    # Unique temp file: a time-based name in the working
                    # directory collides when two requests land in the same
                    # second.
                    with tempfile.NamedTemporaryFile(prefix="chart_", suffix=".png", delete=False) as f:
                        f.write(resp.content)
                    img_path = f.name
                else:
                    rprint("[red]Chart download failed:", f"HTTP {resp.status_code}")
            except Exception as e:
                # Best effort: the rest of the results are still shown.
                rprint("[red]Chart download failed:", e)
        return html, img_path, summ, kp

    go.click(_run, inputs=[query, save], outputs=[arts, chart, summaries, phrases])
if __name__ == "__main__":
    # Bind on all interfaces; PORT from the environment overrides 7860.
    listen_port = int(os.environ.get("PORT", 7860))
    demo.launch(server_name="0.0.0.0", server_port=listen_port)