"""Gradio app: search the Swiss-Prot SQLite database by UniProt protein ID."""

import json
import os
import sqlite3
import tempfile
from contextlib import closing

import gradio as gr
import requests

HF_DB_URL = "https://huggingface.co/datasets/babilonczyk/swiss_prot/resolve/main/proteins.db"
LOCAL_DB_PATH = "proteins.db"


def download_db(url: str, local_path: str) -> None:
    """Download the database to *local_path* unless it is already cached.

    Streams the response to a temporary file and atomically renames it into
    place, so an interrupted download never leaves a truncated file that the
    existence check would mistake for a valid cache on the next run.

    Raises:
        requests.HTTPError: if the server responds with an error status.
        requests.Timeout: if connecting or reading exceeds the timeouts.
    """
    if os.path.exists(local_path):
        print("🟢 Database already exists locally.")
        return

    print(f"⬇️ Downloading database from {url}...")
    # stream=True avoids buffering the whole DB in memory; the (connect, read)
    # timeout prevents the app from hanging forever on a dead connection.
    with requests.get(url, stream=True, timeout=(10, 300)) as r:
        r.raise_for_status()
        # Write to a temp file in the same directory so os.replace is atomic.
        fd, tmp_path = tempfile.mkstemp(dir=os.path.dirname(local_path) or ".")
        try:
            with os.fdopen(fd, "wb") as f:
                for chunk in r.iter_content(chunk_size=1 << 20):
                    f.write(chunk)
            os.replace(tmp_path, local_path)
        except BaseException:
            # Clean up the partial temp file on any failure, then re-raise.
            os.unlink(tmp_path)
            raise
    print("✅ Download complete.")


def get_connection(db_path: str) -> sqlite3.Connection:
    """Open a SQLite connection to *db_path*.

    check_same_thread=False allows use from a thread other than the creating
    one, but a single connection must still not be used by several threads
    concurrently — request handlers open their own short-lived connections.
    """
    return sqlite3.connect(db_path, check_same_thread=False)


# Setup: fetch the DB (if needed). The module-level connection/cursor are kept
# for backward compatibility with any code that imports `conn`/`cursor`.
download_db(HF_DB_URL, LOCAL_DB_PATH)
conn = get_connection(LOCAL_DB_PATH)
cursor = conn.cursor()


def search_by_id(protein_id: str) -> str:
    """Look up a protein by exact UniProt ID; return its metadata as pretty JSON.

    Opens a fresh connection per call: Gradio serves requests from multiple
    worker threads, and sharing one sqlite3 cursor across threads is unsafe.
    A per-request connection is cheap for a local read-only SQLite file.

    Returns:
        The pretty-printed JSON document for the matching row, or a
        human-readable "❌ ..." message when there is no match or the stored
        JSON is malformed.
    """
    protein_id = protein_id.strip().upper()
    with closing(sqlite3.connect(LOCAL_DB_PATH)) as db:
        row = db.execute(
            "SELECT json FROM proteins WHERE id = ?", (protein_id,)
        ).fetchone()
    if not row:
        return "❌ No match found."
    try:
        data = json.loads(row[0])
    except json.JSONDecodeError:
        return "❌ Found row but failed to parse JSON."
    return json.dumps(data, indent=2)


# Gradio app
demo = gr.Interface(
    fn=search_by_id,
    inputs=gr.Textbox(label="Enter UniProt ID (e.g. P0DTC2)"),
    outputs=gr.Textbox(label="Protein metadata (JSON)"),
    title="SwissProt Search by ID",
    description="Search Swiss-Prot database by UniProt protein ID",
)

demo.launch(show_api=True)