Spaces:
Sleeping
Sleeping
File size: 1,633 Bytes
b5c82cb 2ccdeda b1a1cbf b5c82cb 2ccdeda 357f9ba 2ccdeda ece3e95 2ccdeda ece3e95 a86d83f ece3e95 357f9ba ece3e95 a86d83f ece3e95 55227d7 e3c2372 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 |
import gradio as gr
import sqlite3
import os
import requests
import json
HF_DB_URL = "https://huggingface.co/datasets/babilonczyk/swiss_prot/resolve/main/proteins.db"
LOCAL_DB_PATH = "proteins.db"
# Download the DB only if not already cached
def download_db(url: str, local_path: str) -> None:
    """Fetch the SQLite database to ``local_path`` unless it is already there.

    Args:
        url: HTTP(S) location of the database file.
        local_path: Destination path; an existing file is left untouched.

    Raises:
        requests.HTTPError: The server answered with an error status.
        requests.RequestException: The connection failed or timed out.
        OSError: The file could not be written.
    """
    if os.path.exists(local_path):
        print("🟢 Database already exists locally.")
        return

    print(f"⬇️ Downloading database from {url}...")
    # Write to a sibling temp file first: an interrupted download must not
    # leave a truncated file at local_path, because the existence check above
    # would treat it as a valid cache on every later start.
    partial_path = f"{local_path}.part"
    try:
        # stream=True avoids holding the whole database in memory; the
        # (connect, read) timeout keeps a stalled server from hanging startup.
        with requests.get(url, stream=True, timeout=(10, 60)) as response:
            response.raise_for_status()
            with open(partial_path, "wb") as f:
                for chunk in response.iter_content(chunk_size=1024 * 1024):
                    f.write(chunk)
        os.replace(partial_path, local_path)
    finally:
        # Nothing to remove after a successful os.replace; this only cleans
        # up the partial file when the download or write failed.
        if os.path.exists(partial_path):
            os.remove(partial_path)
    print("✅ Download complete.")
# Connect to SQLite
def get_connection(db_path: str) -> sqlite3.Connection:
    """Open the SQLite database at ``db_path``.

    The connection is created with ``check_same_thread=False``, so sqlite3
    does not raise when it is used from a thread other than the one that
    opened it.
    """
    connect_options = {"check_same_thread": False}
    return sqlite3.connect(db_path, **connect_options)
# Setup: these statements run at import time, in this order.
# Make sure the database file is present locally (downloads it if missing).
download_db(HF_DB_URL, LOCAL_DB_PATH)
# One module-level connection, opened by get_connection with
# check_same_thread=False.
conn = get_connection(LOCAL_DB_PATH)
# Module-level cursor on that connection.
cursor = conn.cursor()
# Search by protein ID (exact match)
def search_by_id(protein_id: str) -> str:
    """Look up one protein by exact ID and return its record as pretty JSON.

    Args:
        protein_id: Identifier to search for. Surrounding whitespace is
            ignored and the value is upper-cased before the lookup.
            ``None`` or a blank string is answered as a non-match.

    Returns:
        The stored JSON document re-indented by two spaces, or a message
        starting with "❌" when nothing matches or the stored value cannot
        be parsed as JSON.
    """
    normalized_id = (protein_id or "").strip().upper()
    if not normalized_id:
        # Blank or None input: answer as a non-match without querying.
        return "❌ No match found."

    # conn.execute creates a fresh cursor for this call, so result sets
    # cannot interleave if handlers run concurrently on the shared
    # connection.
    row = conn.execute(
        "SELECT json FROM proteins WHERE id = ?", (normalized_id,)
    ).fetchone()
    if row is None:
        return "❌ No match found."

    try:
        data = json.loads(row[0])
    except (json.JSONDecodeError, TypeError):
        # TypeError covers a NULL (None) value in the json column.
        return "❌ Found row but failed to parse JSON."
    # ensure_ascii=False keeps non-ASCII characters readable in the output
    # instead of \uXXXX escapes; the result is still valid JSON.
    return json.dumps(data, indent=2, ensure_ascii=False)
# Gradio app
# Single-textbox UI: the submitted ID is passed to search_by_id and the
# string it returns is shown in the output textbox.
demo = gr.Interface(
    fn=search_by_id,
    inputs=gr.Textbox(label="Enter UniProt ID (e.g. P0DTC2)"),
    outputs=gr.Textbox(label="Protein metadata (JSON)"),
    title="SwissProt Search by ID",
    description="Search Swiss-Prot database by UniProt protein ID",
)
# Launch the app (the stray trailing "|" after this call was a syntax error).
demo.launch(show_api=True)