swiss_prot / app.py
babilonczyk's picture
Update app.py
a86d83f verified
import gradio as gr
import sqlite3
import os
import requests
import json
HF_DB_URL = "https://huggingface.co/datasets/babilonczyk/swiss_prot/resolve/main/proteins.db"
LOCAL_DB_PATH = "proteins.db"
def download_db(
    url: str,
    local_path: str,
    *,
    timeout: float = 30.0,
    chunk_size: int = 1 << 20,
) -> None:
    """Download the database file to ``local_path`` unless it already exists.

    The response is streamed to ``<local_path>.part`` in chunks and moved
    into place only after the whole body has been written, so an interrupted
    download never leaves a truncated file that a later run would treat as a
    valid cache.

    Args:
        url: HTTP(S) location of the database file.
        local_path: Destination path; if it already exists nothing is fetched.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``.
        chunk_size: Number of bytes requested per streamed chunk.

    Raises:
        requests.HTTPError: If the server answers with an error status
            (raised by ``raise_for_status``).
        OSError: If the file cannot be written or moved into place.
    """
    if os.path.exists(local_path):
        print("🟢 Database already exists locally.")
        return

    print(f"⬇️ Downloading database from {url}...")
    partial_path = f"{local_path}.part"
    try:
        # stream=True avoids holding the entire database in memory at once.
        with requests.get(url, stream=True, timeout=timeout) as response:
            response.raise_for_status()
            with open(partial_path, "wb") as f:
                for chunk in response.iter_content(chunk_size=chunk_size):
                    f.write(chunk)
        # Publish the finished file in a single rename step.
        os.replace(partial_path, local_path)
    finally:
        # On success the partial file has already been renamed away; on
        # failure this removes the incomplete download.
        try:
            os.remove(partial_path)
        except FileNotFoundError:
            pass
    print("✅ Download complete.")
def get_connection(db_path: str) -> sqlite3.Connection:
    """Open the SQLite database at ``db_path`` and return the connection.

    ``check_same_thread=False`` is passed to ``sqlite3.connect`` so the
    returned connection is not restricted to the thread that created it.
    """
    connection = sqlite3.connect(db_path, check_same_thread=False)
    return connection
# Setup: executed once when the module is loaded, in this order.
# 1. Ensure the SQLite file is present at LOCAL_DB_PATH (download_db fetches
#    it from HF_DB_URL only when the path does not exist yet).
download_db(HF_DB_URL, LOCAL_DB_PATH)
# 2. Open the module-level connection to that file.
conn = get_connection(LOCAL_DB_PATH)
# 3. Create a module-level cursor on that connection.
cursor = conn.cursor()
def search_by_id(protein_id: str) -> str:
    """Look up one protein by exact ID and return its stored JSON, pretty-printed.

    The input is stripped of surrounding whitespace and upper-cased before
    being matched against the ``id`` column of the ``proteins`` table.

    Args:
        protein_id: ID typed by the user; ``None`` is treated as empty.

    Returns:
        The row's ``json`` column re-serialized with two-space indentation,
        or a human-readable "❌ ..." message when no row matches or the
        stored value cannot be parsed as JSON.
    """
    protein_id = (protein_id or "").strip().upper()

    # Use a dedicated cursor per call: with a single shared cursor, two
    # concurrent calls could interleave execute()/fetchone() and read each
    # other's result rows.
    query_cursor = conn.cursor()
    try:
        query_cursor.execute(
            "SELECT json FROM proteins WHERE id = ?", (protein_id,)
        )
        row = query_cursor.fetchone()
    finally:
        query_cursor.close()

    if not row:
        return "❌ No match found."

    try:
        data = json.loads(row[0])
    except (TypeError, ValueError):
        # ValueError covers json.JSONDecodeError; TypeError covers a NULL
        # (None) column value, which json.loads rejects.
        return "❌ Found row but failed to parse JSON."
    return json.dumps(data, indent=2)
# Gradio app: one text input (the ID), one text output (the JSON or an
# error message), both wired to search_by_id.
_id_input = gr.Textbox(label="Enter UniProt ID (e.g. P0DTC2)")
_json_output = gr.Textbox(label="Protein metadata (JSON)")

demo = gr.Interface(
    title="SwissProt Search by ID",
    description="Search Swiss-Prot database by UniProt protein ID",
    fn=search_by_id,
    inputs=_id_input,
    outputs=_json_output,
)

# Start the app; show_api=True is passed through to launch().
demo.launch(show_api=True)