File size: 1,633 Bytes
b5c82cb
 
 
2ccdeda
b1a1cbf
b5c82cb
2ccdeda
 
357f9ba
2ccdeda
 
 
 
 
 
 
 
 
 
 
ece3e95
2ccdeda
 
 
 
 
 
 
ece3e95
 
a86d83f
 
 
 
 
 
 
 
 
 
 
 
ece3e95
357f9ba
ece3e95
a86d83f
 
 
 
 
ece3e95
55227d7
e3c2372
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
import gradio as gr
import sqlite3
import os
import requests
import json

HF_DB_URL = "https://huggingface.co/datasets/babilonczyk/swiss_prot/resolve/main/proteins.db"
LOCAL_DB_PATH = "proteins.db"

# Download the DB only if not already cached
def download_db(url: str, local_path: str) -> None:
    """Fetch the SQLite database from *url* into *local_path* unless cached.

    The body is streamed in chunks (the DB may be large, so it is never
    buffered whole in memory) and written to a ``.part`` temp file that is
    atomically renamed into place only on success — a failed/interrupted
    download can therefore never leave a truncated file that the cache
    check would mistake for a valid database.

    Raises:
        requests.HTTPError: if the server responds with an error status.
        requests.RequestException: on connection failure or timeout.
    """
    if os.path.exists(local_path):
        print("🟢 Database already exists locally.")
        return

    print(f"⬇️ Downloading database from {url}...")
    part_path = local_path + ".part"
    # timeout prevents an unresponsive host from hanging app startup forever
    with requests.get(url, stream=True, timeout=60) as r:
        r.raise_for_status()
        with open(part_path, "wb") as f:
            for chunk in r.iter_content(chunk_size=1 << 20):  # 1 MiB chunks
                f.write(chunk)
    os.replace(part_path, local_path)  # atomic publish of the complete file
    print("✅ Download complete.")

# Connect to SQLite
def get_connection(db_path: str) -> sqlite3.Connection:
    """Open and return a SQLite connection to *db_path*.

    ``check_same_thread=False`` lets the connection be used from threads
    other than the one that created it (Gradio handles requests on worker
    threads).
    """
    connection = sqlite3.connect(db_path, check_same_thread=False)
    return connection

# Setup (runs at import time): fetch the DB if missing — a network call —
# then open one long-lived connection shared by all request handlers.
download_db(HF_DB_URL, LOCAL_DB_PATH)
conn = get_connection(LOCAL_DB_PATH)
# NOTE(review): this single cursor is shared across Gradio worker threads;
# sqlite3 cursors are not documented as safe for concurrent use — confirm.
cursor = conn.cursor()

# Search by protein ID (exact match)
def search_by_id(protein_id: str) -> str:
    """Look up a protein by exact UniProt ID; return pretty-printed JSON.

    Args:
        protein_id: UniProt accession (e.g. ``P0DTC2``); whitespace is
            stripped and the ID is upper-cased before matching.

    Returns:
        The record's JSON formatted with 2-space indentation, or a
        user-facing error string when no row matches or the stored
        JSON is malformed.
    """
    protein_id = protein_id.strip().upper()
    if not protein_id:
        # Blank input can never match a real accession; skip the query.
        return "❌ No match found."
    # conn.execute creates a fresh cursor per call: the module-level
    # connection is shared across Gradio worker threads, and interleaving
    # execute/fetch on one shared cursor from several threads is unsafe.
    row = conn.execute(
        "SELECT json FROM proteins WHERE id = ?", (protein_id,)
    ).fetchone()
    if not row:
        return "❌ No match found."
    try:
        data = json.loads(row[0])
        return json.dumps(data, indent=2)
    except json.JSONDecodeError:
        return "❌ Found row but failed to parse JSON."

# Gradio app: one text input (the UniProt ID) wired to search_by_id,
# one text output showing the pretty-printed JSON record.
demo = gr.Interface(
    fn=search_by_id,
    inputs=gr.Textbox(label="Enter UniProt ID (e.g. P0DTC2)"),
    outputs=gr.Textbox(label="Protein metadata (JSON)"),
    title="SwissProt Search by ID",
    description="Search Swiss-Prot database by UniProt protein ID"
)

# show_api=True exposes the function through Gradio's API endpoint as well.
demo.launch(show_api=True)