File size: 1,633 Bytes
b5c82cb
 
 
2ccdeda
b1a1cbf
b5c82cb
2ccdeda
 
357f9ba
2ccdeda
 
 
 
 
 
 
 
 
 
 
ece3e95
2ccdeda
 
 
 
 
 
 
ece3e95
 
a86d83f
 
 
 
 
 
 
 
 
 
 
 
ece3e95
357f9ba
ece3e95
a86d83f
 
 
 
 
ece3e95
55227d7
e3c2372
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
import gradio as gr
import sqlite3
import os
import requests
import json

HF_DB_URL = "https://huggingface.co/datasets/babilonczyk/swiss_prot/resolve/main/proteins.db"
LOCAL_DB_PATH = "proteins.db"

# Download the DB only if not already cached
def download_db(url: str, local_path: str) -> None:
    """Fetch the SQLite database from *url* into *local_path* unless cached.

    The body is streamed in chunks (the DB may be large, so it is never
    buffered whole in memory) and written to a ``.part`` temp file that is
    atomically renamed into place only on success — a failed/interrupted
    download can therefore never leave a truncated file that the cache
    check would mistake for a valid database.

    Raises:
        requests.HTTPError: if the server responds with an error status.
        requests.RequestException: on connection failure or timeout.
    """
    if os.path.exists(local_path):
        print("🟢 Database already exists locally.")
        return

    print(f"⬇️ Downloading database from {url}...")
    part_path = local_path + ".part"
    # timeout prevents an unresponsive host from hanging app startup forever
    with requests.get(url, stream=True, timeout=60) as r:
        r.raise_for_status()
        with open(part_path, "wb") as f:
            for chunk in r.iter_content(chunk_size=1 << 20):  # 1 MiB chunks
                f.write(chunk)
    os.replace(part_path, local_path)  # atomic publish of the complete file
    print("✅ Download complete.")

# Connect to SQLite
def get_connection(db_path: str) -> sqlite3.Connection:
    """Open and return a SQLite connection to *db_path*.

    ``check_same_thread=False`` lets the connection be used from threads
    other than the one that created it (Gradio handles requests on worker
    threads).
    """
    connection = sqlite3.connect(db_path, check_same_thread=False)
    return connection

# Setup (runs at import time): fetch the DB if missing — a network call —
# then open one long-lived connection shared by all request handlers.
download_db(HF_DB_URL, LOCAL_DB_PATH)
conn = get_connection(LOCAL_DB_PATH)
# NOTE(review): this single cursor is shared across Gradio worker threads;
# sqlite3 cursors are not documented as safe for concurrent use — confirm.
cursor = conn.cursor()

# Search by protein ID (exact match)
def search_by_id(protein_id: str) -> str:
    """Look up a protein by exact UniProt ID; return pretty-printed JSON.

    Args:
        protein_id: UniProt accession (e.g. ``P0DTC2``); whitespace is
            stripped and the ID is upper-cased before matching.

    Returns:
        The record's JSON formatted with 2-space indentation, or a
        user-facing error string when no row matches or the stored
        JSON is malformed.
    """
    protein_id = protein_id.strip().upper()
    if not protein_id:
        # Blank input can never match a real accession; skip the query.
        return "❌ No match found."
    # conn.execute creates a fresh cursor per call: the module-level
    # connection is shared across Gradio worker threads, and interleaving
    # execute/fetch on one shared cursor from several threads is unsafe.
    row = conn.execute(
        "SELECT json FROM proteins WHERE id = ?", (protein_id,)
    ).fetchone()
    if not row:
        return "❌ No match found."
    try:
        data = json.loads(row[0])
        return json.dumps(data, indent=2)
    except json.JSONDecodeError:
        return "❌ Found row but failed to parse JSON."

# Gradio app: one text input (the UniProt ID) wired to search_by_id,
# one text output showing the pretty-printed JSON record.
demo = gr.Interface(
    fn=search_by_id,
    inputs=gr.Textbox(label="Enter UniProt ID (e.g. P0DTC2)"),
    outputs=gr.Textbox(label="Protein metadata (JSON)"),
    title="SwissProt Search by ID",
    description="Search Swiss-Prot database by UniProt protein ID"
)

# show_api=True exposes the function through Gradio's API endpoint as well.
demo.launch(show_api=True)