Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import numpy as np | |
| import torch | |
| import torch.nn as nn | |
| import os | |
# Filename of the serialized classifier checkpoint; render() shows this value
# in its "model not found" error message.
| MODEL_PT_PATH = "HantaSeqNet.pt" | |
# Token length handed to the tokenizer as max_length (with truncation and
# max_length padding) inside extract_embedding().
| MAX_LENGTH = 512 | |
# Example input sequences keyed by class name. render() draws one button per
# key; clicking it copies the sequence into the text area.
# NOTE(review): the sequences are highly repetitive — presumably synthetic
# placeholders rather than real genome fragments; confirm with the author.
| SAMPLE_SEQUENCES = { | |
| "Bayou": "TGCTGATGATGATGATGATGATGATGATGATGATGATGATGATGATGATGATGAGGATGATGATGATGATGATGATGATGATGTTGATGATGATGATTGCTGATGATGATGATGATGATGATGATGATGATGATGATGATGATGATGATGAGGATGATGATGATGATGATGATGATGATGTTGATGATGATGATTGCTGATGATGATGATGATGATGATGATGATGATGATGATGATGATGATGATGAGGATGATGATGATGATGATGATGATGATGTTGATGATGATGAT", | |
| "Dobrava": "GATGATGATGATGATGATGATGATGATGATGATGATGATGATGATGATGATGATGATGATTATGTTGATAATGATGATGATGATGATGATGATCATGATGATGATGATGATGATGATGATGATGATGATGATGATGATGATGATGATGATGATGATTATGTTGATAATGATGATGATGATGATGATGATCATGATGATGATGATGATGATGATGATGATGATGATGATGATGATGATGATGATGATGATGATTATGTTGATAATGATGATGATGATGATGATGATCAT", | |
| "Hantaan": "GCGACGACGACGACGACGACGACGACGACGATGACGACGATGACGATGACGATGATGATAATGATGATGATGATGATGATGATTATGATGAAGATGCGACGACGACGACGACGACGACGACGACGATGACGACGATGACGATGACGATGATGATAATGATGATGATGATGATGATGATTATGATGAAGATGCGACGACGACGACGACGACGACGACGACGATGACGACGATGACGATGACGATGATGATAATGATGATGATGATGATGATGATTATGATGAAGAT", | |
| "Puumala": "GACGCTGACGATGACGATTACGATGAGGATGTTGAGGATGATGATGATGATGATGATGATGATGATGATGATGATGATGATGATGATGATGATTATGACGCTGACGATGACGATTACGATGAGGATGTTGAGGATGATGATGATGATGATGATGATGATGATGATGATGATGATGATGATGATGATGATTATGACGCTGACGATGACGATTACGATGAGGATGTTGAGGATGATGATGATGATGATGATGATGATGATGATGATGATGATGATGATGATGATGATTAT", | |
| "Seoul": "ATGATGATGATGATGATGATGATGATGATGATGATGATGATGATGATGAGGATGATGATGATGATGATGATGATGATGTTGATGAAGATGATGATGAATGATGATGATGATGATGATGATGATGATGATGATGATGATGATGATGAGGATGATGATGATGATGATGATGATGATGTTGATGAAGATGATGATGAATGATGATGATGATGATGATGATGATGATGATGATGATGATGATGATGAGGATGATGATGATGATGATGATGATGATGTTGATGAAGATGATGATGA", | |
| "Sin Nombre": "GCCGTTCCGATGCGGGCGACGACGACGAGGCGATGACTTCCATGACGATGACGACGATGACGACGACGACGACGACGAAGACGACGACGACGACGGCCGTTCCGATGCGGGCGACGACGACGAGGCGATGACTTCCATGACGATGACGACGATGACGACGACGACGACGACGAAGACGACGACGACGACGGCCGTTCCGATGCGGGCGACGACGACGAGGCGATGACTTCCATGACGATGACGACGATGACGACGACGACGACGACGAAGACGACGACGACGACG", | |
| } | |
# Page-level stylesheet, injected by render() through
# st.markdown(PAGE_CSS, unsafe_allow_html=True).
# It hides Streamlit's sidebar, header and footer, restyles buttons and the
# text area, and defines the custom classes used by the HTML snippets in this
# module (page-header*, section-label, info-box, sample-hint, seq-counter,
# result-*, conf-*, top-badge, disclaimer).
| PAGE_CSS = """ | |
| <style> | |
| @import url('https://fonts.googleapis.com/css2?family=Space+Mono:wght@400;700&family=Inter:wght@300;400;500;600;700&display=swap'); | |
| html, body, [data-testid="stAppViewContainer"], [data-testid="stMain"] { | |
| background-color: #f0f7ff !important; | |
| font-family: 'Inter', sans-serif !important; | |
| } | |
| [data-testid="stSidebar"] { display: none !important; } | |
| header[data-testid="stHeader"] { display: none !important; } | |
| footer { display: none !important; } | |
| .block-container { | |
| padding: 2.5rem 3rem 4rem !important; | |
| max-width: 1000px !important; | |
| } | |
| .stButton > button { | |
| background: #2563eb !important; | |
| color: #ffffff !important; | |
| border: none !important; | |
| font-family: 'Inter', sans-serif !important; | |
| font-weight: 600 !important; | |
| font-size: 0.9rem !important; | |
| padding: 0.65rem 1.2rem !important; | |
| border-radius: 8px !important; | |
| width: 100% !important; | |
| transition: background 0.2s !important; | |
| } | |
| .stButton > button:hover { | |
| background: #1d4ed8 !important; | |
| box-shadow: 0 4px 14px rgba(37,99,235,.25) !important; | |
| } | |
| .page-header { | |
| background: linear-gradient(135deg, #eff6ff, #e0f2fe); | |
| border: 1px solid #bae6fd; | |
| border-radius: 14px; | |
| padding: 1.6rem 2rem; | |
| margin-bottom: 2rem; | |
| display: flex; | |
| align-items: center; | |
| gap: 1.2rem; | |
| } | |
| .page-header-icon { font-size: 2.2rem; line-height: 1; } | |
| .page-header-eyebrow { | |
| font-family: 'Space Mono', monospace; | |
| font-size: 0.67rem; | |
| letter-spacing: 0.15em; | |
| color: #0284c7; | |
| text-transform: uppercase; | |
| margin-bottom: 0.25rem; | |
| } | |
| .page-header-title { | |
| font-family: 'Space Mono', monospace; | |
| font-size: 1.3rem; | |
| font-weight: 700; | |
| color: #0f172a; | |
| margin-bottom: 0.2rem; | |
| } | |
| .page-header-sub { font-size: 0.85rem; color: #64748b; } | |
| .section-label { | |
| font-family: 'Space Mono', monospace; | |
| font-size: 0.67rem; | |
| letter-spacing: 0.18em; | |
| color: #0284c7; | |
| text-transform: uppercase; | |
| border-left: 3px solid #38bdf8; | |
| padding-left: 0.7rem; | |
| margin: 1.6rem 0 0.8rem; | |
| display: block; | |
| } | |
| .info-box { | |
| background: #f0f9ff; | |
| border: 1px solid #bae6fd; | |
| border-left: 4px solid #38bdf8; | |
| border-radius: 8px; | |
| padding: 1rem 1.2rem; | |
| font-size: 0.87rem; | |
| color: #0c4a6e; | |
| line-height: 1.65; | |
| } | |
| .info-box strong { color: #0369a1; } | |
| .info-box code { | |
| background: #dbeafe; | |
| padding: 0.1rem 0.35rem; | |
| border-radius: 4px; | |
| font-family: 'Space Mono', monospace; | |
| font-size: 0.82rem; | |
| color: #1d4ed8; | |
| } | |
| .sample-hint { | |
| font-size: 0.82rem; | |
| color: #64748b; | |
| margin-bottom: 0.7rem; | |
| font-weight: 500; | |
| } | |
| .seq-counter { font-family: 'Space Mono', monospace; font-size: 0.75rem; color: #64748b; margin-top: 0.35rem; display: inline-block; } | |
| .seq-counter.ok { color: #16a34a; } | |
| .seq-counter.warn { color: #d97706; } | |
| div[data-testid="stTextArea"] label { | |
| font-size: 0.84rem !important; | |
| font-weight: 500 !important; | |
| color: #374151 !important; | |
| } | |
| div[data-testid="stTextArea"] textarea { | |
| background: #ffffff !important; | |
| border: 1.5px solid #e2e8f0 !important; | |
| border-radius: 8px !important; | |
| color: #0f172a !important; | |
| font-family: 'Space Mono', monospace !important; | |
| font-size: 0.8rem !important; | |
| line-height: 1.6 !important; | |
| } | |
| div[data-testid="stTextArea"] textarea:focus { | |
| border-color: #38bdf8 !important; | |
| box-shadow: 0 0 0 3px rgba(56,189,248,.15) !important; | |
| } | |
| .result-wrap { | |
| background: linear-gradient(135deg, #eff6ff, #e0f2fe); | |
| border: 1.5px solid #bae6fd; | |
| border-radius: 14px; | |
| padding: 2rem 2rem 1.6rem; | |
| margin-top: 1.5rem; | |
| position: relative; | |
| overflow: hidden; | |
| } | |
| .result-wrap::before { | |
| content: ''; | |
| position: absolute; | |
| top: 0; left: 0; right: 0; | |
| height: 4px; | |
| background: linear-gradient(90deg, #3b82f6, #38bdf8, #06b6d4); | |
| border-radius: 14px 14px 0 0; | |
| } | |
| .result-meta { | |
| font-family: 'Space Mono', monospace; | |
| font-size: 0.66rem; | |
| letter-spacing: 0.13em; | |
| text-transform: uppercase; | |
| color: #64748b; | |
| margin-bottom: 0.5rem; | |
| } | |
| .result-class { | |
| font-family: 'Space Mono', monospace; | |
| font-size: 1.9rem; | |
| font-weight: 700; | |
| color: #1d4ed8; | |
| line-height: 1; | |
| margin-bottom: 0.4rem; | |
| } | |
| .result-sub { | |
| font-size: 0.87rem; | |
| color: #475569; | |
| margin-bottom: 1.6rem; | |
| line-height: 1.55; | |
| } | |
| .result-sub strong { color: #1e40af; } | |
| .conf-title { | |
| font-family: 'Space Mono', monospace; | |
| font-size: 0.66rem; | |
| letter-spacing: 0.13em; | |
| text-transform: uppercase; | |
| color: #64748b; | |
| margin-bottom: 0.75rem; | |
| } | |
| .conf-row { display: flex; align-items: center; gap: 0.9rem; margin-bottom: 0.65rem; } | |
| .conf-lbl { font-size: 0.83rem; color: #374151; width: 90px; flex-shrink: 0; font-weight: 500; } | |
| .conf-track { flex: 1; height: 9px; background: #dbeafe; border-radius: 100px; overflow: hidden; } | |
| .conf-fill { height: 100%; border-radius: 100px; } | |
| .conf-fill.top { background: linear-gradient(90deg, #2563eb, #38bdf8); } | |
| .conf-fill.rest { background: #93c5fd; opacity: 0.45; } | |
| .conf-pct { font-family: 'Space Mono', monospace; font-size: 0.83rem; color: #0f172a; width: 50px; text-align: right; flex-shrink: 0; font-weight: 600; } | |
| .top-badge { | |
| display: inline-block; | |
| font-family: 'Space Mono', monospace; | |
| font-size: 0.6rem; | |
| padding: 0.1rem 0.4rem; | |
| border-radius: 100px; | |
| background: #dbeafe; | |
| border: 1px solid #93c5fd; | |
| color: #1d4ed8; | |
| margin-left: 0.4rem; | |
| vertical-align: middle; | |
| } | |
| .disclaimer { | |
| margin-top: 1.4rem; | |
| padding: 0.85rem 1rem; | |
| background: rgba(251,191,36,.1); | |
| border: 1px solid rgba(251,191,36,.35); | |
| border-radius: 8px; | |
| font-size: 0.79rem; | |
| color: #92400e; | |
| line-height: 1.55; | |
| } | |
| </style> | |
| """ | |
class SmallANN(nn.Module):
    """Small feed-forward classification head.

    Maps an ``input_dim``-wide vector to ``num_classes`` raw scores through
    two hidden layers (256 and 64 units), each followed by ReLU and dropout
    (p=0.3 and p=0.2 respectively). No softmax is applied here.
    """

    def __init__(self, input_dim, num_classes):
        super().__init__()
        # The position of every layer inside the Sequential determines the
        # state-dict key names (network.0.*, network.3.*, network.6.*), so
        # the order below must not change or saved weights stop loading.
        layers = [
            nn.Linear(input_dim, 256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, 64),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(64, num_classes),
        ]
        self.network = nn.Sequential(*layers)

    def forward(self, x):
        """Return the unnormalised class scores for ``x``."""
        logits = self.network(x)
        return logits
@st.cache_resource
def load_seq_model():
    """Load the HantaSeqNet classifier checkpoint and return it ready for inference.

    The checkpoint at ``MODEL_PT_PATH`` is read as a mapping with the keys
    ``input_dim``, ``num_classes``, ``model_state_dict`` and ``class_names``.
    The result is cached with ``st.cache_resource`` so the file is read once
    per server process instead of on every button click; a failed load raises
    and is therefore not cached.

    Returns:
        tuple: ``(model, class_names)`` where ``model`` is a ``SmallANN`` in
        eval mode holding the saved weights and ``class_names`` is
        ``bundle["class_names"]`` unchanged.

    Raises:
        FileNotFoundError: if the checkpoint file does not exist (handled by
            ``render``).
    """
    # The checkpoint ships inside the Space repository, so the relative path
    # resolves directly against the working directory.
    #
    # SECURITY NOTE: weights_only=False makes torch.load unpickle arbitrary
    # Python objects. That is acceptable only because the file is a trusted
    # artifact shipped with the app; never point this at user-supplied files.
    # NOTE(review): switching to weights_only=True would be safer, but it may
    # reject the bundle if class_names is not a plain list — confirm first.
    bundle = torch.load(MODEL_PT_PATH, map_location="cpu", weights_only=False)

    ann = SmallANN(bundle["input_dim"], bundle["num_classes"])
    ann.load_state_dict(bundle["model_state_dict"])
    ann.eval()  # disable dropout for inference
    return ann, bundle["class_names"]
@st.cache_resource
def load_dnabert():
    """Download (if needed) and load the DNABERT-2 tokenizer and encoder.

    The snapshot of the ``RangGaraga/My_DNABert`` repository is fetched with
    the token from the ``HF_TOKEN`` environment variable (``None`` if unset)
    and stored under ``/tmp/dnabert2``. The result is cached with
    ``st.cache_resource`` so the download check and model construction run
    once per server process rather than on every button click.

    Returns:
        tuple: ``(tokenizer, dnabert)`` with the model in eval mode and every
        parameter frozen (``requires_grad = False``).

    Raises:
        Exception: whatever the import, download or ``from_pretrained`` calls
            raise; ``render`` reports these to the user.
    """
    # Heavy optional dependencies are imported lazily so that merely
    # importing this module does not require them.
    from huggingface_hub import snapshot_download
    from multimolecule import AutoTokenizer, DnaBert2Model

    local_dir = snapshot_download(
        repo_id="RangGaraga/My_DNABert",
        token=os.environ.get("HF_TOKEN"),
        local_dir="/tmp/dnabert2",  # kept under /tmp, which the author notes is faster
    )
    tokenizer = AutoTokenizer.from_pretrained(local_dir, trust_remote_code=True)
    dnabert = DnaBert2Model.from_pretrained(local_dir, trust_remote_code=True)

    # The encoder is used purely as a frozen feature extractor.
    dnabert.eval()
    for param in dnabert.parameters():
        param.requires_grad = False
    return tokenizer, dnabert
def extract_embedding(seq, tokenizer, dnabert):
    """Turn one sequence string into a fixed-size embedding vector.

    The sequence is tokenized with truncation and padding to ``MAX_LENGTH``,
    passed through ``dnabert`` without gradient tracking, and the
    ``last_hidden_state`` is averaged over dimension 1.

    Args:
        seq: sequence string handed to the tokenizer.
        tokenizer: callable returning keyword inputs for ``dnabert``.
        dnabert: model whose output exposes ``last_hidden_state``.

    Returns:
        numpy.ndarray: the pooled hidden state with the leading dimension
        squeezed out (presumably a 1-D vector of hidden size — confirm).
    """
    encoded = tokenizer(
        seq,
        return_tensors="pt",
        truncation=True,
        padding="max_length",
        max_length=MAX_LENGTH,
    )
    with torch.no_grad():
        hidden = dnabert(**encoded).last_hidden_state
    # NOTE(review): no attention mask is applied, so padded positions take
    # part in the mean. Presumably the classifier was trained on embeddings
    # pooled exactly this way — confirm before changing the pooling.
    pooled = hidden.mean(dim=1)
    return pooled.squeeze(0).numpy()
def clean_sequence(raw):
    """Normalise pasted FASTA or plain-text input into one uppercase string.

    Lines whose first non-blank character is ``>`` (FASTA headers) are
    dropped. All whitespace is removed from the remaining lines, including
    spaces or tabs *inside* a line, so block-formatted sequences such as
    ``ATGC ATGC`` are not counted as longer than they are nor rejected for
    containing a space. The remaining characters are upper-cased and joined.

    Args:
        raw: text as typed or pasted by the user; ``None`` or an empty
            string yields ``""``.

    Returns:
        str: the concatenated, upper-cased sequence (may be empty). No
        alphabet validation is performed here.
    """
    if not raw:
        return ""
    pieces = []
    for line in raw.splitlines():
        stripped = line.strip()
        if stripped.startswith(">"):
            continue  # FASTA header line
        # split()/join removes every whitespace run, interior ones included.
        pieces.append("".join(stripped.split()).upper())
    return "".join(pieces)
def conf_bar(label, pct, is_top):
    """Build the HTML for one row of the per-class confidence chart.

    Args:
        label: class name shown on the left. It is HTML-escaped because the
            result is rendered with ``unsafe_allow_html=True``.
        pct: confidence in percent; used both as the bar width and as the
            printed value, formatted with one decimal.
        is_top: when true the bar uses the ``top`` style and a "TOP" badge is
            appended to the label; otherwise the ``rest`` style is used.

    Returns:
        str: an HTML fragment starting with a newline followed by a
        ``<div class="conf-row">`` element.
    """
    from html import escape

    cls = "top" if is_top else "rest"
    badge = '<span class="top-badge">▲ TOP</span>' if is_top else ""
    safe_label = escape(str(label))
    return (
        "\n"
        '<div class="conf-row">\n'
        f'<div class="conf-lbl">{safe_label}{badge}</div>\n'
        f'<div class="conf-track"><div class="conf-fill {cls}" style="width:{pct:.1f}%"></div></div>\n'
        f'<div class="conf-pct">{pct:.1f}%</div>\n'
        "</div>"
    )
# Minimum number of bases a cleaned sequence needs before it is classified.
_MIN_SEQ_LEN = 50
# Characters accepted in a cleaned sequence.
_VALID_BASES = frozenset("ATGCN")

# Static HTML fragments rendered by render(); the markup lines are kept
# unindented.
_HEADER_HTML = """
<div class="page-header">
<div class="page-header-icon">🧬</div>
<div>
<div class="page-header-eyebrow">Model 02 · HantaSeqNet</div>
<div class="page-header-title">Input Sekuens DNA</div>
<div class="page-header-sub">Klasifikasi tipe Hantavirus berdasarkan sekuens genom menggunakan DNABERT-2 + ANN.</div>
</div>
</div>
"""

_GUIDE_HTML = """
<div class="info-box">
<strong>Apa itu sekuens DNA?</strong><br>
Sekuens DNA adalah rangkaian karakter <code>A</code>, <code>T</code>, <code>G</code>, <code>C</code>
yang merepresentasikan kode genetik virus — biasanya hasil dari sequencing laboratorium
dalam format <strong>FASTA</strong>. Jika tidak punya data sendiri,
gunakan tombol contoh di bawah untuk mencoba sistem ini secara langsung.
</div>
"""

# Filled in with str.format(top_class=..., top_pct=..., all_bars=...).
_RESULT_HTML = """
<div class="result-wrap">
<div class="result-meta">Hasil Klasifikasi · HantaSeqNet</div>
<div class="result-class">{top_class} Hantavirus</div>
<div class="result-sub">
Sekuens yang diinput paling mirip dengan tipe
<strong>{top_class}</strong> dengan confidence
<strong>{top_pct:.1f}%</strong>.
</div>
<div class="conf-title">Distribusi Confidence per Kelas</div>
{all_bars}
<div class="disclaimer">
⚠️ <strong>Perhatian:</strong> Hasil klasifikasi ini didasarkan pada similarity embedding
sekuens terhadap data training. Konfirmasi dengan analisis filogenetik atau uji PCR spesifik
tetap diperlukan. Untuk keperluan riset dan edukasi saja.
</div>
</div>
"""


def render(goto):
    """Render the DNA-sequence classification page.

    The page shows a header, a short guide, one button per entry of
    ``SAMPLE_SEQUENCES`` (which fills the text area), the sequence text area
    with a live length counter, and a button that runs the classification.
    When that button is clicked the input is validated (minimum length,
    allowed characters), both models are loaded, and the predicted class
    plus a per-class confidence chart are displayed. Every failure is
    reported through ``st.warning`` / ``st.error`` and ends the render early.

    Args:
        goto: callable invoked as ``goto("home")`` when the back button is
            clicked (presumably the app's page-navigation callback).

    Returns:
        None.
    """
    from html import escape

    st.markdown(PAGE_CSS, unsafe_allow_html=True)

    # Initialise the session-state slots backing the text area and the
    # currently highlighted sample button.
    if "seq_textarea" not in st.session_state:
        st.session_state["seq_textarea"] = ""
    if "selected_sample" not in st.session_state:
        st.session_state["selected_sample"] = None

    if st.button("← Kembali ke Home", key="back_seq"):
        goto("home")

    st.markdown(_HEADER_HTML, unsafe_allow_html=True)

    # Guide
    st.markdown('<span class="section-label">💡 Panduan</span>', unsafe_allow_html=True)
    st.markdown(_GUIDE_HTML, unsafe_allow_html=True)

    # Sample buttons
    st.markdown('<span class="section-label">🧪 Contoh Sekuens</span>', unsafe_allow_html=True)
    st.markdown(
        '<p class="sample-hint">Klik salah satu kelas di bawah untuk mengisi sekuens contoh secara otomatis:</p>',
        unsafe_allow_html=True,
    )
    cols = st.columns(len(SAMPLE_SEQUENCES))
    for col, (cls_name, sample_seq) in zip(cols, SAMPLE_SEQUENCES.items()):
        with col:
            is_sel = st.session_state["selected_sample"] == cls_name
            label = f"✓ {cls_name}" if is_sel else cls_name
            if st.button(label, key=f"sample_{cls_name}", use_container_width=True):
                # The text-area widget is created further down, so its state
                # can still be written here; rerun to show the new value.
                st.session_state["seq_textarea"] = sample_seq
                st.session_state["selected_sample"] = cls_name
                st.rerun()

    # Text area — key="seq_textarea" matches the session-state slot that the
    # sample buttons update.
    st.markdown('<span class="section-label">✏️ Input Sekuens DNA</span>', unsafe_allow_html=True)
    raw_input = st.text_area(
        label="Masukkan sekuens DNA (format FASTA atau plain sequence):",
        height=190,
        placeholder=(
            "Contoh plain sequence:\n"
            "ATGATGATGATGATGATGATGATGATGAT...\n\n"
            "Atau format FASTA:\n"
            ">SampleName|Seoul\n"
            "ATGATGATGATGATGATGATGATGATGAT..."
        ),
        key="seq_textarea",
    )

    # Clean once; the same value feeds the counter and the classifier.
    sequence = clean_sequence(raw_input)
    seq_len = len(sequence)
    ok = seq_len >= _MIN_SEQ_LEN
    counter_cls = "ok" if ok else "warn"
    counter_ico = "✅" if ok else "⚠️"
    suffix = "" if ok else f" — minimal {_MIN_SEQ_LEN} bp"
    st.markdown(
        f'<span class="seq-counter {counter_cls}">'
        f'{counter_ico} Panjang sekuens: <strong>{seq_len:,} bp</strong>{suffix}</span>',
        unsafe_allow_html=True,
    )

    # Predict button
    st.markdown("<div style='height:0.6rem'></div>", unsafe_allow_html=True)
    _, btn_col, _ = st.columns([1, 4, 1])
    with btn_col:
        predict_clicked = st.button("Jalankan Klasifikasi", key="predict_seq", use_container_width=True)

    if not predict_clicked:
        return

    # Validation
    if not ok:
        st.warning(f"Sekuens terlalu pendek. Masukkan minimal {_MIN_SEQ_LEN} base pair (bp).")
        return

    invalid = set(sequence) - _VALID_BASES
    if invalid:
        st.warning(
            f"Karakter tidak valid: `{''.join(sorted(invalid))}`. "
            f"Hanya A, T, G, C, N yang diperbolehkan."
        )
        return

    # Model loading — any failure is shown to the user instead of crashing
    # the page, mirroring how the DNABERT-2 load is handled.
    try:
        ann, class_names = load_seq_model()
    except FileNotFoundError:
        st.error(f"Model tidak ditemukan di: {MODEL_PT_PATH}")
        return
    except Exception as e:
        st.error(f"Gagal memuat model HantaSeqNet: {e}")
        return

    try:
        tokenizer, dnabert = load_dnabert()
    except Exception as e:
        st.error(f"Gagal memuat DNABERT-2: {e}")
        return

    # Inference
    try:
        with st.spinner("Mengekstrak embedding dengan DNABERT-2"):
            emb = extract_embedding(sequence, tokenizer, dnabert)
            with torch.no_grad():
                tensor = torch.tensor(emb, dtype=torch.float32).unsqueeze(0)
                probs = torch.softmax(ann(tensor), dim=1).squeeze(0).numpy()
    except Exception as e:
        st.error(f"Gagal menjalankan klasifikasi: {e}")
        return

    # Result: classes ordered from most to least probable.
    sorted_idx = np.argsort(probs)[::-1]
    top_class = class_names[sorted_idx[0]]
    top_pct = probs[sorted_idx[0]] * 100
    all_bars = "".join(
        conf_bar(class_names[i], probs[i] * 100, rank == 0)
        for rank, i in enumerate(sorted_idx)
    )
    st.markdown(
        _RESULT_HTML.format(
            # Escaped because the fragment is rendered as raw HTML.
            top_class=escape(str(top_class)),
            top_pct=top_pct,
            all_bars=all_bars,
        ),
        unsafe_allow_html=True,
    )