MotifAlign / example_data_loader.py
jiehou's picture
Upload example_data_loader.py
9f27665 verified
"""
Example Data Loader for RNA Motif Comparison Tool
Provides functionality to load example PDB files from data folder
"""
import os
from pathlib import Path
import streamlit as st
def get_example_pdbs(data_folder="data"):
    """
    Get the example PDB files available in the data folder.

    Only regular files are returned, and the ``.pdb`` extension is matched
    case-insensitively (``.pdb``, ``.PDB``, ``.Pdb``, ...).

    Args:
        data_folder: Path to folder containing example PDB files

    Returns:
        Dictionary with filename as key and full path (as a string) as value,
        ordered by filename. Empty if the folder does not exist or is not a
        directory.
    """
    # os.path.isdir (rather than Path.is_dir) keeps "" meaning "no folder"
    # instead of silently resolving to the current working directory.
    if not os.path.isdir(data_folder):
        return {}

    # Compare the lowercased name so every casing of the extension is picked
    # up, and skip directories, which cannot be opened as PDB files later on.
    pdb_files = sorted(
        (
            entry
            for entry in Path(data_folder).iterdir()
            if entry.name.lower().endswith(".pdb") and entry.is_file()
        ),
        key=lambda entry: entry.name,
    )
    return {entry.name: str(entry) for entry in pdb_files}
def create_example_selector(data_folder="data"):
    """
    Create a Streamlit interface for selecting example PDB files.

    Shows a warning and returns early when the folder yields no examples;
    otherwise shows how many were found and renders a multiselect listing
    the filenames in sorted order.

    Args:
        data_folder: Path to folder containing example PDB files

    Returns:
        List of selected file paths (empty when nothing is available or
        nothing is selected)
    """
    examples = get_example_pdbs(data_folder)
    if not examples:
        st.warning(f"⚠️ No example PDB files found in '{data_folder}/' folder")
        return []

    st.info(f"📁 Found {len(examples)} example PDB files in '{data_folder}/' folder")

    # The widget works with filenames; they are mapped back to paths below.
    selected_names = st.multiselect(
        "Select example PDB files to load",
        options=sorted(examples),
        help="Choose one or more example structures",
    )

    selected_paths = [examples[name] for name in selected_names]
    if selected_paths:
        st.success(f"✅ Selected {len(selected_paths)} example file(s)")
    return selected_paths
def load_example_as_uploaded_file(file_path):
    """
    Wrap a PDB file on disk in a small stand-in for Streamlit's UploadedFile.

    The file is read fully, in binary mode, when the object is created.

    Args:
        file_path: Path to the PDB file

    Returns:
        Object exposing ``name`` (base filename), ``path`` (the path as
        given), and ``getbuffer()`` / ``read()``, both of which return the
        file's complete contents as bytes on every call
    """

    class MockUploadedFile:
        def __init__(self, path):
            self.path = path
            self.name = os.path.split(path)[1]
            with open(path, mode='rb') as handle:
                self._content = handle.read()

        def read(self):
            return self._content

        def getbuffer(self):
            return self._content

    uploaded = MockUploadedFile(file_path)
    return uploaded
def get_example_info(data_folder="data"):
    """
    Summarise each example PDB file found in the data folder.

    Args:
        data_folder: Path to folder containing example PDB files

    Returns:
        Dictionary keyed by filename. A file that was read successfully maps
        to a dict with 'path', 'atoms', 'header' and 'lines'; a file whose
        processing raised maps to a dict with 'path' and 'error' (the
        exception message).
    """
    record_prefixes = ('ATOM', 'HETATM')
    summary = {}
    for filename, file_path in get_example_pdbs(data_folder).items():
        try:
            with open(file_path, 'r') as handle:
                content = handle.readlines()
            # Number of lines that begin with ATOM or HETATM
            n_atoms = len([row for row in content if row.startswith(record_prefixes)])
            # Text after the first 10 characters of the first HEADER line,
            # or "" when the file has no HEADER line
            header_text = next(
                (row[10:].strip() for row in content if row.startswith('HEADER')),
                "",
            )
            entry = {
                'path': file_path,
                'atoms': n_atoms,
                'header': header_text,
                'lines': len(content),
            }
        except Exception as exc:
            # Best-effort: record the failure for this file and keep going
            entry = {
                'path': file_path,
                'error': str(exc),
            }
        summary[filename] = entry
    return summary