Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import json | |
| import pandas as pd | |
| from huggingface_hub import hf_hub_download | |
| import plotly.express as px | |
# Page-level settings: browser-tab title, tab icon and the wide layout.
st.set_page_config(page_title="PhytoAI Assistant", page_icon="πΏ", layout="wide")
def load_phytoai_data():
    """Download the PhytoAI dataset from the Hugging Face Hub and parse it.

    Fetches ``mega_final_dataset.json`` from the
    ``Gatescrispy/phytoai-mega-dataset`` dataset repository through
    ``hf_hub_download`` and decodes the file with ``json.load``.

    Returns:
        The decoded JSON document, or ``None`` when the download, the file
        read or the JSON parsing fails. On failure the error text is also
        shown on the page through ``st.error``.
    """
    try:
        dataset_path = hf_hub_download(
            repo_id="Gatescrispy/phytoai-mega-dataset",
            filename="mega_final_dataset.json",
            repo_type="dataset",
        )
        # Decode explicitly as UTF-8 instead of relying on the platform's
        # locale encoding, which can fail or garble non-ASCII text.
        with open(dataset_path, "r", encoding="utf-8") as f:
            return json.load(f)
    except Exception as e:
        # Deliberately broad: any failure is reported on the page and turned
        # into ``None`` so the caller can show its fallback view.
        st.error(f"Data loading error: {e}")
        return None
def main():
    """Render the PhytoAI Assistant page.

    Draws the title, loads the dataset with ``load_phytoai_data`` and, when
    loading fails, shows a static preview and stops. Otherwise it builds the
    sidebar search controls, dispatches to the matching search function when
    the user has entered or selected something, and then renders the
    statistics, the charts and the footer.
    """
    st.title("πΏ PhytoAI Assistant")
    st.markdown("### AI Assistant for Phytotherapy Research")
    st.markdown("---")

    with st.spinner("Loading PhytoAI data..."):
        dataset = load_phytoai_data()

    if dataset is None:
        # Fallback view: static description of the dataset, then stop.
        st.error("β Unable to load PhytoAI data")
        st.info("The dataset will be available once uploaded to Hugging Face")
        st.subheader("π PhytoAI Dataset Preview")
        st.write("**Dataset content:**")
        preview_lines = (
            "β’ 352 unique natural compounds",
            "β’ 1,314 documented bioactivities",
            "β’ Sources: PubChem, ChEMBL, scientific literature",
        )
        for preview_line in preview_lines:
            st.write(preview_line)
        return

    # Sidebar search controls.
    sidebar = st.sidebar
    sidebar.header("π Compound Search")
    mode = sidebar.selectbox(
        "Search type:",
        ["Compound name", "Therapeutic activity"],
    )

    if mode == "Compound name":
        name_query = sidebar.text_input(
            "Compound name",
            placeholder="curcumin, resveratrol, quercetin...",
        )
        if name_query:
            search_compounds_by_name(dataset, name_query)
    elif mode == "Therapeutic activity":
        activity_options = [
            "", "anti-inflammatory", "antioxidant", "cardiovascular",
            "neuroprotective", "anti-cancer", "antimicrobial",
        ]
        chosen_activity = sidebar.selectbox("Select an activity:", activity_options)
        # The leading empty option means "nothing selected yet".
        if chosen_activity:
            search_by_therapeutic_activity(dataset, chosen_activity)

    # Dataset-wide overview, shown regardless of the search state.
    display_main_statistics(dataset)
    create_visualizations(dataset)

    # Footer.
    st.markdown("---")
    st.markdown("**πΏ PhytoAI** - AI Assistant for Phytotherapy Research")
    st.markdown("π [PhytoAI Dataset](https://huggingface.co/datasets/Gatescrispy/phytoai-mega-dataset) | π¬ Research & Development")
def search_compounds_by_name(data, search_term):
    """Show the compounds whose name contains ``search_term``.

    The match is a case-insensitive substring test against each record's
    ``compound_name``. At most the first five matches are rendered, each in
    its own expander: molecular properties in the left column and up to five
    bioactivity types in the right column, followed by a count of the
    remaining ones when there are more than five.

    Args:
        data: Mapping of compound id to a compound record. Records are read
            with ``.get`` using the keys ``compound_name``,
            ``molecular_formula``, ``smiles``, ``pubchem_cid`` and
            ``bioactivities``.
        search_term: Text to look for in the compound names.

    Returns:
        None. All output is written to the Streamlit page.
    """
    max_compounds = 5
    max_activities = 5

    st.subheader(f"π Results for '{search_term}'")

    # Lower-case the query once instead of once per compound.
    needle = search_term.lower()
    results = [
        compound_data
        for compound_data in data.values()
        # ``or ''`` covers records whose name is present but null, which
        # would otherwise raise AttributeError on ``.lower()``.
        if needle in str(compound_data.get('compound_name') or '').lower()
    ]

    if not results:
        st.info("No compounds found for this search")
        return

    for compound_data in results[:max_compounds]:
        title = compound_data.get('compound_name') or 'Unknown compound'
        with st.expander(f"𧬠{title}"):
            col1, col2 = st.columns(2)
            with col1:
                st.write("**Molecular Properties:**")
                st.write(f"β’ Formula: `{compound_data.get('molecular_formula', 'N/A')}`")
                st.write(f"β’ SMILES: `{compound_data.get('smiles', 'N/A')}`")
                st.write(f"β’ PubChem CID: `{compound_data.get('pubchem_cid', 'N/A')}`")
            with col2:
                st.write("**Bioactivities:**")
                # A null ``bioactivities`` value is treated like an empty list.
                bioactivities = compound_data.get('bioactivities') or []
                for activity in bioactivities[:max_activities]:
                    st.write(f"β’ {activity.get('activity_type', 'N/A')}")
                hidden = len(bioactivities) - max_activities
                if hidden > 0:
                    st.write(f"... and {hidden} others")
def search_by_therapeutic_activity(data, activity_type):
    """List the compounds that have a bioactivity matching ``activity_type``.

    A compound matches when ``activity_type`` occurs, case-insensitively, in
    the ``activity_type`` of any of its bioactivities. Each compound appears
    at most once, with the first matching activity. Matches are shown as a
    table followed by a count; a warning is shown when nothing matches.

    Args:
        data: Mapping of compound id to a compound record. Records are read
            with ``.get`` using the keys ``compound_name``,
            ``molecular_formula``, ``pubchem_cid`` and ``bioactivities``.
        activity_type: Activity term to search for, e.g. ``"anti-cancer"``.

    Returns:
        None. All output is written to the Streamlit page.
    """
    st.subheader(f"π― Compounds with activity: {activity_type}")

    needle = activity_type.lower()
    # Also accept the un-hyphenated spelling of the term, so that a search
    # for "anti-cancer" finds "anticancer" entries too, the same way
    # create_visualizations counts both spellings as one category.
    needles = {needle, needle.replace('-', '')}

    matching_compounds = []
    for compound_data in data.values():
        # A null ``bioactivities`` value is treated like an empty list.
        for activity in compound_data.get('bioactivities') or []:
            # ``or ''`` covers a null activity type, which would otherwise
            # raise AttributeError on ``.lower()``.
            label = str(activity.get('activity_type') or '').lower()
            if any(term in label for term in needles):
                matching_compounds.append({
                    'Compound': compound_data.get('compound_name', 'N/A'),
                    'Formula': compound_data.get('molecular_formula', 'N/A'),
                    'Activity': activity.get('activity_type', 'N/A'),
                    'CID': compound_data.get('pubchem_cid', 'N/A'),
                })
                # One row per compound: stop at its first matching activity.
                break

    if matching_compounds:
        df = pd.DataFrame(matching_compounds)
        st.dataframe(df, use_container_width=True)
        st.info(f"π {len(matching_compounds)} compounds found with this activity")
    else:
        st.warning("No compounds found for this activity")
def display_main_statistics(data):
    """Show four headline metrics for the dataset in a four-column row.

    The metrics are: the number of compounds, the total number of
    bioactivity entries, the number of distinct therapeutic areas found in
    the activity types, and the share of compounds that have a truthy
    ``pubchem_cid``.

    A therapeutic area is counted when one of the known area terms occurs in
    a bioactivity's lower-cased ``activity_type``; "anticancer" and
    "anti-cancer" count as the same area, as in create_visualizations. The
    value is therefore at most six.

    Args:
        data: Mapping of compound id to a compound record. Records are read
            with ``.get`` using the keys ``bioactivities`` and
            ``pubchem_cid``.

    Returns:
        None. All output is written to the Streamlit page.
    """
    # Search term -> therapeutic area it stands for.
    area_terms = {
        'anti-inflammatory': 'anti-inflammatory',
        'antioxidant': 'antioxidant',
        'cardiovascular': 'cardiovascular',
        'neuroprotective': 'neuroprotective',
        'anti-cancer': 'anti-cancer',
        'anticancer': 'anti-cancer',
        'antimicrobial': 'antimicrobial',
    }

    st.header("π PhytoAI Dataset Statistics")
    col1, col2, col3, col4 = st.columns(4)

    # Gather every figure in a single pass over the records.
    total_compounds = len(data)
    total_bioactivities = 0
    compounds_with_pubchem = 0
    therapeutic_areas = set()
    for comp in data.values():
        # A null ``bioactivities`` value is treated like an empty list.
        bioactivities = comp.get('bioactivities') or []
        total_bioactivities += len(bioactivities)
        if comp.get('pubchem_cid'):
            compounds_with_pubchem += 1
        for activity in bioactivities:
            activity_type = str(activity.get('activity_type') or '').lower()
            # Record the matched area itself rather than the first word of
            # the activity text, so different phrasings of one area are not
            # counted as separate areas.
            therapeutic_areas.update(
                area for term, area in area_terms.items() if term in activity_type
            )

    # An empty dataset would otherwise divide by zero.
    if total_compounds:
        coverage = (compounds_with_pubchem / total_compounds) * 100
    else:
        coverage = 0.0

    with col1:
        st.metric("𧬠Total compounds", total_compounds)
    with col2:
        st.metric("π¬ Total bioactivities", f"{total_bioactivities:,}")
    with col3:
        st.metric("π― Therapeutic areas", len(therapeutic_areas))
    with col4:
        st.metric("π PubChem coverage", f"{coverage:.1f}%")
def create_visualizations(data):
    """Draw a bar chart and a pie chart of bioactivities per category.

    Every bioactivity entry is assigned to the first category whose keyword
    occurs in its lower-cased ``activity_type``; entries matching no keyword
    are ignored. Nothing is drawn below the header when no entry matched.

    Args:
        data: Mapping of compound id to a compound record; each record's
            ``bioactivities`` list is read with ``.get``.

    Returns:
        None. All output is written to the Streamlit page.
    """
    st.header("π Interactive Visualizations")

    # Ordered (label, keywords) pairs; the order is the match priority.
    categories = (
        ('Anti-inflammatory', ('anti-inflammatory',)),
        ('Antioxidant', ('antioxidant',)),
        ('Cardiovascular', ('cardiovascular',)),
        ('Neuroprotective', ('neuroprotective',)),
        ('Anti-cancer', ('anti-cancer', 'anticancer')),
        ('Antimicrobial', ('antimicrobial',)),
    )

    # NOTE(review): the tally is per bioactivity entry, while the bar chart's
    # y-axis label reads 'Number of Compounds' - confirm which is intended.
    tallies = {}
    for record in data.values():
        for entry in record.get('bioactivities', []):
            text = entry.get('activity_type', '').lower()
            for label, keywords in categories:
                if any(keyword in text for keyword in keywords):
                    tallies[label] = tallies.get(label, 0) + 1
                    break

    if not tallies:
        return

    labels = list(tallies.keys())
    counts = list(tallies.values())
    left, right = st.columns(2)

    with left:
        bar_chart = px.bar(
            x=labels,
            y=counts,
            title="Distribution of Therapeutic Activities",
            labels={'x': 'Activity Type', 'y': 'Number of Compounds'},
            color=counts,
            color_continuous_scale="Viridis",
        )
        bar_chart.update_layout(showlegend=False)
        st.plotly_chart(bar_chart, use_container_width=True)

    with right:
        pie_chart = px.pie(
            values=counts,
            names=labels,
            title="Therapeutic Areas Distribution",
        )
        st.plotly_chart(pie_chart, use_container_width=True)
# Call main() only when this module is run as the main script.
if __name__ == "__main__":
    main()