Polymer-Property-Prediction

Runtime error

App Files Files Community

liuganghuggingface committed on Oct 2, 2025

Commit

286c763

1 Parent(s): b503fed

add init

Browse files

Files changed (4) hide show

.gradio/certificate.pem +31 -0
README.md +3 -1
app.py +880 -0
requirements.txt +12 -0

.gradio/certificate.pem ADDED Viewed

	@@ -0,0 +1,31 @@

+-----BEGIN CERTIFICATE-----
+MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw
+TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh
+cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4
+WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu
+ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY
+MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc
+h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+
+0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U
+A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW
+T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH
+B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC
+B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv
+KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn
+OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn
+jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw
+qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI
+rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV
+HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq
+hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL
+ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ
+3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK
+NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5
+ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur
+TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC
+jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc
+oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq
+4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA
+mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d
+emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
+-----END CERTIFICATE-----

README.md CHANGED Viewed

@@ -11,4 +11,6 @@ license: mit
 short_description: 'Polymer property prediction for gas separation design '
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 short_description: 'Polymer property prediction for gas separation design '
 ---
+<!-- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference -->
+Based on torch-molecule (https://github.com/liugangcode/torch-molecule) and sklearn.

app.py ADDED Viewed

	@@ -0,0 +1,880 @@

+import gradio as gr
+import torch
+import numpy as np
+import pandas as pd
+import os
+import tempfile
+from pathlib import Path
+import pickle
+import joblib
+from rdkit import Chem
+from rdkit.Chem import Draw, AllChem
+import plotly.graph_objects as go
+import plotly.express as px
+from huggingface_hub import hf_hub_download
+# Import torch_molecule models
+try:
+    from torch_molecule import GREAMolecularPredictor, GNNMolecularPredictor
+    TORCH_MOLECULE_AVAILABLE = True
+except ImportError:
+    TORCH_MOLECULE_AVAILABLE = False
+    print("Warning: torch_molecule not available. Some models may not work.")
+# Gases whose permeability is predicted; one model is loaded per (model name, gas) pair.
+all_properties = ['CH4', 'CO2', 'H2', 'N2', 'O2']
+# Model families offered in the UI; their lower-cased names are also used to build hub filenames.
+all_model_names = ['GREA', 'GCN', 'GIN', 'RandomForest', 'GaussianProcess']
+# Training configuration - set to True if models were trained in log space.
+# When True, raw model outputs are treated as log10 values and converted with 10**x for display.
+TRAIN_IN_LOG = True
+# HuggingFace repository ID from which the serialized model files are downloaded
+HF_REPO_ID = "liuganghuggingface/polymer-prediction-gas-models"
+# Default SMILES for testing (one polymer per line; pre-filled in the input textbox)
+DEFAULT_SMILES = """*c1cc2c(cc1*)C1(C(C)C)c3ccccc3C2(C(C)C)c2cc3c(cc21)Oc1cc2nc(*)c(*)nc2cc1O3
+*CN1CN(*)Cc2cc3c(cc21)C1c2ccccc2C3c2cc(*)c(*)cc21
+*C(=C(*)c1ccc2c(c1)C(C)(C)C(C)(C)C2(C)C)c1ccccc1"""
+# Selectivity boundary parameters (from 3_create_polymer_oracle.py)
+# Keyed by "<gas1>/<gas2>". For each pair:
+#   'x'     - the two x-coordinates of the bound line (plotted on the gas1 permeability axis)
+#   'y'     - the matching y-coordinates (plotted on the gas1/gas2 selectivity axis)
+#   'gases' - (gas1, gas2); selectivity is computed as gas1 permeability / gas2 permeability
+# The line through the two points is drawn as the "2008 Upper Bound" and is evaluated in
+# log10-log10 space when deciding whether a polymer lies above or below it.
+SELECTIVITY_BOUNDS = {
+    'CO2/CH4': {
+        'x': [1.00E+05, 1.00E-02],
+        'y': [1.00E+05/2.21E+04, 1.00E-02/4.88E-06],
+        'gases': ('CO2', 'CH4')
+    },
+    'H2/CH4': {
+        'x': [5.00E+04, 2.50E+00],
+        'y': [5.00E+04/8.67E+04, 2.50E+00/5.64E-04],
+        'gases': ('H2', 'CH4')
+    },
+    'O2/N2': {
+        'x': [5.00E+04, 1.00E-03],
+        'y': [5.00E+04/2.78E+04, 1.00E-03/2.43E-05],
+        'gases': ('O2', 'N2')
+    },
+    'H2/N2': {
+        'x': [1.00E+05, 1.00E-01],
+        'y': [1.00E+05/1.02E+05, 1.00E-01/9.21E-06],
+        'gases': ('H2', 'N2')
+    },
+    'CO2/N2': {
+        'x': [1.00E+06, 1.00E-04],
+        'y': [1.00E+06/3.05E+05, 1.00E-04/1.05E-08],
+        'gases': ('CO2', 'N2')
+    }
+}
+# ============= MODEL LOADING =============
+def load_all_models():
+    """
+    Download and instantiate every (model, gas) combination from the HuggingFace Hub.
+    Graph models (GREA, GCN, GIN) are stored as ``.pt`` files and need torch_molecule;
+    every other model name is treated as a joblib-serialized sklearn estimator (``.pkl``).
+    A combination that cannot be downloaded or loaded is reported on stdout and left
+    out of the result, so callers must check for missing entries.
+    Returns:
+        Dictionary with structure: {model_name: {gas: (model, model_type)}} where
+        model_type is 'torch_molecule' or 'sklearn'.
+    """
+    print("Loading all models from HuggingFace Hub...")
+    graph_model_names = ('GREA', 'GCN', 'GIN')
+    registry = {}
+    for model_name in all_model_names:
+        per_gas = registry.setdefault(model_name, {})
+        is_graph_model = model_name in graph_model_names
+        for gas in all_properties:
+            stem = f"{model_name.lower()}_{gas.lower()}"
+            try:
+                if is_graph_model and not TORCH_MOLECULE_AVAILABLE:
+                    print(f"  ⚠️  torch_molecule not available for {model_name}")
+                    continue
+                filename = stem + ('.pt' if is_graph_model else '.pkl')
+                # Fetch the serialized model (hf_hub_download returns a local path)
+                print(f"  Downloading {filename} from HuggingFace Hub...")
+                model_path = hf_hub_download(
+                    repo_id=HF_REPO_ID,
+                    filename=filename
+                )
+                if is_graph_model:
+                    print('model path for .pt file: ', model_path)
+                    # Build the matching architecture, then restore its weights
+                    if model_name == 'GREA':
+                        model = GREAMolecularPredictor()
+                    else:
+                        gnn_type = 'gcn-virtual' if model_name == 'GCN' else 'gin-virtual'
+                        model = GNNMolecularPredictor(gnn_type=gnn_type)
+                    model.load_from_local(model_path)
+                    model_type = 'torch_molecule'
+                else:
+                    print('model path for .pkl file: ', model_path)
+                    # NOTE: joblib.load unpickles the file, so HF_REPO_ID must stay a trusted repo
+                    model = joblib.load(model_path)
+                    model_type = 'sklearn'
+                per_gas[gas] = (model, model_type)
+                print(f"  ✓ Loaded {model_name} for {gas}")
+            except Exception as e:
+                # Best effort: one broken model must not prevent the others from loading
+                print(f"  ❌ Error loading {model_name} for {gas}: {e}")
+    print("Model loading complete!")
+    return registry
+# Load all models once at import time so prediction requests only do lookups.
+# (model, gas) combinations that failed to load are simply absent from this mapping.
+PRELOADED_MODELS = load_all_models()
+# ============= PREDICTION FUNCTIONS =============
+def validate_smiles(smiles_list):
+    """
+    Split SMILES strings into parseable and unparseable ones using RDKit.
+    Entries that are empty after stripping whitespace are ignored entirely.
+    Returns:
+        valid_smiles: List of (index, stripped SMILES, RDKit-standardized SMILES) tuples
+        invalid_smiles: List of (index, stripped SMILES) tuples that RDKit could not parse
+        validation_report: Markdown-style summary; invalid entries are listed by 1-based index
+    """
+    accepted = []
+    rejected = []
+    for position, raw in enumerate(smiles_list):
+        candidate = raw.strip()
+        if candidate:
+            parsed = Chem.MolFromSmiles(candidate)
+            if parsed is None:
+                rejected.append((position, candidate))
+            else:
+                # Round-trip through RDKit to obtain the standardized form
+                canonical = Chem.MolToSmiles(parsed, isomericSmiles=True)
+                accepted.append((position, candidate, canonical))
+    report_parts = [
+        f"✅ Valid SMILES: {len(accepted)}\n",
+        f"❌ Invalid SMILES: {len(rejected)}\n",
+    ]
+    if rejected:
+        report_parts.append("\n**Invalid SMILES detected:**\n")
+        report_parts.extend(
+            f"  - Line {position + 1}: `{text}`\n" for position, text in rejected
+        )
+        report_parts.append("\n⚠️ **Please remove or correct the invalid SMILES before proceeding.**")
+    return accepted, rejected, "".join(report_parts)
+def smiles_to_fingerprint(smiles_list, n_bits=2048):
+    """
+    Build the feature matrix for the sklearn models: one Morgan fingerprint
+    (radius 2, ``n_bits`` bits) per SMILES. A SMILES that RDKit cannot parse
+    contributes an all-zero row so the output stays aligned with the input.
+    """
+    def _encode(smiles):
+        mol = Chem.MolFromSmiles(smiles)
+        if mol is None:
+            return np.zeros(n_bits)
+        return np.array(AllChem.GetMorganFingerprintAsBitVect(mol, 2, nBits=n_bits))
+    return np.array([_encode(smiles) for smiles in smiles_list])
+def predict_properties(smiles_list, selected_models, progress=gr.Progress()):
+    """
+    Predict properties for a list of SMILES using selected models.
+    Args:
+        smiles_list: List of SMILES strings
+        selected_models: List of model names to use
+        progress: Gradio progress reporter, called with a fraction and a description
+    Returns:
+        (all_predictions, report). all_predictions is None when no model is selected,
+        when any SMILES is invalid, or when no SMILES is given. Otherwise it is a dict:
+            'original_smiles' / 'standardized_smiles': lists aligned with the predictions
+            'predictions':     {model_name or 'Average': {gas: 1-D array, original space}}
+            'predictions_log': same layout, log10 space
+        A (model, gas) combination that is unavailable or fails is omitted from the
+        inner dicts and listed in the report.
+    """
+    if not selected_models:
+        return None, "❌ Please select at least one model."
+    # Validate SMILES
+    progress(0.1, desc="Validating SMILES...")
+    valid_smiles, invalid_smiles, validation_report = validate_smiles(smiles_list)
+    # Stop if there are any invalid SMILES
+    if invalid_smiles:
+        return None, validation_report
+    if not valid_smiles:
+        return None, "❌ No SMILES provided."
+    # Extract standardized SMILES
+    _, original_smiles, standardized_smiles = zip(*valid_smiles)
+    standardized_smiles = list(standardized_smiles)
+    # Store all predictions by model
+    all_predictions = {
+        'original_smiles': list(original_smiles),
+        'standardized_smiles': standardized_smiles,
+        'predictions': {},  # {model_name: {gas: predictions}}
+        'predictions_log': {}  # Store log-space predictions for plotting
+    }
+    # For sklearn models, prepare fingerprints once
+    X_fp = None
+    if any(name in ('RandomForest', 'GaussianProcess') for name in selected_models):
+        progress(0.2, desc="Computing molecular fingerprints...")
+        X_fp = smiles_to_fingerprint(standardized_smiles)
+    # Track prediction errors and which models actually delivered something
+    model_errors = []
+    successful_models = []
+    total_predictions = len(all_properties) * len(selected_models)
+    pred_count = 0
+    for model_name in selected_models:
+        model_preds = all_predictions['predictions'][model_name] = {}
+        model_preds_log = all_predictions['predictions_log'][model_name] = {}
+        for gas in all_properties:
+            progress(0.2 + 0.7 * pred_count / total_predictions,
+                     desc=f"Predicting {gas} with {model_name}...")
+            pred_count += 1
+            # Check if model is available
+            entry = PRELOADED_MODELS.get(model_name, {}).get(gas)
+            if entry is None:
+                model_errors.append(f"{model_name} for {gas} (not available)")
+                continue
+            model, model_type = entry
+            try:
+                if model_type == 'torch_molecule':
+                    predictions = model.predict(standardized_smiles)['prediction']
+                else:  # sklearn
+                    predictions = model.predict(X_fp)
+                # Normalize to a 1-D float array so list, integer or (n, 1) shaped
+                # outputs all survive the 10** / log10 conversions below
+                predictions = np.asarray(predictions, dtype=float).ravel()
+                if TRAIN_IN_LOG:
+                    # predictions are in log space, convert to original for display
+                    model_preds[gas] = 10 ** predictions
+                    model_preds_log[gas] = predictions
+                else:
+                    # predictions are already in original space; clamp before log10
+                    model_preds[gas] = predictions
+                    model_preds_log[gas] = np.log10(np.maximum(predictions, 1e-10))
+            except Exception as e:
+                # Best effort: one failing model/gas must not abort the whole batch
+                print(f"Error predicting with {model_name} for {gas}: {e}")
+                model_errors.append(f"{model_name} for {gas} (prediction error)")
+        if model_preds:
+            successful_models.append(model_name)
+    # Calculate average predictions across models
+    progress(0.9, desc="Computing averages...")
+    average = all_predictions['predictions']['Average'] = {}
+    average_log = all_predictions['predictions_log']['Average'] = {}
+    for gas in all_properties:
+        contributing = [m for m in successful_models if gas in all_predictions['predictions'][m]]
+        if not contributing:
+            continue
+        # NOTE: 'predictions' holds the arithmetic mean in original space while
+        # 'predictions_log' holds the mean of the logs (a geometric mean once
+        # exponentiated), so 10**average_log differs from average for >1 model.
+        average[gas] = np.mean(
+            np.array([all_predictions['predictions'][m][gas] for m in contributing]), axis=0)
+        average_log[gas] = np.mean(
+            np.array([all_predictions['predictions_log'][m][gas] for m in contributing]), axis=0)
+    # Create summary report
+    report = validation_report + "\n"
+    if model_errors:
+        report += f"\n⚠️ Model issues: {', '.join(model_errors)}\n"
+    if successful_models:
+        # Count only models that produced at least one prediction
+        report += f"\n✅ Successfully made predictions for {len(valid_smiles)} molecules using {len(successful_models)} model(s)."
+    else:
+        report += "\n❌ None of the selected models produced predictions."
+    if TRAIN_IN_LOG:
+        report += f"\n📊 Note: Models were trained in log space. Predictions shown in original space (Barrer)."
+    progress(1.0, desc="Done!")
+    return all_predictions, report
+def format_predictions_dataframe(all_predictions, selected_view='Average'):
+    """
+    Turn the predictions of one view into a display table.
+    Args:
+        all_predictions: Dictionary with all predictions (or None)
+        selected_view: 'Average' or a model name; key into all_predictions['predictions']
+    Returns:
+        None when all_predictions is None. Otherwise a DataFrame with an
+        'Original_SMILES' column and, if the view exists, one column per gas holding
+        values formatted to three decimals ('N/A' where the view has no prediction).
+    """
+    if all_predictions is None:
+        return None
+    table = pd.DataFrame({
+        'Original_SMILES': all_predictions['original_smiles']
+    })
+    # Unknown view: return the SMILES column on its own
+    if selected_view not in all_predictions['predictions']:
+        return table
+    view_predictions = all_predictions['predictions'][selected_view]
+    for gas in all_properties:
+        if gas not in view_predictions:
+            table[gas] = ['N/A'] * len(table)
+        else:
+            table[gas] = [f"{val:.3f}" for val in view_predictions[gas]]
+    return table
+def _selectivity_vs_bound(gas1_perm_log, gas2_perm_log, bounds):
+    """
+    Shared maths for the selectivity plot and the above-bound report.
+    Args:
+        gas1_perm_log, gas2_perm_log: log10 permeabilities of the two gases (array-like)
+        bounds: one SELECTIVITY_BOUNDS entry (needs 'x' and 'y')
+    Returns:
+        (gas1_perm, gas2_perm, selectivity, above_bound): permeabilities in original
+        space, their ratio gas1/gas2, and a boolean array that is True where the
+        point lies strictly above the bound line in log10-log10 space.
+    """
+    # Convert to original space; the floor keeps the later log10 finite
+    gas1_perm = np.maximum(10 ** np.asarray(gas1_perm_log, dtype=float), 1e-10)
+    gas2_perm = np.maximum(10 ** np.asarray(gas2_perm_log, dtype=float), 1e-10)
+    selectivity = gas1_perm / gas2_perm
+    # The bound is a straight line through two points in log-log space
+    x1, x2 = bounds['x']
+    y1, y2 = bounds['y']
+    x1_log, x2_log = np.log10(x1), np.log10(x2)
+    y1_log, y2_log = np.log10(y1), np.log10(y2)
+    slope = (y1_log - y2_log) / (x1_log - x2_log)
+    intercept = y1_log - slope * x1_log
+    y_bound = slope * np.log10(gas1_perm) + intercept
+    above_bound = np.log10(selectivity) > y_bound
+    return gas1_perm, gas2_perm, selectivity, above_bound
+def create_selectivity_plot(all_predictions, selected_view='Average', selectivity_pair='CO2/CH4'):
+    """
+    Create a selectivity plot with 2008 upper bound.
+    Args:
+        all_predictions: Dictionary with all predictions
+        selected_view: Which model's predictions to show
+        selectivity_pair: Which gas pair to plot (e.g., 'CO2/CH4')
+    Returns:
+        Plotly figure (log-log axes: gas1 permeability vs gas1/gas2 selectivity), or
+        None when there is no data, the pair is unknown, or the selected view lacks
+        a prediction for either gas.
+    """
+    if all_predictions is None or selectivity_pair not in SELECTIVITY_BOUNDS:
+        return None
+    bounds = SELECTIVITY_BOUNDS[selectivity_pair]
+    gas1, gas2 = bounds['gases']
+    # Work from the log-space predictions
+    view_log = all_predictions['predictions_log'].get(selected_view)
+    if view_log is None or gas1 not in view_log or gas2 not in view_log:
+        return None
+    gas1_perm, gas2_perm, selectivity, above_bound = _selectivity_vs_bound(
+        view_log[gas1], view_log[gas2], bounds)
+    fig = go.Figure()
+    # Add 2008 upper bound line
+    fig.add_trace(go.Scatter(
+        x=list(bounds['x']),
+        y=list(bounds['y']),
+        mode='lines',
+        name='2008 Upper Bound',
+        line=dict(color='red', width=3, dash='dash'),
+        hoverinfo='name'
+    ))
+    # One hover label per polymer; long SMILES are truncated to 100 characters
+    hover_texts = []
+    for i, smiles in enumerate(all_predictions['original_smiles']):
+        truncated = smiles if len(smiles) <= 100 else smiles[:97] + '...'
+        status = "Above Bound" if above_bound[i] else "Below Bound"
+        hover_texts.append(f"SMILES: {truncated}<br>"
+                           f"{gas1}: {gas1_perm[i]:.3f}<br>"
+                           f"{gas2}: {gas2_perm[i]:.3f}<br>"
+                           f"Selectivity: {selectivity[i]:.3f}<br>"
+                           f"Status: {status}")
+    # Add points (above bound)
+    if np.any(above_bound):
+        fig.add_trace(go.Scatter(
+            x=gas1_perm[above_bound],
+            y=selectivity[above_bound],
+            mode='markers',
+            name='Above Bound',
+            marker=dict(color='green', size=10, symbol='circle'),
+            text=[text for text, is_above in zip(hover_texts, above_bound) if is_above],
+            hovertemplate='%{text}<extra></extra>'
+        ))
+    # Add points (below bound)
+    if np.any(~above_bound):
+        fig.add_trace(go.Scatter(
+            x=gas1_perm[~above_bound],
+            y=selectivity[~above_bound],
+            mode='markers',
+            name='Below Bound',
+            marker=dict(color='blue', size=8, symbol='circle'),
+            text=[text for text, is_above in zip(hover_texts, above_bound) if not is_above],
+            hovertemplate='%{text}<extra></extra>'
+        ))
+    # Update layout
+    fig.update_xaxes(
+        title=f"{gas1} Permeability (Barrer)",
+        type="log",
+        gridcolor='lightgray'
+    )
+    fig.update_yaxes(
+        title=f"{gas1}/{gas2} Selectivity",
+        type="log",
+        gridcolor='lightgray'
+    )
+    fig.update_layout(
+        title=f"{gas1}/{gas2} Selectivity Plot",
+        hovermode='closest',
+        showlegend=True,
+        plot_bgcolor='white',
+        height=600
+    )
+    return fig
+def get_polymers_above_bound(all_predictions, selected_view='Average', selectivity_pair='CO2/CH4'):
+    """
+    Get list of polymers above the 2008 upper bound.
+    Args:
+        all_predictions: Dictionary with all predictions
+        selected_view: Which model's predictions to use
+        selectivity_pair: Which gas pair to evaluate (e.g., 'CO2/CH4')
+    Returns:
+        Markdown string: a count line followed by a table of the polymers above the
+        bound (SMILES truncated to 50 characters), or a short message when the data
+        needed for the pair is missing.
+    """
+    if all_predictions is None or selectivity_pair not in SELECTIVITY_BOUNDS:
+        return "No data available."
+    bounds = SELECTIVITY_BOUNDS[selectivity_pair]
+    gas1, gas2 = bounds['gases']
+    # Work from the log-space predictions
+    if selected_view not in all_predictions['predictions_log']:
+        return "No predictions available for selected view."
+    view_log = all_predictions['predictions_log'][selected_view]
+    if gas1 not in view_log or gas2 not in view_log:
+        return f"Predictions not available for {gas1} or {gas2}."
+    gas1_perm, gas2_perm, selectivity, above_bound = _selectivity_vs_bound(
+        view_log[gas1], view_log[gas2], bounds)
+    # Create report
+    smiles_list = all_predictions['original_smiles']
+    above_indices = [i for i, is_above in enumerate(above_bound) if is_above]
+    parts = [f"**Polymers Above 2008 Upper Bound: {len(above_indices)}/{len(smiles_list)}**\n\n"]
+    if not above_indices:
+        parts.append("No polymers exceed the 2008 upper bound.\n")
+    else:
+        parts.append("| # | SMILES | " + gas1 + " | " + gas2 + " | Selectivity |\n")
+        parts.append("|---|--------|" + "-"*len(gas1) + "|" + "-"*len(gas2) + "|-------------|\n")
+        for row_number, i in enumerate(above_indices, start=1):
+            smiles = smiles_list[i]
+            # Truncate if too long
+            if len(smiles) > 50:
+                smiles = smiles[:47] + "..."
+            parts.append(f"| {row_number} | `{smiles}` | {gas1_perm[i]:.3f} | {gas2_perm[i]:.3f} | {selectivity[i]:.3f} |\n")
+    return "".join(parts)
+def process_smiles_input(text_input, file_input, selected_models):
+    """
+    Collect SMILES from the textbox and/or an uploaded file and run the predictions.
+    Args:
+        text_input: Textbox content, one SMILES per line (may be empty or None)
+        file_input: Uploaded file as a path string or an object exposing the path as
+            ``.name`` (may be None). ``.csv`` files must have a 'SMILES' column; any
+            other file is read as UTF-8 text with one SMILES per line.
+        selected_models: List of model names to use
+    Returns:
+        (all_predictions, report, view_options). all_predictions is None on any input
+        or validation problem, with the reason in report and view_options empty.
+    """
+    smiles_list = []
+    # Process text input
+    if text_input and text_input.strip():
+        smiles_list.extend(
+            line.strip() for line in text_input.strip().split('\n') if line.strip())
+    # Process file input
+    if file_input is not None:
+        try:
+            # Always read through the path: the uploaded object itself may already
+            # be closed or opened in binary mode, depending on the Gradio version
+            file_path = file_input if isinstance(file_input, str) else file_input.name
+            if file_path.lower().endswith('.csv'):
+                df = pd.read_csv(file_path)
+                if 'SMILES' not in df.columns:
+                    # map(str, ...) so non-string column labels cannot mask this message
+                    return None, f"❌ CSV file must contain a 'SMILES' column. Found columns: {', '.join(map(str, df.columns))}", []
+                smiles_from_file = df['SMILES'].dropna().astype(str).tolist()
+                smiles_list.extend(s.strip() for s in smiles_from_file if s.strip())
+            else:
+                # Read as plain text file (.txt, .smi)
+                with open(file_path, 'r', encoding='utf-8') as f:
+                    smiles_list.extend(line.strip() for line in f if line.strip())
+        except Exception as e:
+            # Surface any read/parse problem to the user instead of crashing the handler
+            return None, f"❌ Error reading file: {str(e)}", []
+    if not smiles_list:
+        return None, "❌ Please provide SMILES strings via text input or file upload.", []
+    # Remove duplicates while preserving order (dicts keep insertion order)
+    unique_smiles = list(dict.fromkeys(smiles_list))
+    # Make predictions
+    all_predictions, report = predict_properties(unique_smiles, selected_models)
+    # Get available view options
+    view_options = []
+    if all_predictions:
+        view_options = ['Average'] + [m for m in selected_models if m in all_predictions['predictions']]
+    return all_predictions, report, view_options
+# ============= GRADIO INTERFACE =============
+# Offer only the models for which at least one gas-specific model was loaded
+available_models = [
+    name for name in all_model_names
+    if name in PRELOADED_MODELS and PRELOADED_MODELS[name]
+]
+if len(available_models) == 0:
+    print("⚠️ WARNING: No models were successfully loaded!")
+    # Fall back to the full list so the UI still renders; predictions will report errors
+    available_models = all_model_names
+# Page layout: header, instructions, inputs + model selection, predict button,
+# results table with download, and the selectivity analysis section.
+with gr.Blocks(title="Polymer Property Prediction for Gas Permeability and Separation") as iface:
+    # Navigation Bar
+    with gr.Row(elem_id="navbar"):
+        gr.Markdown("""
+        <div style="text-align: center;">
+            <h1>🔬 Polymer Property Prediction for Gas Permeability and Separation</h1>
+            <div style="display: flex; gap: 20px; justify-content: center; align-items: center; margin-top: 10px;">
+                <a href="https://github.com/liugangcode/torch-molecule" target="_blank" style="display: flex; align-items: center; gap: 5px; text-decoration: none; color: inherit;">
+                    <img src="https://img.icons8.com/ios-glyphs/30/000000/github.png" alt="GitHub" />
+                    <span>💻 Support by torch-molecule and sklearn</span>
+                </a>
+            </div>
+        </div>
+        """)
+    # Main content
+    gr.Markdown("""
+    ## Batch Property Prediction for gas permeability properties (CH₄, CO₂, H₂, N₂, O₂)
+    **Input Options:**
+    - **Text Box**: Enter SMILES strings (one per line)
+    - **File Upload**: Upload a text file containing SMILES strings (.txt, .csv, .smi), see example file format for details
+    **Model Selection**: Choose one or more prediction models. If multiple models are selected, an averaged prediction will also be provided.
+    ⚠️ **Note**: All SMILES must be valid. Invalid SMILES will prevent prediction and must be corrected first. We treat the * as the polymerization point.
+    """)
+    with gr.Row():
+        # Left column: SMILES input (textbox and/or file upload)
+        with gr.Column():
+            gr.Markdown("### Input SMILES")
+            smiles_text = gr.Textbox(
+                label="Enter SMILES (one per line)",
+                placeholder="Enter SMILES here...",
+                lines=10,
+                value=DEFAULT_SMILES
+            )
+            smiles_file = gr.File(
+                label="Or upload a file with SMILES",
+                file_types=[".txt", ".csv", ".smi"]
+            )
+            with gr.Accordion("📄 Example File Format", open=False):
+                gr.Markdown("""
+                **For CSV files (.csv):**
+                Your CSV file must contain a column named "SMILES". Other columns are optional.
+                Example CSV content:
+                ```
+                SMILES,Name,Notes
+                *c1cc2c(cc1*)C1(C(C)C)c3ccccc3C2(C(C)C)c2cc3c(cc21)Oc1cc2nc(*)c(*)nc2cc1O3,Polymer1,High performance
+                *CN1CN(*)Cc2cc3c(cc21)C1c2ccccc2C3c2cc(*)c(*)cc21,Polymer2,Good selectivity
+                *C(=C(*)c1ccc2c(c1)C(C)(C)C(C)(C)C2(C)C)c1ccccc1,Polymer3,Standard
+                ```
+                **For text files (.txt, .smi):**
+                Simply list one SMILES per line:
+                ```
+                *c1cc2c(cc1*)C1(C(C)C)c3ccccc3C2(C(C)C)c2cc3c(cc21)Oc1cc2nc(*)c(*)nc2cc1O3
+                *CN1CN(*)Cc2cc3c(cc21)C1c2ccccc2C3c2cc(*)c(*)cc21
+                *C(=C(*)c1ccc2c(c1)C(C)(C)C(C)(C)C2(C)C)c1ccccc1
+                ```
+                """)
+        # Right column: model selection and reference information
+        with gr.Column():
+            gr.Markdown("### Model Selection")
+            # The first available model is pre-selected
+            model_selector = gr.CheckboxGroup(
+                choices=available_models,
+                label="Select Models to Use",
+                value=[available_models[0]] if available_models else [],
+                info="Select one or more models. Predictions will be averaged if multiple models are selected."
+            )
+            with gr.Accordion("ℹ️ Model Information", open=True):
+                gr.Markdown("""
+                **Available Models:**
+                - **GREA**: Graph Rationalization with Environment-based Augmentations (Deep Learning)
+                <a href="https://arxiv.org/abs/2206.02886" target="_blank" style="text-decoration: none; color: inherit;">
+                    📄 View Paper
+                </a>
+                - **GCN**: Graph Convolutional Network (Deep Learning)
+                - **GIN**: Graph Isomorphism Network (Deep Learning)
+                - **RandomForest**: Random Forest Regressor (ML)
+                - **GaussianProcess**: Gaussian Process Regressor (ML)
+                **Gas Properties:**
+                - CH₄: Methane permeability
+                - CO₂: Carbon dioxide permeability
+                - H₂: Hydrogen permeability
+                - N₂: Nitrogen permeability
+                - O₂: Oxygen permeability
+                Units are in Barrer (10⁻¹⁰ cm³(STP)·cm/(cm²·s·cmHg))
+                """)
+    predict_btn = gr.Button("🔮 Predict Properties", variant="primary", size="lg")
+    with gr.Row():
+        prediction_status = gr.Textbox(label="Status", lines=5)
+    with gr.Row():
+        # Starts hidden; on_predict shows it and fills in the available views
+        view_selector = gr.Radio(
+            choices=['Average'],
+            label="Select which predictions to display",
+            value='Average',
+            visible=False
+        )
+    with gr.Row():
+        prediction_results = gr.Dataframe(
+            label="Prediction Results",
+            wrap=True,
+            interactive=False
+        )
+    with gr.Row():
+        # Starts hidden; made visible once a results CSV has been written
+        download_btn = gr.DownloadButton(
+            label="📥 Download Results as CSV",
+            visible=False
+        )
+    # Selectivity Plot Section
+    gr.Markdown("## Gas Selectivity Analysis")
+    gr.Markdown("Visualize polymer performance against the 2008 upper bound for gas separation.")
+    with gr.Row():
+        # One choice per entry of SELECTIVITY_BOUNDS
+        selectivity_pair_selector = gr.Radio(
+            choices=list(SELECTIVITY_BOUNDS.keys()),
+            label="Select Gas Pair",
+            value='CO2/CH4'
+        )
+    with gr.Row():
+        selectivity_plot = gr.Plot(label="Selectivity Plot")
+    with gr.Row():
+        polymers_above_bound = gr.Markdown("Run prediction to see polymers above the bound.")
+    # Hidden state to store all predictions, so changing the view or gas pair
+    # re-renders from the stored results instead of re-running the models
+    all_predictions_state = gr.State(None)
+    def _write_results_csv(df):
+        """Write *df* to a new temporary CSV file and return the file's path."""
+        # delete=False because the file is served for download after the handler returns;
+        # the handle is closed before pandas reopens the path for writing
+        with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.csv') as temp_csv:
+            csv_path = temp_csv.name
+        df.to_csv(csv_path, index=False)
+        return csv_path
+    def on_predict(text_input, file_input, selected_models, selectivity_pair='CO2/CH4'):
+        """
+        Run the predictions and refresh every output component.
+        Returns, in the order wired below: stored predictions, results table, status
+        text, view selector update, download button update, selectivity plot, and
+        the above-bound report. The plot and report use the currently selected gas
+        pair so they stay in sync with the gas-pair radio after a re-run.
+        """
+        all_predictions, report, view_options = process_smiles_input(text_input, file_input, selected_models)
+        if all_predictions is None:
+            # Input or validation problem: clear the results and hide dependent controls
+            return (
+                None,
+                None,
+                report,
+                gr.Radio(visible=False),
+                gr.DownloadButton(visible=False),
+                None,
+                "Run prediction to see polymers above the bound."
+            )
+        # A fresh run always starts on the averaged view
+        df = format_predictions_dataframe(all_predictions, 'Average')
+        return (
+            all_predictions,
+            df,
+            report,
+            gr.Radio(
+                choices=view_options,
+                value='Average',
+                visible=True
+            ),
+            gr.DownloadButton(
+                label="📥 Download Results as CSV",
+                value=_write_results_csv(df),
+                visible=True
+            ),
+            create_selectivity_plot(all_predictions, 'Average', selectivity_pair),
+            get_polymers_above_bound(all_predictions, 'Average', selectivity_pair)
+        )
+    def on_view_change(all_predictions, selected_view, selectivity_pair):
+        """Re-render table, download file, plot and report for another model view."""
+        if all_predictions is None:
+            return None, gr.DownloadButton(visible=False), None, "No data available."
+        df = format_predictions_dataframe(all_predictions, selected_view)
+        return (
+            df,
+            gr.DownloadButton(
+                label=f"📥 Download {selected_view} Results as CSV",
+                value=_write_results_csv(df),
+                visible=True
+            ),
+            create_selectivity_plot(all_predictions, selected_view, selectivity_pair),
+            get_polymers_above_bound(all_predictions, selected_view, selectivity_pair)
+        )
+    def on_selectivity_change(all_predictions, selected_view, selectivity_pair):
+        """Re-render only the plot and the above-bound report for another gas pair."""
+        if all_predictions is None:
+            return None, "No data available."
+        plot_fig = create_selectivity_plot(all_predictions, selected_view, selectivity_pair)
+        above_bound_report = get_polymers_above_bound(all_predictions, selected_view, selectivity_pair)
+        return plot_fig, above_bound_report
+    predict_btn.click(
+        on_predict,
+        inputs=[smiles_text, smiles_file, model_selector, selectivity_pair_selector],
+        outputs=[all_predictions_state, prediction_results, prediction_status, view_selector, download_btn, selectivity_plot, polymers_above_bound]
+    )
+    view_selector.change(
+        on_view_change,
+        inputs=[all_predictions_state, view_selector, selectivity_pair_selector],
+        outputs=[prediction_results, download_btn, selectivity_plot, polymers_above_bound]
+    )
+    selectivity_pair_selector.change(
+        on_selectivity_change,
+        inputs=[all_predictions_state, view_selector, selectivity_pair_selector],
+        outputs=[selectivity_plot, polymers_above_bound]
+    )
+# Launch the interface only when run as a script (not when imported)
+if __name__ == "__main__":
+    # share=False: no public Gradio share link is requested
+    iface.launch(share=False)

requirements.txt ADDED Viewed

	@@ -0,0 +1,12 @@

+pyarrow
+pandas
+joblib
+scikit-learn==1.3.2
+rdkit==2023.9.6
+torch
+huggingface_hub
+gradio
+imageio
+spaces
+torch-molecule
+plotly