Nugget-cloud committed on
Commit
ec77d63
·
verified ·
1 Parent(s): 680083f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +243 -181
app.py CHANGED
@@ -1,196 +1,258 @@
1
  import gradio as gr
2
  import joblib
 
3
  import numpy as np
 
 
4
  from huggingface_hub import hf_hub_download
5
- import json
6
-
7
- # Global variables to store loaded models
8
- ensemble_model = None
9
- feature_scaler = None
10
- feature_imputer = None
11
- variance_selector = None
12
- feature_info = None
13
- model_metrics = None
14
-
15
- def load_models():
16
- """Load all models from the Hugging Face repository"""
17
- global ensemble_model, feature_scaler, feature_imputer, variance_selector, feature_info, model_metrics
18
-
 
19
  try:
20
- # Load models from your repository
21
- repo_id = "Nugget-cloud/nasa-space-apps-exoplanet"
22
-
23
- print("Loading ensemble model...")
24
- ensemble_model = joblib.load(hf_hub_download(repo_id, "exoplanet_ensemble_model.joblib"))
25
-
26
- print("Loading feature scaler...")
27
- feature_scaler = joblib.load(hf_hub_download(repo_id, "feature_scaler.joblib"))
28
-
29
- print("Loading feature imputer...")
30
- feature_imputer = joblib.load(hf_hub_download(repo_id, "feature_imputer.joblib"))
31
-
32
- print("Loading variance selector...")
33
- variance_selector = joblib.load(hf_hub_download(repo_id, "variance_selector.joblib"))
34
-
35
- # Optional files
36
- try:
37
- print("Loading feature info...")
38
- feature_info = joblib.load(hf_hub_download(repo_id, "feature_info.joblib"))
39
- except:
40
- print("Feature info not found, skipping...")
41
- feature_info = None
42
-
43
- try:
44
- print("Loading model metrics...")
45
- model_metrics = joblib.load(hf_hub_download(repo_id, "model_metrics.joblib"))
46
- except:
47
- print("Model metrics not found, skipping...")
48
- model_metrics = None
49
-
50
- print("All models loaded successfully!")
51
- return True
52
-
53
  except Exception as e:
54
- print(f"Error loading models: {str(e)}")
55
- return False
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
 
57
- def predict_exoplanet(features_input):
58
- """Make prediction using the loaded models"""
59
- global ensemble_model, feature_scaler, feature_imputer, variance_selector
60
 
61
- try:
62
- # Load models if not already loaded
63
- if ensemble_model is None:
64
- if not load_models():
65
- return {"error": "Failed to load models"}
66
-
67
- # Parse input features
68
- if isinstance(features_input, str):
69
- # If input is comma-separated string
70
- features = [float(x.strip()) for x in features_input.split(',')]
71
- elif isinstance(features_input, list):
72
- # If input is already a list
73
- features = [float(x) for x in features_input]
74
- else:
75
- return {"error": "Invalid input format. Expected comma-separated string or list of numbers."}
76
-
77
- # Convert to numpy array
78
- features_array = np.array(features).reshape(1, -1)
79
-
80
- print(f"Original features shape: {features_array.shape}")
81
- print(f"Original features: {features_array}")
82
-
83
- # Apply preprocessing pipeline
84
- # 1. Impute missing values
85
- if feature_imputer:
86
- features_array = feature_imputer.transform(features_array)
87
- print(f"After imputation: {features_array.shape}")
88
-
89
- # 2. Scale features
90
- if feature_scaler:
91
- features_array = feature_scaler.transform(features_array)
92
- print(f"After scaling: {features_array.shape}")
93
-
94
- # 3. Select features (variance threshold)
95
- if variance_selector:
96
- features_array = variance_selector.transform(features_array)
97
- print(f"After variance selection: {features_array.shape}")
98
-
99
- # 4. Make prediction
100
- prediction = ensemble_model.predict(features_array)[0]
101
-
102
- # Get prediction probabilities if available
103
- probabilities = None
104
- if hasattr(ensemble_model, 'predict_proba'):
105
- probabilities = ensemble_model.predict_proba(features_array)[0].tolist()
106
-
107
- result = {
108
- "success": True,
109
- "prediction": int(prediction),
110
- "probabilities": probabilities,
111
- "confidence": max(probabilities) if probabilities else None,
112
- "input_features_count": len(features),
113
- "processed_features_count": features_array.shape[1],
114
- "model_info": {
115
- "model_type": str(type(ensemble_model).__name__),
116
- "has_probabilities": hasattr(ensemble_model, 'predict_proba')
117
- }
 
 
 
 
 
 
 
118
  }
119
-
120
- if feature_info:
121
- result["feature_info"] = feature_info
122
-
123
- if model_metrics:
124
- result["model_metrics"] = model_metrics
125
-
126
- return result
127
-
 
 
 
 
 
 
128
  except Exception as e:
129
- return {
130
- "success": False,
131
- "error": str(e)
132
- }
133
 
134
- def predict_api(features_str):
135
- """API endpoint function"""
136
- result = predict_exoplanet(features_str)
137
- return result
138
-
139
- # Create Gradio interface with API support
140
- def create_interface():
141
- with gr.Blocks(title="🪐 Exoplanet Classification API") as iface:
142
- gr.Markdown("# 🪐 Exoplanet Classification Model")
143
- gr.Markdown("Enter comma-separated feature values for exoplanet prediction using NASA Kepler/TESS data.")
144
-
145
- with gr.Row():
146
- with gr.Column():
147
- features_input = gr.Textbox(
148
- label="Features (comma-separated)",
149
- placeholder="1.2,3.4,5.6,7.8,9.1,2.3,4.5,6.7",
150
- info="Enter numerical features separated by commas"
151
- )
152
- predict_btn = gr.Button("Predict", variant="primary")
153
 
154
- with gr.Column():
155
- output = gr.JSON(label="Prediction Result")
156
-
157
- # Connect the button click to the function
158
- predict_btn.click(
159
- fn=predict_api,
160
- inputs=features_input,
161
- outputs=output,
162
- api_name="predict" # This creates an API endpoint
163
- )
164
-
165
- # Example inputs
166
- gr.Markdown("### Example Inputs:")
167
- gr.Markdown("Try these example feature sets:")
168
-
169
- examples = gr.Examples(
170
- examples=[
171
- ["1.2,3.4,5.6,7.8,9.1,2.3,4.5,6.7"],
172
- ["0.5,1.8,2.1,4.2,6.3,1.9,3.7,5.2"],
173
- ["2.1,4.3,6.5,8.7,10.9,3.2,5.4,7.6"]
174
- ],
175
- inputs=features_input,
176
- outputs=output,
177
- fn=predict_api
178
- )
179
-
180
- gr.Markdown("""
181
- ### API Usage
182
- This Space provides an API endpoint at `/api/predict` that accepts:
183
- ```json
184
- {"data": ["1.2,3.4,5.6,7.8,9.1,2.3,4.5,6.7"]}
185
- ```
186
- """)
187
 
188
- return iface
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
189
 
190
  if __name__ == "__main__":
191
- demo = create_interface()
192
- demo.launch(
193
- server_name="0.0.0.0",
194
- server_port=7860,
195
- share=True
196
- )
 
1
  import gradio as gr
2
  import joblib
3
+ import pandas as pd
4
  import numpy as np
5
+ import plotly.graph_objects as go
6
+ import plotly.express as px
7
  from huggingface_hub import hf_hub_download
8
+ import warnings
9
+
10
+ # ==================== CONFIGURATION ====================
11
+ # ⚠️ UPDATE THIS WITH YOUR HUGGING FACE REPOSITORY ID
12
+ HF_REPO_ID = "YOUR_USERNAME/YOUR_REPO_NAME"
13
+ MODEL_FILENAME = "exoplanet_final_model.joblib"
14
+
15
+ # Suppress specific warnings for a cleaner output
16
+ warnings.filterwarnings("ignore", category=UserWarning, message="Trying to unpickle estimator.*")
17
+ warnings.filterwarnings("ignore", category=FutureWarning)
18
+
19
+ # ==================== LOAD MODEL FROM HUGGING FACE ====================
20
+ @gr.cache(show_api=False)
21
+ def load_model_package(repo_id, filename):
22
+ """Load the complete model package from Hugging Face Hub"""
23
  try:
24
+ model_path = hf_hub_download(repo_id=repo_id, filename=filename)
25
+ package = joblib.load(model_path)
26
+ return package
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  except Exception as e:
28
+ # Fallback for local development if HF download fails
29
+ print(f"Could not download from Hugging Face: {e}. Trying local file...")
30
+ try:
31
+ package = joblib.load(filename)
32
+ return package
33
+ except FileNotFoundError:
34
+ raise gr.Error(f"Model file not found locally or on Hugging Face at {repo_id}. Please check HF_REPO_ID and ensure the model file is available.")
35
+ except Exception as e_local:
36
+ raise gr.Error(f"Error loading local model: {e_local}")
37
+
38
+ # Load package and extract components
39
+ try:
40
+ print("Loading AI model...")
41
+ package = load_model_package(HF_REPO_ID, MODEL_FILENAME)
42
+ model = package['ensemble_model']
43
+ scaler = package['scaler']
44
+ feature_names = package['feature_names']
45
+ metadata = package.get('metadata', {}) # Use .get for safety
46
+ print("AI model loaded successfully.")
47
+ except Exception as e:
48
+ # If model loading fails, we can't run the app.
49
+ print(str(e))
50
+ # Create a dummy structure to prevent the UI from crashing on startup
51
+ model, scaler, feature_names, metadata = None, None, [], {'missions': ['N/A'], 'version': 'Error'}
52
+
53
+
54
+ # ==================== PREDICTION LOGIC ====================
55
+ def predict_exoplanet(period, duration, depth, planet_radius, equilibrium_temp, insolation, star_radius, star_temp, star_logg, mission):
56
+ """Predicts if a candidate is an exoplanet based on input features."""
57
+ if not model:
58
+ raise gr.Error("Model is not loaded. Cannot perform prediction.")
59
 
60
+ # Create feature dictionary from inputs
61
+ features_dict = {}
 
62
 
63
+ # Basic features
64
+ feature_map = {
65
+ 'period': period, 'duration': duration, 'depth': depth,
66
+ 'planet_radius': planet_radius, 'star_radius': star_radius,
67
+ 'star_temp': star_temp, 'star_logg': star_logg,
68
+ 'equilibrium_temp': equilibrium_temp, 'insolation_flux': insolation
69
+ }
70
+
71
+ for fname, fval in feature_map.items():
72
+ if fname in feature_names:
73
+ features_dict[fname] = fval
74
+
75
+ # Engineered features
76
+ if 'transit_period_ratio' in feature_names and period > 0:
77
+ features_dict['transit_period_ratio'] = duration / (period * 24)
78
+ if 'radius_ratio' in feature_names and star_radius > 0:
79
+ features_dict['radius_ratio'] = planet_radius / star_radius
80
+ if 'period_log' in feature_names and period > 0:
81
+ features_dict['period_log'] = np.log10(period)
82
+ if 'insolation_flux_log' in feature_names and insolation > 0:
83
+ features_dict['insolation_flux_log'] = np.log10(insolation)
84
+ if 'habitable_zone_dist' in feature_names:
85
+ features_dict['habitable_zone_dist'] = abs(equilibrium_temp - 288) / 288
86
+
87
+ if 'star_class' in feature_names:
88
+ if star_temp >= 7500: features_dict['star_class'] = 5
89
+ elif star_temp >= 6000: features_dict['star_class'] = 4
90
+ elif star_temp >= 5200: features_dict['star_class'] = 3
91
+ elif star_temp >= 3700: features_dict['star_class'] = 2
92
+ else: features_dict['star_class'] = 1
93
+
94
+ if 'luminosity_class' in feature_names:
95
+ if star_logg < 3.5: features_dict['luminosity_class'] = 3
96
+ elif star_logg < 4.0: features_dict['luminosity_class'] = 2
97
+ else: features_dict['luminosity_class'] = 1
98
+
99
+ for m in metadata.get('missions', []):
100
+ col_name = f'mission_{m}'
101
+ if col_name in feature_names:
102
+ features_dict[col_name] = 1 if m == mission else 0
103
+
104
+ # Create feature vector in the correct order
105
+ feature_vector = [features_dict.get(f, 0) for f in feature_names]
106
+ X_input = np.array(feature_vector).reshape(1, -1)
107
+
108
+ # Scale and predict
109
+ X_scaled = scaler.transform(X_input)
110
+ prediction = model.predict(X_scaled)[0]
111
+ probabilities = model.predict_proba(X_scaled)[0]
112
+
113
+ # Prepare outputs
114
+ result_label_val = "PLANET DETECTED!" if prediction == 1 else "FALSE POSITIVE"
115
+ confidence = probabilities[1] if prediction == 1 else probabilities[0]
116
+
117
+ # Probability gauge
118
+ gauge_fig = go.Figure(go.Indicator(
119
+ mode="gauge+number",
120
+ value=probabilities[1] * 100,
121
+ title={'text': "Planet Probability (%)"},
122
+ gauge={
123
+ 'axis': {'range': [0, 100]},
124
+ 'bar': {'color': "darkblue"},
125
+ 'steps': [{'range': [0, 50], 'color': "lightgray"}, {'range': [50, 100], 'color': "lightgreen"}],
126
+ 'threshold': {'line': {'color': "red", 'width': 4}, 'thickness': 0.75, 'value': 50}
127
  }
128
+ ))
129
+ gauge_fig.update_layout(height=250, margin=dict(l=20, r=20, t=50, b=20))
130
+
131
+ return {result_label_val: confidence}, gauge_fig
132
+
133
+ # ==================== BATCH ANALYSIS LOGIC ====================
134
+ def batch_analysis(file_obj):
135
+ """Performs batch prediction on an uploaded CSV file."""
136
+ if not model:
137
+ raise gr.Error("Model is not loaded. Cannot perform batch analysis.")
138
+ if file_obj is None:
139
+ return None, "Please upload a file first."
140
+
141
+ try:
142
+ df_upload = pd.read_csv(file_obj.name)
143
  except Exception as e:
144
+ return None, f"Error reading CSV: {e}"
 
 
 
145
 
146
+ # For this simplified batch prediction, we only use columns that directly match model features.
147
+ # A more robust implementation would perform full feature engineering for each row.
148
+ X_batch = pd.DataFrame(columns=feature_names)
149
+
150
+ for col in feature_names:
151
+ if col in df_upload.columns:
152
+ X_batch[col] = df_upload[col]
153
+ else:
154
+ X_batch[col] = 0 # Fill missing feature columns with 0
 
 
 
 
 
 
 
 
 
 
155
 
156
+ X_batch = X_batch.fillna(0)
157
+
158
+ # Scale and predict
159
+ X_scaled = scaler.transform(X_batch)
160
+ predictions = model.predict(X_scaled)
161
+ probabilities = model.predict_proba(X_scaled)[:, 1]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
162
 
163
+ # Add results to a new dataframe for clarity
164
+ results_df = df_upload.copy()
165
+ results_df['prediction'] = ['Planet' if p == 1 else 'False Positive' for p in predictions]
166
+ results_df['planet_probability'] = probabilities
167
+
168
+ return results_df, f"Analysis complete for {len(results_df)} candidates."
169
+
170
+ # ==================== GRADIO UI ====================
171
+ css = """
172
+ .main-header { font-size: 2.5rem; font-weight: bold; text-align: center; }
173
+ .sub-header { text-align: center; color: #666; font-size: 1.2rem; }
174
+ """
175
+
176
+ with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:
177
+ gr.Markdown('<div class="main-header">🪐 NASA Exoplanet AI Detector</div>', elem_classes="main-header")
178
+ gr.Markdown(f"<div class='sub-header'>AI-Powered Exoplanet Detection | Model Version: {metadata.get('version', 'N/A')}</div>", elem_classes="sub-header")
179
+
180
+ with gr.Tabs():
181
+ with gr.TabItem("Single Prediction"):
182
+ gr.Markdown("### Analyze a Single Exoplanet Candidate")
183
+ with gr.Row():
184
+ with gr.Column(scale=2):
185
+ with gr.Accordion("Orbital & Planet Properties", open=True):
186
+ period = gr.Slider(0.0, 10000.0, value=10.0, label="Orbital Period (days)")
187
+ duration = gr.Slider(0.0, 48.0, value=3.0, label="Transit Duration (hours)")
188
+ depth = gr.Slider(0.0, 100000.0, value=1000.0, label="Transit Depth (ppm)")
189
+ planet_radius = gr.Slider(0.1, 100.0, value=1.0, label="Planet Radius (Earth radii)")
190
+ equilibrium_temp = gr.Slider(0, 5000, value=288, label="Equilibrium Temperature (K)")
191
+ insolation = gr.Slider(0.0, 10000.0, value=1.0, label="Insolation Flux (Earth units)")
192
+
193
+ with gr.Accordion("Stellar Properties & Mission", open=True):
194
+ star_radius = gr.Slider(0.1, 50.0, value=1.0, label="Star Radius (Solar radii)")
195
+ star_temp = gr.Slider(2000, 50000, value=5778, label="Star Temperature (K)")
196
+ star_logg = gr.Slider(0.0, 5.0, value=4.4, label="Star log(g)")
197
+ mission = gr.Dropdown(metadata.get('missions', ['N/A']), label="Mission", value=metadata.get('missions', ['N/A'])[0])
198
+
199
+ predict_btn = gr.Button("Analyze Candidate", variant="primary")
200
+
201
+ with gr.Column(scale=1):
202
+ gr.Markdown("### Prediction Results")
203
+ result_label = gr.Label(label="Prediction")
204
+ gauge_plot = gr.Plot(label="Probability Gauge")
205
+
206
+ predict_btn.click(
207
+ fn=predict_exoplanet,
208
+ inputs=[period, duration, depth, planet_radius, equilibrium_temp, insolation, star_radius, star_temp, star_logg, mission],
209
+ outputs=[result_label, gauge_plot],
210
+ api_name="predict"
211
+ )
212
+
213
+ with gr.TabItem("Batch Analysis"):
214
+ gr.Markdown("### Batch Analysis of Exoplanet Candidates")
215
+ gr.Info("Upload a CSV file. The file should contain columns matching the model's features for best results.")
216
+ with gr.Row():
217
+ file_input = gr.File(label="Upload CSV", file_types=[".csv"])
218
+ batch_status = gr.Textbox(label="Status", interactive=False)
219
+ batch_run_btn = gr.Button("⚡ Analyze All Candidates", variant="primary")
220
+ gr.Markdown("### Results")
221
+ batch_output_df = gr.DataFrame(label="Batch Results")
222
+
223
+ batch_run_btn.click(fn=batch_analysis, inputs=[file_input], outputs=[batch_output_df, batch_status], api_name="batch_predict")
224
+
225
+ with gr.TabItem("Model Analytics"):
226
+ gr.Markdown("### Model Performance & Dataset Information")
227
+ with gr.Row():
228
+ gr.Textbox(f"{metadata.get('test_accuracy', 0)*100:.2f}%", label="Test Accuracy")
229
+ gr.Textbox(f"{metadata.get('test_precision', 0):.3f}", label="Precision")
230
+ gr.Textbox(f"{metadata.get('test_recall', 0):.3f}", label="Recall")
231
+ gr.Textbox(f"{metadata.get('test_f1_score', 0):.3f}", label="F1 Score")
232
+ gr.Textbox(f"{metadata.get('test_roc_auc', 0):.3f}", label="ROC-AUC")
233
+
234
+ if 'validation_scores' in metadata:
235
+ gr.Markdown("### Individual Model Performance (Validation Set)")
236
+ val_scores_df = pd.DataFrame([{"Model": k, "ROC-AUC": v} for k, v in metadata['validation_scores'].items()]).sort_values('ROC-AUC', ascending=False)
237
+ fig = px.bar(val_scores_df, x='ROC-AUC', y='Model', orientation='h', title='Model Comparison (Validation ROC-AUC)', color='ROC-AUC', color_continuous_scale='viridis')
238
+ fig.update_layout(height=400, yaxis={'categoryorder':'total ascending'})
239
+ gr.Plot(value=fig)
240
+
241
+ with gr.TabItem("ℹ About"):
242
+ gr.Markdown("""
243
+ ### 🚀 Project Overview
244
+ This application provides an interface for an AI model designed to detect exoplanets from NASA telescope data. It is built for the **NASA Space Apps Challenge 2025**.
245
+ ### 📊 Data Sources
246
+ The model was trained on publicly available data from multiple NASA missions, including Kepler, K2, and TESS.
247
+ ### 🤖 Machine Learning Approach
248
+ The core of this system is a sophisticated **ensemble model**, which combines the predictions of several machine learning algorithms to achieve higher accuracy and robustness.
249
+ ### 🔗 Resources
250
+ - [NASA Exoplanet Archive](https://exoplanetarchive.ipac.caltech.edu/)
251
+ - [NASA Space Apps Challenge](https://www.spaceappschallenge.org/)
252
+ - [Hugging Face (for model hosting)](https://huggingface.co/)
253
+ - [Gradio (for the web UI)](https://www.gradio.app/)
254
+ """)
255
 
256
  if __name__ == "__main__":
257
+ demo.launch()
258
+