Spaces:
Sleeping
Sleeping
| import os | |
| import json | |
| import time | |
| import traceback | |
| from pathlib import Path | |
| from typing import Dict, Any, List, Tuple | |
| import pandas as pd | |
| import gradio as gr | |
| import papermill as pm | |
| import plotly.graph_objects as go | |
| # Optional LLM (kept for compatibility with template) | |
| try: | |
| from huggingface_hub import InferenceClient | |
| except Exception: | |
| InferenceClient = None | |
| # ========================================================= | |
| # CONFIG | |
| # ========================================================= | |
# Directory containing this file; every other path below is derived from it.
BASE_DIR = Path(__file__).resolve().parent
# Notebook file names (resolved relative to BASE_DIR by run_notebook); overridable via env.
NB1 = os.environ.get("NB1", "datacreation.ipynb").strip()
NB2 = os.environ.get("NB2", "pythonanalysis.ipynb").strip()
# Executed notebook copies are written to RUNS_DIR; figures/tables are read from ART_DIR/py/...
RUNS_DIR = BASE_DIR / "runs"
ART_DIR = BASE_DIR / "artifacts"
PY_FIG_DIR = ART_DIR / "py" / "figures"
PY_TAB_DIR = ART_DIR / "py" / "tables"
# Passed to papermill as `execution_timeout`. int() raises ValueError at import
# time if the env value is not an integer string.
PAPERMILL_TIMEOUT = int(os.environ.get("PAPERMILL_TIMEOUT", "1800"))
# NOTE: the environment variable is MAX_FILE_PREVIEW_ROWS, not MAX_PREVIEW_ROWS.
MAX_PREVIEW_ROWS = int(os.environ.get("MAX_FILE_PREVIEW_ROWS", "50"))
# Default character budget for tail().
MAX_LOG_CHARS = int(os.environ.get("MAX_LOG_CHARS", "8000"))
# LLM-related settings. Nothing in the visible part of this file reads
# HF_API_KEY, MODEL_NAME or HF_PROVIDER -- presumably kept for template
# compatibility; confirm before removing.
HF_API_KEY = os.environ.get("HF_API_KEY", "").strip()
MODEL_NAME = os.environ.get("MODEL_NAME", "deepseek-ai/DeepSeek-R1").strip()
HF_PROVIDER = os.environ.get("HF_PROVIDER", "novita").strip()
# Only used to pick the status text shown on the prediction tab.
N8N_WEBHOOK_URL = os.environ.get("N8N_WEBHOOK_URL", "").strip()
# The LLM is hard-disabled here; no client is ever constructed in this file.
LLM_ENABLED = False
llm_client = None
| # ========================================================= | |
| # HELPERS | |
| # ========================================================= | |
def ensure_dirs():
    """Create the run and artifact directories, including parents, if missing."""
    for directory in (RUNS_DIR, ART_DIR, PY_FIG_DIR, PY_TAB_DIR):
        directory.mkdir(parents=True, exist_ok=True)
def stamp():
    """Return the current local time formatted as ``YYYYMMDD-HHMMSS``."""
    now = time.localtime()
    return time.strftime("%Y%m%d-%H%M%S", now)
def tail(text: str, n: int = MAX_LOG_CHARS) -> str:
    """Return at most the last *n* characters of *text*.

    ``None`` or empty *text* yields ``""``. A non-positive *n* also yields
    ``""``: slicing with ``[-0:]`` would otherwise return the whole string,
    and a negative *n* would drop a prefix instead of returning a tail.
    """
    if not text or n <= 0:
        return ""
    return text[-n:]
def _ls(dir_path: Path, exts: Tuple[str, ...]) -> List[str]:
    """List file names directly inside *dir_path* whose suffix is in *exts*.

    Suffix matching is case-insensitive on the file side (the suffix is
    lower-cased before the membership test). Returns a sorted list of bare
    names, or an empty list when *dir_path* is not a directory.
    """
    if not dir_path.is_dir():
        return []
    names: List[str] = []
    for entry in dir_path.iterdir():
        if not entry.is_file():
            continue
        if entry.suffix.lower() in exts:
            names.append(entry.name)
    names.sort()
    return names
def _read_csv(path: Path) -> pd.DataFrame:
    """Read at most ``MAX_PREVIEW_ROWS`` data rows of the CSV at *path*."""
    preview = pd.read_csv(path, nrows=MAX_PREVIEW_ROWS)
    return preview
def _read_json(path: Path):
    """Parse the UTF-8 encoded JSON file at *path* and return the decoded object."""
    raw = path.read_text(encoding="utf-8")
    return json.loads(raw)
def artifacts_index() -> Dict[str, Any]:
    """Return the names of the figure and table artifacts currently on disk.

    The result has the shape ``{"python": {"figures": [...], "tables": [...]}}``
    where each list comes from ``_ls`` on the corresponding artifact directory.
    """
    figure_names = _ls(PY_FIG_DIR, (".png", ".jpg", ".jpeg"))
    table_names = _ls(PY_TAB_DIR, (".csv", ".json"))
    python_artifacts = {"figures": figure_names, "tables": table_names}
    return {"python": python_artifacts}
def load_css() -> str:
    """Return the text of ``style.css`` next to this file, or ``""`` if absent."""
    stylesheet = BASE_DIR / "style.css"
    if not stylesheet.exists():
        return ""
    return stylesheet.read_text(encoding="utf-8")
| # ========================================================= | |
| # DATA LOADING | |
| # ========================================================= | |
def make_demo_dashboard_df() -> pd.DataFrame:
    """Build the built-in ten-row demo dataset used when no CSV can be loaded.

    Each row is one city / vehicle-type segment with its average final price,
    average rating, average sentiment and cancellation rate.
    """
    column_names = [
        "city",
        "vehicle_type",
        "avg_final_price_eur",
        "avg_rating",
        "avg_sentiment",
        "cancellation_rate",
    ]
    segments = [
        ("Paris", "E-Scooter", 4.6, 4.1, 0.12, 0.06),
        ("Paris", "E-Bike", 4.3, 4.2, 0.14, 0.05),
        ("Berlin", "E-Scooter", 4.9, 3.8, 0.05, 0.08),
        ("Berlin", "E-Bike", 4.5, 4.0, 0.09, 0.06),
        ("Madrid", "E-Scooter", 4.2, 4.3, 0.17, 0.05),
        ("Madrid", "Bus-Connect", 3.9, 4.1, 0.16, 0.04),
        ("Warsaw", "E-Scooter", 4.4, 3.9, 0.08, 0.07),
        ("Warsaw", "Shared-EV", 5.0, 4.0, 0.07, 0.05),
        ("Turin", "E-Bike", 4.1, 4.2, 0.10, 0.04),
        ("Turin", "Shared-EV", 4.8, 4.1, 0.09, 0.05),
    ]
    return pd.DataFrame(segments, columns=column_names)
def load_dashboard_df() -> pd.DataFrame:
    """Load the dashboard dataset from the first readable candidate CSV.

    Candidates are tried in order: ``merged_summary.csv`` and
    ``dashboard_data.csv`` in ``BASE_DIR``, then the same names in
    ``PY_TAB_DIR``. Column labels are converted to stripped strings.

    Returns:
        The first CSV that parses, or the built-in demo dataset when no
        candidate exists or every candidate fails to parse.
    """
    candidates = [
        BASE_DIR / "merged_summary.csv",
        BASE_DIR / "dashboard_data.csv",
        PY_TAB_DIR / "merged_summary.csv",
        PY_TAB_DIR / "dashboard_data.csv",
    ]
    for path in candidates:
        if not path.is_file():
            continue
        try:
            df = pd.read_csv(path)
        except Exception:
            # Still best-effort (fall through to the next candidate), but do not
            # hide the failure: silently showing demo data instead of a broken
            # real file is very hard to diagnose from the UI.
            print(f"WARNING: could not read dashboard data from {path}; trying next source.")
            traceback.print_exc()
            continue
        df.columns = [str(c).strip() for c in df.columns]
        return df
    return make_demo_dashboard_df()
def normalize_dashboard_df(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *df* carrying the six canonical dashboard columns.

    For each canonical column that is not already present under its exact
    name, the first existing column matching the canonical name or one of its
    known aliases is renamed to it. Matching ignores case and surrounding
    whitespace. Canonical columns that still cannot be found are created with
    a placeholder: ``"Unknown"`` for ``city`` / ``vehicle_type`` and ``0.0``
    for the numeric metrics.

    The previous implementation tested presence case-insensitively but renamed
    case-sensitively, so a column such as ``City`` was treated as present,
    never renamed, and then shadowed by a new ``city`` column of ``"Unknown"``.

    Args:
        df: Raw dashboard data; it is not modified.

    Returns:
        A new DataFrame containing at least ``city``, ``vehicle_type``,
        ``avg_final_price_eur``, ``avg_rating``, ``avg_sentiment`` and
        ``cancellation_rate``.
    """
    # Canonical name -> accepted alternative names, in priority order.
    aliases = {
        "city": [],
        "vehicle_type": ["ride_type", "vehicle", "VehicleType"],
        "avg_final_price_eur": ["final_price_eur", "avg_price", "avg_final_price", "price"],
        "avg_rating": ["rating", "avg_star_rating", "star_rating"],
        "avg_sentiment": ["sentiment", "compound", "vader_compound", "avg_compound_score"],
        "cancellation_rate": ["cancel_rate", "avg_cancellation_rate"],
    }
    text_columns = {"city", "vehicle_type"}

    df = df.copy()

    # Folded (stripped, lower-cased) label -> actual label; first occurrence wins.
    by_folded = {}
    for col in df.columns:
        by_folded.setdefault(str(col).strip().lower(), col)

    rename_map = {}
    for canonical, alternates in aliases.items():
        if canonical in df.columns:
            continue
        for candidate in (canonical, *alternates):
            actual = by_folded.get(candidate.lower())
            # Never rename one source column to two different targets.
            if actual is not None and actual not in rename_map:
                rename_map[actual] = canonical
                break
    if rename_map:
        df = df.rename(columns=rename_map)

    for needed in aliases:
        if needed not in df.columns:
            df[needed] = "Unknown" if needed in text_columns else 0.0
    return df
def filter_dashboard_df(df: pd.DataFrame, city: str, vehicle: str) -> pd.DataFrame:
    """Return the rows of *df* matching the selected city and vehicle type.

    The literal value ``"All"`` disables the corresponding filter. The input
    frame is never modified; the original row index is kept on the result.
    """
    selected = df.copy()
    for column, wanted in (("city", city), ("vehicle_type", vehicle)):
        if wanted == "All":
            continue
        selected = selected[selected[column] == wanted]
    return selected
| # ========================================================= | |
| # PIPELINE RUNNERS | |
| # ========================================================= | |
def run_notebook(nb_name: str) -> str:
    """Execute the notebook *nb_name* (relative to ``BASE_DIR``) with papermill.

    The executed copy is written to ``RUNS_DIR`` as
    ``run_<timestamp>_<notebook file name>``. Only the file-name part of
    *nb_name* is used for the output, so a notebook configured with a
    sub-folder (e.g. ``notebooks/x.ipynb``) does not point the output at a
    directory that does not exist under ``RUNS_DIR``.

    Returns:
        ``"Executed <nb_name>"`` on success, or ``"ERROR: <nb_name> not found."``
        when the input notebook does not exist.

    Raises:
        Whatever ``papermill.execute_notebook`` raises when execution fails.
    """
    ensure_dirs()
    nb_in = BASE_DIR / nb_name
    if not nb_in.exists():
        return f"ERROR: {nb_name} not found."
    nb_out = RUNS_DIR / f"run_{stamp()}_{Path(nb_name).name}"
    pm.execute_notebook(
        input_path=str(nb_in),
        output_path=str(nb_out),
        cwd=str(BASE_DIR),  # notebooks resolve their relative paths against the app dir
        log_output=True,
        progress_bar=False,
        request_save_on_cell_execute=True,
        execution_timeout=PAPERMILL_TIMEOUT,
    )
    return f"Executed {nb_name}"
def run_datacreation() -> str:
    """Run the data-creation notebook (``NB1``) and return a log message.

    Returns:
        ``"OK ..."`` followed by the CSV files found in ``BASE_DIR`` on
        success, or ``"FAILED ..."`` when the notebook is missing or raises.
        Exceptions are never propagated; the last 2000 characters of the
        traceback are included in the message instead.
    """
    try:
        log = run_notebook(NB1)
        # run_notebook reports a missing notebook as an "ERROR: ..." string
        # rather than raising; do not present that as a success.
        if log.startswith("ERROR"):
            return f"FAILED {log}"
        csvs = sorted(f.name for f in BASE_DIR.glob("*.csv"))
        return f"OK {log}\n\nCSVs now in /app:\n" + "\n".join(
            f" - {c}" for c in csvs
        )
    except Exception as e:
        return f"FAILED {e}\n\n{traceback.format_exc()[-2000:]}"
def run_pythonanalysis() -> str:
    """Run the analysis notebook (``NB2``) and return a log message.

    Returns:
        ``"OK ..."`` followed by the figure and table artifact names on
        success, or ``"FAILED ..."`` when the notebook is missing or raises.
        Exceptions are never propagated; the last 2000 characters of the
        traceback are included in the message instead.
    """
    try:
        log = run_notebook(NB2)
        # run_notebook reports a missing notebook as an "ERROR: ..." string
        # rather than raising; do not present that as a success.
        if log.startswith("ERROR"):
            return f"FAILED {log}"
        idx = artifacts_index()
        figs = idx["python"]["figures"]
        tabs = idx["python"]["tables"]
        return (
            f"OK {log}\n\n"
            f"Figures: {', '.join(figs) or '(none)'}\n"
            f"Tables: {', '.join(tabs) or '(none)'}"
        )
    except Exception as e:
        return f"FAILED {e}\n\n{traceback.format_exc()[-2000:]}"
def run_full_pipeline() -> str:
    """Run data creation, then analysis, and return both logs as one string.

    Step 2 is executed regardless of the outcome reported by step 1.
    """
    banner = "=" * 50
    # List elements are evaluated left to right, so step 1 runs before step 2.
    sections = [
        banner,
        "STEP 1/2: Data Creation",
        banner,
        run_datacreation(),
        "",
        banner,
        "STEP 2/2: Python Analysis",
        banner,
        run_pythonanalysis(),
    ]
    return "\n".join(sections)
| # ========================================================= | |
| # GALLERY LOADERS | |
| # ========================================================= | |
def _load_all_figures() -> List[Tuple[str, str]]:
    """Return ``(path, caption)`` pairs for every figure in ``PY_FIG_DIR``.

    Accepts the same image types that ``artifacts_index`` reports
    (``.png``, ``.jpg``, ``.jpeg``, matched case-insensitively) so the gallery
    and the artifact listing agree; previously only ``*.png`` was shown.
    Captions are the file stem with underscores replaced by spaces, in title
    case. Results are sorted by path; a missing directory yields ``[]``.
    """
    image_suffixes = (".png", ".jpg", ".jpeg")
    if not PY_FIG_DIR.is_dir():
        return []
    return [
        (str(p), p.stem.replace("_", " ").title())
        for p in sorted(PY_FIG_DIR.iterdir())
        if p.is_file() and p.suffix.lower() in image_suffixes
    ]
def _load_table_safe(path: Path) -> pd.DataFrame:
    """Load a preview of the table at *path* without ever raising.

    Files whose suffix is ``.json`` (case-insensitive, matching how the table
    list is built) are parsed as JSON: a top-level object becomes a single-row
    frame, anything else is handed to ``pd.DataFrame``. All other files are
    read as CSV. Both kinds are limited to ``MAX_PREVIEW_ROWS`` rows.

    Returns:
        The preview frame, or a one-row frame with an ``error`` column holding
        the exception message when loading fails.
    """
    try:
        if path.suffix.lower() == ".json":
            obj = _read_json(path)
            frame = pd.DataFrame([obj]) if isinstance(obj, dict) else pd.DataFrame(obj)
            # Keep JSON previews bounded the same way CSV previews are.
            return frame.head(MAX_PREVIEW_ROWS)
        return _read_csv(path)
    except Exception as e:
        return pd.DataFrame([{"error": str(e)}])
def refresh_gallery():
    """Collect current figures and tables for the gallery widgets.

    Returns a 3-tuple: the list of ``(path, caption)`` figure pairs, a
    ``gr.update`` that resets the table dropdown's choices and selects the
    first table (or ``None``), and a preview frame of that first table (an
    empty frame when there are no tables).
    """
    figure_items = _load_all_figures()
    table_names = list(artifacts_index()["python"]["tables"])

    if table_names:
        first_table = table_names[0]
        preview = _load_table_safe(PY_TAB_DIR / first_table)
    else:
        first_table = None
        preview = pd.DataFrame()

    dropdown_update = gr.update(choices=table_names, value=first_table)
    return (figure_items or [], dropdown_update, preview)
def on_table_select(choice: str):
    """Return a preview frame for the table file name chosen in the dropdown.

    *choice* must be a bare file name inside ``PY_TAB_DIR``. Values carrying
    any directory component (``../x.csv``, absolute paths) are rejected so a
    crafted request cannot read files outside the tables directory.

    Returns:
        The table preview, a one-row ``hint`` frame when nothing is selected,
        or a one-row ``error`` frame when the file is not available.
    """
    if not choice:
        return pd.DataFrame([{"hint": "Select a table above."}])
    not_found = pd.DataFrame([{"error": f"File not found: {choice}"}])
    # The value arrives from the client; accept plain file names only.
    if Path(choice).name != choice:
        return not_found
    path = PY_TAB_DIR / choice
    if not path.is_file():
        return not_found
    return _load_table_safe(path)
| # ========================================================= | |
| # DASHBOARD + PREDICTION | |
| # ========================================================= | |
def render_kpi_cards(city: str = "All", vehicle: str = "All") -> str:
    """Render the four KPI cards for the selected filters as an HTML string.

    The dashboard data is loaded, normalized and filtered by *city* and
    *vehicle* (``"All"`` disables a filter). The cards show the mean final
    price, mean rating, mean cancellation rate (as a percentage) and the share
    of rows whose ``avg_sentiment`` is above 0.05.

    Returns:
        The KPI grid HTML, or a "no data" notice when the filter matches no rows.
    """
    df = normalize_dashboard_df(load_dashboard_df())
    df = filter_dashboard_df(df, city, vehicle)
    if df.empty:
        return """
    <div style="padding:16px;background:#fef2f2;border-radius:12px;">
    <h4>No data available for the selected filters.</h4>
    </div>
    """
    avg_price = df["avg_final_price_eur"].mean()
    avg_rating = df["avg_rating"].mean()
    avg_cancel = df["cancellation_rate"].mean()
    # normalize_dashboard_df always provides `avg_sentiment`, so no fallback
    # value is needed here (the old hard-coded 53.8 default was unreachable).
    positive_reviews_pct = round((df["avg_sentiment"] > 0.05).mean() * 100, 1)
    return f"""
    <div style="display:grid;grid-template-columns:repeat(4,1fr);gap:12px;">
    <div style="padding:16px;background:#f5f5f5;border-radius:12px;">
    <h4>Avg Final Price</h4><p>€{avg_price:.2f}</p>
    </div>
    <div style="padding:16px;background:#f5f5f5;border-radius:12px;">
    <h4>Avg Rating</h4><p>{avg_rating:.2f} / 5</p>
    </div>
    <div style="padding:16px;background:#f5f5f5;border-radius:12px;">
    <h4>Cancellation Rate</h4><p>{avg_cancel * 100:.1f}%</p>
    </div>
    <div style="padding:16px;background:#f5f5f5;border-radius:12px;">
    <h4>Positive Segments</h4><p>{positive_reviews_pct:.1f}%</p>
    </div>
    </div>
    """
def refresh_dashboard(city: str = "All", vehicle: str = "All"):
    """Build the KPI HTML and the three Plotly charts for the given filters.

    Returns a 4-tuple ``(kpi_html, price_chart, sentiment_chart, rating_chart)``.
    When the filters match no rows, the same empty placeholder figure is
    returned for all three chart slots.
    """
    data = filter_dashboard_df(
        normalize_dashboard_df(load_dashboard_df()), city, vehicle
    )
    if data.empty:
        placeholder = go.Figure()
        placeholder.update_layout(title="No data for selected filters")
        return render_kpi_cards(city, vehicle), placeholder, placeholder, placeholder

    # One row per (city, vehicle_type) segment with the mean of each metric.
    segments = data.groupby(["city", "vehicle_type"], as_index=False).agg(
        avg_final_price_eur=("avg_final_price_eur", "mean"),
        avg_sentiment=("avg_sentiment", "mean"),
        avg_rating=("avg_rating", "mean"),
        cancellation_rate=("cancellation_rate", "mean"),
    )
    # Both segment charts share the same x-axis labels.
    segment_labels = [
        f"{row['city']} - {row['vehicle_type']}" for _, row in segments.iterrows()
    ]

    price_chart = go.Figure()
    price_chart.add_bar(x=segment_labels, y=segments["avg_final_price_eur"])
    price_chart.update_layout(
        title="Average Final Price by City / Vehicle",
        xaxis_title="Segment",
        yaxis_title="EUR",
    )

    sentiment_chart = go.Figure()
    sentiment_chart.add_bar(x=list(segment_labels), y=segments["avg_sentiment"])
    sentiment_chart.update_layout(
        title="Average Sentiment by City / Vehicle",
        xaxis_title="Segment",
        yaxis_title="Sentiment",
    )

    per_city = data.groupby("city", as_index=False).agg(
        avg_rating=("avg_rating", "mean"),
        cancellation_rate=("cancellation_rate", "mean"),
    )
    rating_chart = go.Figure()
    rating_chart.add_bar(
        name="Avg Rating", x=per_city["city"], y=per_city["avg_rating"]
    )
    # Cancellation rate is stored as a fraction; plot it as a percentage.
    rating_chart.add_bar(
        name="Cancellation Rate",
        x=per_city["city"],
        y=per_city["cancellation_rate"] * 100,
    )
    rating_chart.update_layout(
        title="Average Rating / Cancellation View",
        xaxis_title="City",
        yaxis_title="Value",
        barmode="group",
    )
    return render_kpi_cards(city, vehicle), price_chart, sentiment_chart, rating_chart
def predict_satisfaction(
    city,
    vehicle,
    distance_km,
    duration_min,
    final_price_eur,
    discount_pct,
    time_slot,
    cancellation_flag,
):
    """Score a ride with fixed rule-based adjustments and label the result.

    Starting from 0.50, the score is moved by a price rule (+0.15 at or below
    4.5, otherwise -0.10) and then by each optional rule that applies; it is
    finally clamped to [0, 1]. *city* is only echoed back in the result.

    Returns:
        A dict with ``city``, ``vehicle_type``, the high / low satisfaction
        probabilities (rounded to 3 decimals) and ``predicted_label``
        (``"High Satisfaction"`` when the score is at least 0.5).
    """
    price_delta = 0.15 if final_price_eur <= 4.5 else -0.10
    # (rule applies?, adjustment) -- applied in this order.
    adjustments = (
        (True, price_delta),
        (discount_pct >= 10, 0.10),
        (cancellation_flag == 1, -0.25),
        (time_slot == "Night", -0.10),
        (vehicle == "E-Bike", 0.05),
        (distance_km <= 4, 0.03),
        (duration_min > 25, -0.05),
    )
    probability = 0.50
    for applies, delta in adjustments:
        if applies:
            probability += delta
    probability = max(0.0, min(1.0, probability))

    if probability >= 0.5:
        label = "High Satisfaction"
    else:
        label = "Low Satisfaction"
    return {
        "city": city,
        "vehicle_type": vehicle,
        "high_satisfaction_probability": round(probability, 3),
        "low_satisfaction_probability": round(1 - probability, 3),
        "predicted_label": label,
    }
def get_pricing_recommendation(city, vehicle):
    """Return a fixed, rule-based pricing recommendation for a segment.

    E-Scooters in Berlin or Warsaw get a ``"Price Review"``; every other
    segment gets ``"Maintain Pricing"``, with a Madrid-specific reason.
    The result echoes *city* and *vehicle* back as ``city`` / ``vehicle_type``.
    """
    if vehicle == "E-Scooter" and city in ("Berlin", "Warsaw"):
        decision = "Price Review"
        reason = "Lower sentiment and higher price sensitivity in this segment."
    elif city == "Madrid":
        decision = "Maintain Pricing"
        reason = "Strong satisfaction profile and positive sentiment."
    else:
        decision = "Maintain Pricing"
        reason = "Segment looks stable based on current sentiment and pricing."
    return {
        "decision": decision,
        "reason": reason,
        "city": city,
        "vehicle_type": vehicle,
    }
| # ========================================================= | |
| # PRE-LOAD CHARTS AT STARTUP (fix for gradio 4.31 compatibility) | |
| # ========================================================= | |
# Build the initial KPI HTML and charts once at import time so the widgets can
# be created with a value. refresh_dashboard() already returns the KPI HTML for
# the same default filters, so a separate render_kpi_cards() call would only
# reload and re-aggregate the same data.
_kpi_init, _fig1_init, _fig2_init, _fig3_init = refresh_dashboard()
| # ========================================================= | |
| # APP UI | |
| # ========================================================= | |
# Top-level Gradio layout: three tabs (pipeline runner, dashboard, prediction).
# Custom CSS comes from style.css when present (see load_css).
with gr.Blocks(title="Urban Mobility AI App", css=load_css()) as demo:
    gr.Markdown(
        "# Urban Mobility Pricing & Satisfaction App\n"
        "*AI-enhanced dashboard for Group 08*",
        elem_id="escp_title",
    )
    # ===========================================================
    # TAB 1 -- Pipeline Runner
    # ===========================================================
    with gr.Tab("Pipeline Runner"):
        gr.Markdown("Run the data creation and analysis notebooks.")
        with gr.Row():
            with gr.Column(scale=1):
                btn_nb1 = gr.Button("Step 1: Data Creation", variant="secondary")
            with gr.Column(scale=1):
                btn_nb2 = gr.Button("Step 2: Python Analysis", variant="secondary")
        with gr.Row():
            btn_all = gr.Button("Run Full Pipeline (Both Steps)", variant="primary")
        run_log = gr.Textbox(
            label="Execution Log",
            lines=18,
            max_lines=30,
            interactive=False,
        )
        # All three buttons take no inputs and write their returned log text
        # into the same read-only textbox.
        btn_nb1.click(run_datacreation, outputs=[run_log])
        btn_nb2.click(run_pythonanalysis, outputs=[run_log])
        btn_all.click(run_full_pipeline, outputs=[run_log])
    # ===========================================================
    # TAB 2 -- Urban Mobility Dashboard
    # ===========================================================
    with gr.Tab("Urban Mobility Dashboard"):
        gr.Markdown("### Urban Mobility KPIs & Visual Insights")
        # KPI cards and charts are created with the values computed at startup.
        kpi_html = gr.HTML(value=_kpi_init)
        with gr.Row():
            # NOTE: filter choices are hard-coded; they match the demo dataset
            # and are not derived from the loaded CSV.
            city_filter = gr.Dropdown(
                label="Select City",
                choices=["All", "Paris", "Berlin", "Madrid", "Warsaw", "Turin"],
                value="All",
                interactive=True,
            )
            vehicle_filter = gr.Dropdown(
                label="Select Vehicle Type",
                choices=["All", "E-Scooter", "E-Bike", "Shared-EV", "Bus-Connect"],
                value="All",
                interactive=True,
            )
        refresh_btn = gr.Button("Refresh Dashboard", variant="primary")
        gr.Markdown("#### Interactive Charts")
        chart_price = gr.Plot(value=_fig1_init, label="Average Final Price by City / Vehicle")
        chart_sentiment = gr.Plot(value=_fig2_init, label="Sentiment by City / Vehicle")
        chart_rating = gr.Plot(value=_fig3_init, label="Average Rating / Cancellation View")
        gr.Markdown("#### Static Figures (from notebooks)")
        # The gallery and the table widgets start empty; they are only filled
        # by the "Refresh Dashboard" handler below.
        gallery = gr.Gallery(
            label="Generated Figures",
            columns=2,
            height=480,
            object_fit="contain",
        )
        gr.Markdown("#### Data Tables")
        table_dropdown = gr.Dropdown(
            label="Select a table to view",
            choices=[],
            interactive=True,
        )
        table_display = gr.Dataframe(
            label="Table Preview",
            interactive=False,
        )
        def _on_refresh(city, vehicle):
            """Recompute KPIs/charts for the filters and reload gallery + tables."""
            kpi, c1, c2, c3 = refresh_dashboard(city, vehicle)
            figs, dd, df = refresh_gallery()
            # Order must match the `outputs` list of refresh_btn.click below.
            return kpi, c1, c2, c3, figs, dd, df
        refresh_btn.click(
            _on_refresh,
            inputs=[city_filter, vehicle_filter],
            outputs=[
                kpi_html,
                chart_price,
                chart_sentiment,
                chart_rating,
                gallery,
                table_dropdown,
                table_display,
            ],
        )
        # Selecting a different table reloads only the preview frame.
        table_dropdown.change(
            on_table_select,
            inputs=[table_dropdown],
            outputs=[table_display],
        )
    # ===========================================================
    # TAB 3 -- Prediction + Recommendation
    # ===========================================================
    with gr.Tab("Prediction + Recommendation"):
        # Status text only: the handlers on this tab always call the local
        # rule-based functions, whatever N8N_WEBHOOK_URL / LLM_ENABLED say.
        _ai_status = (
            "Connected to your **n8n workflow**." if N8N_WEBHOOK_URL
            else "**LLM active.**" if LLM_ENABLED
            else "Using local logic. Add `N8N_WEBHOOK_URL` later for workflow integration."
        )
        gr.Markdown(
            "### Predict user satisfaction and generate pricing recommendations\n\n"
            f"{_ai_status}"
        )
        with gr.Row():
            with gr.Column():
                pred_city = gr.Dropdown(
                    label="City",
                    choices=["Paris", "Berlin", "Madrid", "Warsaw", "Turin"],
                    value="Berlin",
                )
                pred_vehicle = gr.Dropdown(
                    label="Vehicle Type",
                    choices=["E-Scooter", "E-Bike", "Shared-EV", "Bus-Connect"],
                    value="E-Scooter",
                )
                pred_distance = gr.Number(label="Distance (km)", value=3.5)
                pred_duration = gr.Number(label="Duration (min)", value=12)
                pred_final_price = gr.Number(label="Final Price (EUR)", value=4.2)
                pred_discount = gr.Number(label="Discount (%)", value=10)
                pred_time_slot = gr.Dropdown(
                    label="Time Slot",
                    choices=["Morning", "Afternoon", "Evening", "Night"],
                    value="Evening",
                )
                # predict_satisfaction compares this value with the integer 1.
                pred_cancel = gr.Dropdown(
                    label="Cancellation Flag",
                    choices=[0, 1],
                    value=0,
                )
                predict_btn = gr.Button("Predict Satisfaction", variant="primary")
                recommend_btn = gr.Button("Get Pricing Recommendation")
            with gr.Column():
                prediction_output = gr.JSON(label="Prediction Output")
                recommendation_output = gr.JSON(label="Recommendation Output")
        # Input order must match the positional parameters of predict_satisfaction.
        predict_btn.click(
            predict_satisfaction,
            inputs=[
                pred_city,
                pred_vehicle,
                pred_distance,
                pred_duration,
                pred_final_price,
                pred_discount,
                pred_time_slot,
                pred_cancel,
            ],
            outputs=[prediction_output],
        )
        recommend_btn.click(
            get_pricing_recommendation,
            inputs=[pred_city, pred_vehicle],
            outputs=[recommendation_output],
        )
# Runs at import time (no __main__ guard). NOTE(review): allowed_paths is
# understood to let Gradio serve files under the listed directories -- here the
# whole app directory; confirm whether only the artifacts folder is needed.
demo.launch(allowed_paths=[str(BASE_DIR)])