# Construction Materials Dashboard — Streamlit app.
# (Source scraped from a Hugging Face Space; page status header was "Spaces: Sleeping".)
import json

import numpy as np
import pandas as pd
import plotly.express as px
import streamlit as st
from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
| # Set Streamlit page configuration | |
| st.set_page_config(page_title="Construction Materials Dashboard", layout="wide") | |
| # Custom CSS for styling | |
| st.markdown(""" | |
| <style> | |
| .navbar { | |
| background-color: #1f77b4; | |
| padding: 1rem; | |
| border-radius: 8px; | |
| margin-bottom: 1rem; | |
| } | |
| .navbar-title { | |
| color: white; | |
| font-size: 24px; | |
| font-weight: bold; | |
| } | |
| .navbar-links a { | |
| color: white; | |
| margin-right: 1rem; | |
| text-decoration: none; | |
| font-size: 16px; | |
| } | |
| .navbar-links a:hover { | |
| text-decoration: underline; | |
| } | |
| .filter-container { | |
| background-color: #f5f5f5; | |
| padding: 1rem; | |
| border-radius: 8px; | |
| margin-bottom: 1rem; | |
| } | |
| </style> | |
| """, unsafe_allow_html=True) | |
| # Navbar | |
| st.markdown(""" | |
| <div class="navbar"> | |
| <div class="navbar-title">Construction Materials Dashboard</div> | |
| <div class="navbar-links"> | |
| <a href="#overview">Overview</a> | |
| <a href="#filters">Filters</a> | |
| <a href="#insights">LLM Insights</a> | |
| </div> | |
| </div> | |
| """, unsafe_allow_html=True) | |
| # File uploader for CSV | |
| st.subheader("Upload Dataset") | |
| uploaded_file = st.file_uploader("Choose a CSV file", type="csv") | |
| if uploaded_file is not None: | |
| # Load dataset from uploaded file | |
| def load_data(uploaded_file): | |
| try: | |
| return pd.read_csv(uploaded_file) | |
| except Exception as e: | |
| st.error(f"Error reading CSV file: {str(e)}") | |
| return None | |
| df = load_data(uploaded_file) | |
| if df is None: | |
| st.stop() | |
| # Display data preview | |
| st.write("**Uploaded Data Preview**") | |
| st.dataframe(df.head()) | |
| # Check for required columns | |
| required_columns = ['product_category', 'grade', 'ratings', 'supplier_name', 'bidding_amount'] | |
| missing_columns = [col for col in required_columns if col not in df.columns] | |
| if missing_columns: | |
| st.error(f"Missing required columns in CSV: {', '.join(missing_columns)}") | |
| st.stop() | |
| # Convert ratings to string for consistency | |
| df['ratings'] = df['ratings'].astype(str) | |
| # Filter section | |
| st.subheader("Filters", anchor="filters") | |
| with st.container(): | |
| st.markdown('<div class="filter-container">', unsafe_allow_html=True) | |
| col1, col2, col3 = st.columns(3) | |
| with col1: | |
| product_categories = sorted(df['product_category'].dropna().unique()) | |
| selected_categories = st.multiselect("Product Category", product_categories, default=product_categories) | |
| with col2: | |
| grades = sorted(df['grade'].dropna().unique()) | |
| selected_grades = st.multiselect("Grade", grades, default=grades) | |
| with col3: | |
| ratings = sorted(df['ratings'].dropna().unique()) | |
| selected_ratings = st.multiselect("Ratings", ratings, default=ratings) | |
| st.markdown('</div>', unsafe_allow_html=True) | |
| # Apply filters | |
| filtered_df = df[ | |
| (df['product_category'].isin(selected_categories)) & | |
| (df['grade'].isin(selected_grades)) & | |
| (df['ratings'].isin(selected_ratings)) | |
| ] | |
| if filtered_df.empty: | |
| st.warning("No data matches the selected filters. Please adjust your filter selections.") | |
| st.stop() | |
| # Aggregate by supplier | |
| seller_data = filtered_df.groupby("supplier_name").agg({ | |
| "bidding_amount": "sum" | |
| }).reset_index() | |
| # Top 5 sellers | |
| top_sellers = seller_data.sort_values("bidding_amount", ascending=False).head(5) | |
| # Overview section | |
| st.subheader("Overview", anchor="overview") | |
| if not top_sellers.empty: | |
| st.write("**Top 5 Suppliers by Total Bidding Amount**") | |
| fig_bar = px.bar( | |
| top_sellers, | |
| x="supplier_name", | |
| y="bidding_amount", | |
| labels={"supplier_name": "Supplier", "bidding_amount": "Bidding Amount"}, | |
| title="Top 5 Suppliers", | |
| color="supplier_name" | |
| ) | |
| fig_bar.update_layout(showlegend=False) | |
| st.plotly_chart(fig_bar, use_container_width=True) | |
| st.write("**Bidding Distribution (Top 5)**") | |
| fig_pie = px.pie( | |
| top_sellers, | |
| names="supplier_name", | |
| values="bidding_amount", | |
| title="Bidding Amount by Supplier" | |
| ) | |
| st.plotly_chart(fig_pie, use_container_width=True) | |
| else: | |
| st.warning("No supplier data available after filtering.") | |
| total_bidding = seller_data["bidding_amount"].sum() | |
| st.write(f"**Total Bidding Amount (All Suppliers):** ${total_bidding:,.2f}") | |
| # Convert complex types to simple Python types | |
| def convert_to_serializable(obj): | |
| if isinstance(obj, np.integer): | |
| return int(obj) | |
| elif isinstance(obj, np.floating): | |
| return float(obj) | |
| elif isinstance(obj, np.ndarray): | |
| return obj.tolist() | |
| elif isinstance(obj, (pd.Series, pd.DataFrame)): | |
| return obj.to_dict() | |
| elif isinstance(obj, dict): | |
| return {k: convert_to_serializable(v) for k, v in obj.items()} | |
| elif isinstance(obj, list): | |
| return [convert_to_serializable(i) for i in obj] | |
| return obj | |
| # LLM Section | |
| st.subheader("Ask Mistral About the Data", anchor="insights") | |
| user_query = st.text_input("Enter your question:", "Summarize why these are the top 5 suppliers.") | |
| # Load Mistral model from Hugging Face | |
| def load_mistral_pipeline(): | |
| try: | |
| model_id = "mistralai/Mistral-7B-Instruct-v0.1" | |
| tokenizer = AutoTokenizer.from_pretrained(model_id) | |
| model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", torch_dtype="auto") | |
| pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=512) | |
| return pipe | |
| except Exception as e: | |
| st.error(f"Error loading Mistral model: {str(e)}") | |
| return None | |
| if user_query: | |
| with st.spinner("Generating response..."): | |
| pipe = load_mistral_pipeline() | |
| if pipe is None: | |
| st.error("Cannot generate response due to model loading failure.") | |
| st.stop() | |
| # Prepare prompt | |
| top_sellers_json = json.dumps(convert_to_serializable(top_sellers), indent=2) | |
| filters_applied = { | |
| "product_category": selected_categories, | |
| "grade": selected_grades, | |
| "ratings": selected_ratings | |
| } | |
| prompt = f"""You are the helpful assistant. Based on the dataset below and filters, answer the following user question. | |
| Top 5 sellers: | |
| {top_sellers_json} | |
| Filters applied: | |
| {json.dumps(filters_applied, indent=2)} | |
| Question: | |
| {user_query} | |
| """ | |
| response = pipe(prompt)[0]['generated_text'] | |
| # Extract the answer | |
| answer = response[len(prompt):].strip() if response.startswith(prompt) else response.strip() | |
| st.markdown("**Mistral LLM Response:**") | |
| st.write(answer) | |
| else: | |
| st.info("Enter a question to ask Mistral about the bidding data.") | |
| else: | |
| st.warning("Please upload a CSV file to proceed.") |