"""Construction Materials Dashboard.

Streamlit app: upload a bidding CSV, filter by category/grade/rating,
visualize the top-5 suppliers by total bidding amount, and ask a local
Mistral-7B-Instruct model questions about the filtered data.
"""

import json

import numpy as np
import pandas as pd
import plotly.express as px
import streamlit as st
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Page configuration
st.set_page_config(page_title="Construction Materials Dashboard", layout="wide")

# Custom CSS and navbar placeholders.
# NOTE(review): the original HTML payloads for these two markdown calls were
# empty/stripped — restore the real <style>/<nav> markup here if available.
st.markdown("", unsafe_allow_html=True)
st.markdown("", unsafe_allow_html=True)

# --- Dataset upload -------------------------------------------------------
st.subheader("Upload Dataset")
uploaded_file = st.file_uploader("Choose a CSV file", type="csv")

if uploaded_file is not None:

    @st.cache_data
    def load_data(uploaded_file):
        """Parse the uploaded CSV into a DataFrame.

        Returns None (after surfacing the error in the UI) when the file
        cannot be read, so the caller can stop the app cleanly.
        """
        try:
            return pd.read_csv(uploaded_file)
        except Exception as e:
            st.error(f"Error reading CSV file: {str(e)}")
            return None

    df = load_data(uploaded_file)
    if df is None:
        st.stop()

    # Data preview
    st.write("**Uploaded Data Preview**")
    st.dataframe(df.head())

    # Validate schema before doing any work that assumes these columns.
    required_columns = [
        "product_category",
        "grade",
        "ratings",
        "supplier_name",
        "bidding_amount",
    ]
    missing_columns = [col for col in required_columns if col not in df.columns]
    if missing_columns:
        st.error(f"Missing required columns in CSV: {', '.join(missing_columns)}")
        st.stop()

    # Ratings may arrive as mixed int/float/str; normalize to str so the
    # multiselect options and the .isin() filter compare consistently.
    df["ratings"] = df["ratings"].astype(str)

    # --- Filters ----------------------------------------------------------
    st.subheader("Filters", anchor="filters")
    with st.container():
        col1, col2, col3 = st.columns(3)
        with col1:
            product_categories = sorted(df["product_category"].dropna().unique())
            selected_categories = st.multiselect(
                "Product Category", product_categories, default=product_categories
            )
        with col2:
            grades = sorted(df["grade"].dropna().unique())
            selected_grades = st.multiselect("Grade", grades, default=grades)
        with col3:
            ratings = sorted(df["ratings"].dropna().unique())
            selected_ratings = st.multiselect("Ratings", ratings, default=ratings)

    # Apply all three filters at once.
    filtered_df = df[
        (df["product_category"].isin(selected_categories))
        & (df["grade"].isin(selected_grades))
        & (df["ratings"].isin(selected_ratings))
    ]

    if filtered_df.empty:
        st.warning(
            "No data matches the selected filters. Please adjust your filter selections."
        )
        st.stop()

    # --- Aggregation ------------------------------------------------------
    # Total bidding amount per supplier, then the top 5.
    seller_data = (
        filtered_df.groupby("supplier_name")
        .agg({"bidding_amount": "sum"})
        .reset_index()
    )
    top_sellers = seller_data.sort_values("bidding_amount", ascending=False).head(5)

    # --- Overview charts --------------------------------------------------
    st.subheader("Overview", anchor="overview")
    if not top_sellers.empty:
        st.write("**Top 5 Suppliers by Total Bidding Amount**")
        fig_bar = px.bar(
            top_sellers,
            x="supplier_name",
            y="bidding_amount",
            labels={"supplier_name": "Supplier", "bidding_amount": "Bidding Amount"},
            title="Top 5 Suppliers",
            color="supplier_name",
        )
        fig_bar.update_layout(showlegend=False)
        st.plotly_chart(fig_bar, use_container_width=True)

        st.write("**Bidding Distribution (Top 5)**")
        fig_pie = px.pie(
            top_sellers,
            names="supplier_name",
            values="bidding_amount",
            title="Bidding Amount by Supplier",
        )
        st.plotly_chart(fig_pie, use_container_width=True)
    else:
        st.warning("No supplier data available after filtering.")

    total_bidding = seller_data["bidding_amount"].sum()
    st.write(f"**Total Bidding Amount (All Suppliers):** ${total_bidding:,.2f}")

    def convert_to_serializable(obj):
        """Recursively convert numpy/pandas objects into JSON-safe Python types.

        Handles numpy scalars, ndarrays, Series/DataFrames, dicts, and lists.
        Anything else is returned unchanged.
        """
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        if isinstance(obj, (pd.Series, pd.DataFrame)):
            # .to_dict() can still contain numpy scalars (values AND index
            # keys), which json.dumps rejects — recurse to clean them out.
            return convert_to_serializable(obj.to_dict())
        if isinstance(obj, dict):
            # Convert keys as well: a numpy int64 index key breaks json.dumps.
            return {
                convert_to_serializable(k): convert_to_serializable(v)
                for k, v in obj.items()
            }
        if isinstance(obj, list):
            return [convert_to_serializable(i) for i in obj]
        return obj

    # --- LLM insights -----------------------------------------------------
    st.subheader("Ask Mistral About the Data", anchor="insights")
    user_query = st.text_input(
        "Enter your question:", "Summarize why these are the top 5 suppliers."
    )

    @st.cache_resource
    def load_mistral_pipeline():
        """Load Mistral-7B-Instruct as a cached text-generation pipeline.

        Returns None (after surfacing the error in the UI) if the model or
        tokenizer cannot be loaded, e.g. no network or insufficient memory.
        """
        try:
            model_id = "mistralai/Mistral-7B-Instruct-v0.1"
            tokenizer = AutoTokenizer.from_pretrained(model_id)
            model = AutoModelForCausalLM.from_pretrained(
                model_id, device_map="auto", torch_dtype="auto"
            )
            return pipeline(
                "text-generation",
                model=model,
                tokenizer=tokenizer,
                max_new_tokens=512,
            )
        except Exception as e:
            st.error(f"Error loading Mistral model: {str(e)}")
            return None

    if user_query:
        with st.spinner("Generating response..."):
            pipe = load_mistral_pipeline()
            if pipe is None:
                st.error("Cannot generate response due to model loading failure.")
                st.stop()

            # Ground the model on the aggregated data and active filters.
            top_sellers_json = json.dumps(
                convert_to_serializable(top_sellers), indent=2
            )
            filters_applied = {
                "product_category": selected_categories,
                "grade": selected_grades,
                "ratings": selected_ratings,
            }
            prompt = f"""You are a helpful assistant. Based on the dataset below and filters, answer the following user question.

Top 5 sellers:
{top_sellers_json}

Filters applied:
{json.dumps(filters_applied, indent=2)}

Question: {user_query}
"""
            response = pipe(prompt)[0]["generated_text"]
            # text-generation pipelines echo the prompt by default; strip it
            # so only the model's answer is shown.
            answer = (
                response[len(prompt):].strip()
                if response.startswith(prompt)
                else response.strip()
            )
            st.markdown("**Mistral LLM Response:**")
            st.write(answer)
    else:
        st.info("Enter a question to ask Mistral about the bidding data.")
else:
    st.warning("Please upload a CSV file to proceed.")