# Source: Hugging Face Space "Balaprime/AI_comparison_Bot" (status: Sleeping; file size: 7,397 bytes)

import streamlit as st
import pandas as pd
import plotly.express as px
import json
import numpy as np
from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer

# --- Page chrome ------------------------------------------------------------
# The raw CSS/HTML fragments live in named constants and are injected below,
# after the page configuration call.

# Stylesheet for the navbar and the filter panel.
_DASHBOARD_CSS = """
<style>
    .navbar {
        background-color: #1f77b4;
        padding: 1rem;
        border-radius: 8px;
        margin-bottom: 1rem;
    }
    .navbar-title {
        color: white;
        font-size: 24px;
        font-weight: bold;
    }
    .navbar-links a {
        color: white;
        margin-right: 1rem;
        text-decoration: none;
        font-size: 16px;
    }
    .navbar-links a:hover {
        text-decoration: underline;
    }
    .filter-container {
        background-color: #f5f5f5;
        padding: 1rem;
        border-radius: 8px;
        margin-bottom: 1rem;
    }
</style>
"""

# Static navigation bar linking to the #overview, #filters and #insights anchors.
_NAVBAR_HTML = """
<div class="navbar">
    <div class="navbar-title">Construction Materials Dashboard</div>
    <div class="navbar-links">
        <a href="#overview">Overview</a>
        <a href="#filters">Filters</a>
        <a href="#insights">LLM Insights</a>
    </div>
</div>
"""

# Page-level settings are applied before any other Streamlit call.
st.set_page_config(layout="wide", page_title="Construction Materials Dashboard")

# Inject the stylesheet first, then the navbar that relies on it.
for _fragment in (_DASHBOARD_CSS, _NAVBAR_HTML):
    st.markdown(_fragment, unsafe_allow_html=True)

# CSV upload widget; the result is compared against None further down.
st.subheader("Upload Dataset")
uploaded_file = st.file_uploader(label="Choose a CSV file", type="csv")

if uploaded_file is not None:
    # Load dataset from uploaded file
    @st.cache_data
    def load_data(uploaded_file):
        """Parse *uploaded_file* as CSV.

        Returns the parsed DataFrame. If ``pd.read_csv`` raises, the error is
        shown in the UI via ``st.error`` and ``None`` is returned instead.
        """
        try:
            frame = pd.read_csv(uploaded_file)
        except Exception as err:
            st.error(f"Error reading CSV file: {err!s}")
            return None
        return frame

    df = load_data(uploaded_file)
    if df is None:
        # load_data returned None, i.e. the CSV could not be parsed.
        st.stop()

    # Display data preview
    st.write("**Uploaded Data Preview**")
    st.dataframe(df.head())

    # Check for required columns
    required_columns = ['product_category', 'grade', 'ratings', 'supplier_name', 'bidding_amount']
    missing_columns = [col for col in required_columns if col not in df.columns]
    if missing_columns:
        st.error(f"Missing required columns in CSV: {', '.join(missing_columns)}")
        st.stop()

    # Convert ratings to string for consistency
    # NOTE: astype(str) also turns missing ratings into the literal string
    # "nan", so they show up as a selectable rating value.
    df['ratings'] = df['ratings'].astype(str)

    # Bidding amounts are summed, charted and formatted with a numeric format
    # spec later on. A text column (e.g. "1,000" or "N/A") would make the sum
    # concatenate strings and the formatting raise, so coerce to numbers here.
    # Unparsable cells become NaN, which the per-supplier sum skips.
    bidding_numeric = pd.to_numeric(df['bidding_amount'], errors='coerce')
    unparsable = int((bidding_numeric.isna() & df['bidding_amount'].notna()).sum())
    if unparsable:
        st.warning(
            f"{unparsable} row(s) have a non-numeric bidding_amount and are ignored in totals."
        )
    df['bidding_amount'] = bidding_numeric

    # Filter section
    st.subheader("Filters", anchor="filters")

    # (dataframe column, widget label) for each multiselect, left to right.
    filter_fields = (
        ("product_category", "Product Category"),
        ("grade", "Grade"),
        ("ratings", "Ratings"),
    )
    chosen = {}
    with st.container():
        st.markdown('<div class="filter-container">', unsafe_allow_html=True)
        for slot, (field, label) in zip(st.columns(3), filter_fields):
            with slot:
                # Every distinct non-missing value is offered and pre-selected.
                options = sorted(df[field].dropna().unique())
                chosen[field] = st.multiselect(label, options, default=options)
        st.markdown('</div>', unsafe_allow_html=True)

    selected_categories = chosen["product_category"]
    selected_grades = chosen["grade"]
    selected_ratings = chosen["ratings"]

    # Apply filters: a row is kept only when all three columns match a selection.
    keep = df['product_category'].isin(selected_categories)
    keep &= df['grade'].isin(selected_grades)
    keep &= df['ratings'].isin(selected_ratings)
    filtered_df = df[keep]

    # Nothing left to aggregate or chart: tell the user and end this run.
    if filtered_df.empty:
        st.warning("No data matches the selected filters. Please adjust your filter selections.")
        st.stop()

    # Total bidding amount per supplier, one row per supplier.
    seller_data = (
        filtered_df.groupby("supplier_name")["bidding_amount"]
        .sum()
        .reset_index()
    )

    # Rank suppliers by their total and keep the five largest.
    ranked_sellers = seller_data.sort_values("bidding_amount", ascending=False)
    top_sellers = ranked_sellers.head(5)

    # Overview section
    st.subheader("Overview", anchor="overview")
    if top_sellers.empty:
        st.warning("No supplier data available after filtering.")
    else:
        # Bar chart: one coloured bar per supplier; the legend is hidden
        # because the x axis already names each supplier.
        st.write("**Top 5 Suppliers by Total Bidding Amount**")
        bar_chart = px.bar(
            top_sellers,
            x="supplier_name",
            y="bidding_amount",
            color="supplier_name",
            title="Top 5 Suppliers",
            labels={"supplier_name": "Supplier", "bidding_amount": "Bidding Amount"},
        )
        bar_chart.update_layout(showlegend=False)
        st.plotly_chart(bar_chart, use_container_width=True)

        # Pie chart: each supplier's share of the top-5 total.
        st.write("**Bidding Distribution (Top 5)**")
        pie_chart = px.pie(
            top_sellers,
            names="supplier_name",
            values="bidding_amount",
            title="Bidding Amount by Supplier",
        )
        st.plotly_chart(pie_chart, use_container_width=True)

    # Grand total over every supplier in the filtered data, not just the top 5.
    total_bidding = seller_data["bidding_amount"].sum()
    st.write(f"**Total Bidding Amount (All Suppliers):** ${total_bidding:,.2f}")
    # Convert complex types to simple Python types
    def convert_to_serializable(obj):
        """Recursively replace NumPy/pandas values with plain Python ones.

        The result is intended to be passed to ``json.dumps``.

        Args:
            obj: Any value. NumPy scalars, NumPy arrays, pandas Series and
                DataFrames, dicts, lists and tuples are converted; anything
                else is returned unchanged.

        Returns:
            An equivalent structure in which NumPy scalars (including
            ``np.bool_``) are native Python scalars, arrays are lists,
            Series/DataFrames are dicts (``to_dict()`` layout), and dict keys
            that are NumPy scalars are native too. Lists stay lists and
            tuples stay tuples.
        """
        # np.generic is the base class of every NumPy scalar type, so this
        # also covers np.bool_ and np.str_, which json.dumps rejects.
        if isinstance(obj, np.generic):
            return obj.item()
        if isinstance(obj, np.ndarray):
            # Object arrays can still hold NumPy scalars after tolist().
            return convert_to_serializable(obj.tolist())
        if isinstance(obj, (pd.Series, pd.DataFrame)):
            # Recurse: to_dict() output is not guaranteed to be all-native.
            return convert_to_serializable(obj.to_dict())
        if isinstance(obj, dict):
            return {
                (key.item() if isinstance(key, np.generic) else key): convert_to_serializable(value)
                for key, value in obj.items()
            }
        if isinstance(obj, list):
            return [convert_to_serializable(item) for item in obj]
        if isinstance(obj, tuple):
            return tuple(convert_to_serializable(item) for item in obj)
        return obj

    # LLM Section
    st.subheader("Ask Mistral About the Data", anchor="insights")
    # The text box is pre-filled with this question.
    default_question = "Summarize why these are the top 5 suppliers."
    user_query = st.text_input(label="Enter your question:", value=default_question)

    # Load Mistral model from Hugging Face
    @st.cache_resource
    def _build_mistral_pipeline():
        """Download/load the Mistral model and wrap it in a text-generation pipeline.

        Any loading error is allowed to propagate, so only a successfully
        built pipeline is returned (and therefore cached).
        """
        model_id = "mistralai/Mistral-7B-Instruct-v0.1"
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", torch_dtype="auto")
        return pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=512)

    def load_mistral_pipeline():
        """Return the shared text-generation pipeline, or None if loading fails.

        The try/except lives outside the cached builder on purpose: returning
        None from inside the cached function would cache the failure, so a
        transient error (network, memory) would stick until the cache is
        cleared. Here a failed attempt is reported in the UI and retried on
        the next call.
        """
        try:
            return _build_mistral_pipeline()
        except Exception as e:
            st.error(f"Error loading Mistral model: {str(e)}")
            return None

    # Keep the cache-clearing hook reachable under the original name.
    load_mistral_pipeline.clear = _build_mistral_pipeline.clear

    if user_query:
        with st.spinner("Generating response..."):
            pipe = load_mistral_pipeline()
            if pipe is None:
                st.error("Cannot generate response due to model loading failure.")
                st.stop()

            # Prepare prompt
            top_sellers_json = json.dumps(convert_to_serializable(top_sellers), indent=2)
            filters_applied = {
                "product_category": selected_categories,
                "grade": selected_grades,
                "ratings": selected_ratings
            }
            prompt = f"""You are the helpful assistant. Based on the dataset below and filters, answer the following user question.

Top 5 sellers:
{top_sellers_json}

Filters applied:
{json.dumps(filters_applied, indent=2)}

Question:
{user_query}
"""
            response = pipe(prompt)[0]['generated_text']
            # Extract the answer
            answer = response[len(prompt):].strip() if response.startswith(prompt) else response.strip()
            st.markdown("**Mistral LLM Response:**")
            st.write(answer)
    else:
        st.info("Enter a question to ask Mistral about the bidding data.")
else:
    st.warning("Please upload a CSV file to proceed.")