Balaprime committed on
Commit
8d52236
·
verified ·
1 Parent(s): 217c2e0

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +121 -114
src/streamlit_app.py CHANGED
@@ -52,115 +52,120 @@ st.markdown("""
52
  </div>
53
  """, unsafe_allow_html=True)
54
 
55
- # Load dataset
56
- @st.cache_data
57
- def load_data():
58
- return pd.read_csv("construction_materials_dataset_3000.csv")
59
-
60
- df = load_data()
61
-
62
- # Filter section
63
- st.subheader("Filters", anchor="filters")
64
- with st.container():
65
- st.markdown('<div class="filter-container">', unsafe_allow_html=True)
66
- col1, col2, col3 = st.columns(3)
67
-
68
- with col1:
69
- product_categories = sorted(df['product_category'].dropna().unique())
70
- selected_categories = st.multiselect("Product Category", product_categories, default=product_categories)
71
-
72
- with col2:
73
- grades = sorted(df['grade'].dropna().unique())
74
- selected_grades = st.multiselect("Grade", grades, default=grades)
75
-
76
- with col3:
77
- ratings = sorted(df['ratings'].dropna().astype(str).unique())
78
- selected_ratings = st.multiselect("Ratings", ratings, default=ratings)
79
-
80
- st.markdown('</div>', unsafe_allow_html=True)
81
-
82
- # Apply filters
83
- filtered_df = df[
84
- (df['product_category'].isin(selected_categories)) &
85
- (df['grade'].isin(selected_grades)) &
86
- (df['ratings'].astype(str).isin(selected_ratings))
87
- ]
88
-
89
- # Aggregate by supplier
90
- seller_data = filtered_df.groupby("supplier_name").agg({
91
- "bidding_amount": "sum"
92
- }).reset_index()
93
-
94
- # Top 5 sellers
95
- top_sellers = seller_data.sort_values("bidding_amount", ascending=False).head(5)
96
-
97
- # Overview section
98
- st.subheader("Overview", anchor="overview")
99
- st.write("**Top 5 Suppliers by Total Bidding Amount**")
100
- fig_bar = px.bar(
101
- top_sellers,
102
- x="supplier_name",
103
- y="bidding_amount",
104
- labels={"supplier_name": "Supplier", "bidding_amount": "Bidding Amount"},
105
- title="Top 5 Suppliers",
106
- color="supplier_name"
107
- )
108
- fig_bar.update_layout(showlegend=False)
109
- st.plotly_chart(fig_bar, use_container_width=True)
110
-
111
- st.write("**Bidding Distribution (Top 5)**")
112
- fig_pie = px.pie(
113
- top_sellers,
114
- names="supplier_name",
115
- values="bidding_amount",
116
- title="Bidding Amount by Supplier"
117
- )
118
- st.plotly_chart(fig_pie, use_container_width=True)
119
-
120
- total_bidding = seller_data["bidding_amount"].sum()
121
- st.write(f"**Total Bidding Amount (All Suppliers):** ${total_bidding:,.2f}")
122
-
123
- # Convert complex types to simple Python types
124
- def convert_to_serializable(obj):
125
- if isinstance(obj, np.integer):
126
- return int(obj)
127
- elif isinstance(obj, np.floating):
128
- return float(obj)
129
- elif isinstance(obj, np.ndarray):
130
- return obj.tolist()
131
- elif isinstance(obj, (pd.Series, pd.DataFrame)):
132
- return obj.to_dict()
133
- elif isinstance(obj, dict):
134
- return {k: convert_to_serializable(v) for k, v in obj.items()}
135
- elif isinstance(obj, list):
136
- return [convert_to_serializable(i) for i in obj]
137
- return obj
138
-
139
- # LLM Section
140
- st.subheader("Ask Mistral About the Data", anchor="insights")
141
- user_query = st.text_input("Enter your question:", "Summarize why these are the top 5 suppliers.")
142
-
143
- # Load Mistral model from Hugging Face
144
- @st.cache_resource
145
- def load_mistral_pipeline():
146
- model_id = "mistralai/Mistral-7B-Instruct-v0.1"
147
- tokenizer = AutoTokenizer.from_pretrained(model_id)
148
- model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", torch_dtype="auto")
149
- pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=512)
150
- return pipe
151
-
152
- if user_query:
153
- with st.spinner("Generating response..."):
154
- pipe = load_mistral_pipeline()
155
-
156
- # Prepare prompt
157
- top_sellers_json = json.dumps(convert_to_serializable(top_sellers), indent=2)
158
- filters_applied = {
159
- "product_category": selected_categories,
160
- "grade": selected_grades,
161
- "ratings": selected_ratings
162
- }
163
- prompt = f"""You are the helpful assistant. Based on the dataset below and filters, answer the following user question.
 
 
 
 
 
164
 
165
  Top 5 sellers:
166
  {top_sellers_json}
@@ -171,9 +176,11 @@ Filters applied:
171
  Question:
172
  {user_query}
173
  """
174
- response = pipe(prompt)[0]['generated_text']
175
- # Display only the assistant's answer (trim prompt if echoed)
176
- st.markdown("**Mistral LLM Response:**")
177
- st.write(response.split("Question:")[-1].strip())
 
 
178
  else:
179
- st.info("Enter a question to ask Mistral about the bidding data.")
 
52
  </div>
53
  """, unsafe_allow_html=True)
54
 
55
# ---- Dataset upload ----
# The rest of the dashboard only renders once a CSV has been provided.
st.subheader("Upload Dataset")
uploaded_file = st.file_uploader("Choose a CSV file", type="csv")

if uploaded_file is not None:
    # st.cache_data keys the cache on the uploaded file's content, so
    # re-uploading the same CSV skips the re-parse.
    @st.cache_data
    def load_data(uploaded_file):
        """Parse the uploaded CSV into a DataFrame.

        Shows a user-visible Streamlit error (instead of an uncaught
        traceback) and halts the script when the file is not valid CSV.
        """
        try:
            return pd.read_csv(uploaded_file)
        except (pd.errors.ParserError, UnicodeDecodeError) as exc:
            st.error(f"Could not parse the uploaded file as CSV: {exc}")
            st.stop()
64
+
65
+ df = load_data(uploaded_file)
66
+
67
+ # Filter section
68
+ st.subheader("Filters", anchor="filters")
69
+ with st.container():
70
+ st.markdown('<div class="filter-container">', unsafe_allow_html=True)
71
+ col1, col2, col3 = st.columns(3)
72
+
73
+ with col1:
74
+ product_categories = sorted(df['product_category'].dropna().unique())
75
+ selected_categories = st.multiselect("Product Category", product_categories, default=product_categories)
76
+
77
+ with col2:
78
+ grades = sorted(df['grade'].dropna().unique())
79
+ selected_grades = st.multiselect("Grade", grades, default=grades)
80
+
81
+ with col3:
82
+ ratings = sorted(df['ratings'].dropna().astype(str).unique())
83
+ selected_ratings = st.multiselect("Ratings", ratings, default=ratings)
84
+
85
+ st.markdown('</div>', unsafe_allow_html=True)
86
+
87
+ # Apply filters
88
+ filtered_df = df[
89
+ (df['product_category'].isin(selected_categories)) &
90
+ (df['grade'].isin(selected_grades)) &
91
+ (df['ratings'].astype(str).isin(selected_ratings))
92
+ ]
93
+
94
+ # Aggregate by supplier
95
+ seller_data = filtered_df.groupby("supplier_name").agg({
96
+ "bidding_amount": "sum"
97
+ }).reset_index()
98
+
99
+ # Top 5 sellers
100
+ top_sellers = seller_data.sort_values("bidding_amount", ascending=False).head(5)
101
+
102
+ # Overview section
103
+ st.subheader("Overview", anchor="overview")
104
+ st.write("**Top 5 Suppliers by Total Bidding Amount**")
105
+ fig_bar = px.bar(
106
+ top_sellers,
107
+ x="supplier_name",
108
+ y="bidding_amount",
109
+ labels={"supplier_name": "Supplier", "bidding_amount": "Bidding Amount"},
110
+ title="Top 5 Suppliers",
111
+ color="supplier_name"
112
+ )
113
+ fig_bar.update_layout(showlegend=False)
114
+ st.plotly_chart(fig_bar, use_container_width=True)
115
+
116
+ st.write("**Bidding Distribution (Top 5)**")
117
+ fig_pie = px.pie(
118
+ top_sellers,
119
+ names="supplier_name",
120
+ values="bidding_amount",
121
+ title="Bidding Amount by Supplier"
122
+ )
123
+ st.plotly_chart(fig_pie, use_container_width=True)
124
+
125
+ total_bidding = seller_data["bidding_amount"].sum()
126
+ st.write(f"**Total Bidding Amount (All Suppliers):** ${total_bidding:,.2f}")
127
+
128
def convert_to_serializable(obj):
    """Recursively convert numpy/pandas objects into JSON-serializable
    built-in Python types.

    Handles numpy scalars and arrays, pandas Series/DataFrames, and nested
    dicts/lists; any other object is returned unchanged.
    """
    if isinstance(obj, np.integer):
        return int(obj)
    if isinstance(obj, np.floating):
        return float(obj)
    if isinstance(obj, np.bool_):
        return bool(obj)
    if isinstance(obj, np.ndarray):
        # tolist() yields native scalars for numeric dtypes; recurse anyway
        # to cover object arrays holding numpy/pandas values.
        return [convert_to_serializable(i) for i in obj.tolist()]
    if isinstance(obj, (pd.Series, pd.DataFrame)):
        # BUG FIX: to_dict() can still contain numpy scalars as values and
        # numpy integers as keys, which json.dumps rejects — convert the
        # resulting dict recursively instead of returning it as-is.
        return convert_to_serializable(obj.to_dict())
    if isinstance(obj, dict):
        # Convert keys too: np.int64 index labels are not valid JSON keys.
        return {convert_to_serializable(k): convert_to_serializable(v)
                for k, v in obj.items()}
    if isinstance(obj, list):
        return [convert_to_serializable(i) for i in obj]
    return obj
143
# ---- LLM section ----
st.subheader("Ask Mistral About the Data", anchor="insights")
user_query = st.text_input("Enter your question:", "Summarize why these are the top 5 suppliers.")

# Build the Mistral text-generation pipeline once per session.
@st.cache_resource
def load_mistral_pipeline():
    """Return a cached transformers text-generation pipeline backed by
    Mistral-7B-Instruct, letting transformers choose device and dtype."""
    model_id = "mistralai/Mistral-7B-Instruct-v0.1"
    return pipeline(
        "text-generation",
        model=AutoModelForCausalLM.from_pretrained(
            model_id, device_map="auto", torch_dtype="auto"
        ),
        tokenizer=AutoTokenizer.from_pretrained(model_id),
        max_new_tokens=512,
    )
156
+
157
+ if user_query:
158
+ with st.spinner("Generating response..."):
159
+ pipe = load_mistral_pipeline()
160
+
161
+ # Prepare prompt
162
+ top_sellers_json = json.dumps(convert_to_serializable(top_sellers), indent=2)
163
+ filters_applied = {
164
+ "product_category": selected_categories,
165
+ "grade": selected_grades,
166
+ "ratings": selected_ratings
167
+ }
168
+ prompt = f"""You are the helpful assistant. Based on the dataset below and filters, answer the following user question.
169
 
170
  Top 5 sellers:
171
  {top_sellers_json}
 
176
  Question:
177
  {user_query}
178
  """
179
+ response = pipe(prompt)[0]['generated_text']
180
+ # Display only the assistant's answer (trim prompt if echoed)
181
+ st.markdown("**Mistral LLM Response:**")
182
+ st.write(response.split("Question:")[-1].strip())
183
+ else:
184
+ st.info("Enter a question to ask Mistral about the bidding data.")
185
  else:
186
+ st.warning("Please upload a CSV file to proceed.")