Balaprime committed on
Commit
593dddb
·
verified ·
1 Parent(s): 03a9724

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +174 -35
src/streamlit_app.py CHANGED
@@ -1,40 +1,179 @@
1
- import altair as alt
2
- import numpy as np
3
- import pandas as pd
4
  import streamlit as st
 
 
 
 
 
5
 
6
- """
7
- # Welcome to Streamlit!
8
 
9
- Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
10
- If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
11
- forums](https://discuss.streamlit.io).
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
- In the meantime, below is an example of what you can do with just a few lines of code:
14
- """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
- num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
17
- num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
18
-
19
- indices = np.linspace(0, 1, num_points)
20
- theta = 2 * np.pi * num_turns * indices
21
- radius = indices
22
-
23
- x = radius * np.cos(theta)
24
- y = radius * np.sin(theta)
25
-
26
- df = pd.DataFrame({
27
- "x": x,
28
- "y": y,
29
- "idx": indices,
30
- "rand": np.random.randn(num_points),
31
- })
32
-
33
- st.altair_chart(alt.Chart(df, height=700, width=700)
34
- .mark_point(filled=True)
35
- .encode(
36
- x=alt.X("x", axis=None),
37
- y=alt.Y("y", axis=None),
38
- color=alt.Color("idx", legend=None, scale=alt.Scale()),
39
- size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
40
- ))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
+ import pandas as pd
3
+ import plotly.express as px
4
+ import json
5
+ import numpy as np
6
+ from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
7
 
8
# Set Streamlit page configuration
st.set_page_config(page_title="Construction Materials Dashboard", layout="wide")

# Custom CSS for styling: a colored navbar with white links and a light
# grey container around the filter widgets. Injected via st.markdown with
# unsafe_allow_html so the <style> tag is rendered, not escaped.
_PAGE_CSS = """
<style>
.navbar {
background-color: #1f77b4;
padding: 1rem;
border-radius: 8px;
margin-bottom: 1rem;
}
.navbar-title {
color: white;
font-size: 24px;
font-weight: bold;
}
.navbar-links a {
color: white;
margin-right: 1rem;
text-decoration: none;
font-size: 16px;
}
.navbar-links a:hover {
text-decoration: underline;
}
.filter-container {
background-color: #f5f5f5;
padding: 1rem;
border-radius: 8px;
margin-bottom: 1rem;
}
</style>
"""
st.markdown(_PAGE_CSS, unsafe_allow_html=True)

# Navbar with in-page anchor links; the anchors are created by the
# st.subheader(..., anchor=...) calls further down the script.
_NAVBAR_HTML = """
<div class="navbar">
<div class="navbar-title">Construction Materials Dashboard</div>
<div class="navbar-links">
<a href="#overview">Overview</a>
<a href="#filters">Filters</a>
<a href="#insights">LLM Insights</a>
</div>
</div>
"""
st.markdown(_NAVBAR_HTML, unsafe_allow_html=True)
54
+
55
# Load dataset (cached so the CSV is parsed only once per session)
@st.cache_data
def load_data():
    """Read the construction-materials CSV into a DataFrame."""
    # NOTE(review): path is relative to the working directory — assumes the
    # app is launched from the repo root where the CSV lives.
    frame = pd.read_csv("construction_materials_dataset_3000.csv")
    return frame


df = load_data()
61
+
62
# Filter section: three multiselects (category / grade / ratings), each
# defaulting to "everything selected" so the initial view is unfiltered.
st.subheader("Filters", anchor="filters")
with st.container():
    st.markdown('<div class="filter-container">', unsafe_allow_html=True)
    category_col, grade_col, rating_col = st.columns(3)

    with category_col:
        category_options = sorted(df["product_category"].dropna().unique())
        selected_categories = st.multiselect("Product Category", category_options, default=category_options)

    with grade_col:
        grade_options = sorted(df["grade"].dropna().unique())
        selected_grades = st.multiselect("Grade", grade_options, default=grade_options)

    with rating_col:
        # Ratings are compared as strings so mixed numeric/text values
        # behave consistently in both the widget and the filter below.
        rating_options = sorted(df["ratings"].dropna().astype(str).unique())
        selected_ratings = st.multiselect("Ratings", rating_options, default=rating_options)

    st.markdown('</div>', unsafe_allow_html=True)

# Apply filters: keep only rows matching every selected value.
category_mask = df["product_category"].isin(selected_categories)
grade_mask = df["grade"].isin(selected_grades)
rating_mask = df["ratings"].astype(str).isin(selected_ratings)
filtered_df = df[category_mask & grade_mask & rating_mask]
88
 
89
# Aggregate total bidding amount per supplier from the filtered rows.
seller_data = (
    filtered_df.groupby("supplier_name")
    .agg({"bidding_amount": "sum"})
    .reset_index()
)

# Top 5 suppliers by aggregate bid (also fed to the LLM section below).
top_sellers = seller_data.sort_values("bidding_amount", ascending=False).head(5)

# Overview section: bar + pie chart of the top 5, then the overall total.
st.subheader("Overview", anchor="overview")
st.write("**Top 5 Suppliers by Total Bidding Amount**")
bar_chart = px.bar(
    top_sellers,
    x="supplier_name",
    y="bidding_amount",
    labels={"supplier_name": "Supplier", "bidding_amount": "Bidding Amount"},
    title="Top 5 Suppliers",
    color="supplier_name",
)
# Colors already identify suppliers on the x axis; the legend is redundant.
bar_chart.update_layout(showlegend=False)
st.plotly_chart(bar_chart, use_container_width=True)

st.write("**Bidding Distribution (Top 5)**")
pie_chart = px.pie(
    top_sellers,
    names="supplier_name",
    values="bidding_amount",
    title="Bidding Amount by Supplier",
)
st.plotly_chart(pie_chart, use_container_width=True)

# Grand total is over ALL suppliers passing the filters, not just the top 5.
total_bidding = seller_data["bidding_amount"].sum()
st.write(f"**Total Bidding Amount (All Suppliers):** ${total_bidding:,.2f}")
122
+
123
# Convert numpy / pandas objects into plain Python types that json.dumps accepts.
def convert_to_serializable(obj):
    """Recursively convert *obj* into JSON-serializable built-in types.

    Handles numpy scalars (integer, floating, bool), numpy arrays, pandas
    Series/DataFrames, and nested dicts/lists/tuples. Anything else is
    returned unchanged (assumed already serializable).

    Fixes over the previous version: pandas ``to_dict()`` output is now
    recursed (it still contains numpy scalars in values AND keys, which
    ``json.dumps`` rejects), dict keys are converted too, and ``np.bool_``
    and tuples are handled.
    """
    if isinstance(obj, np.integer):
        return int(obj)
    if isinstance(obj, np.floating):
        return float(obj)
    if isinstance(obj, np.bool_):
        # json.dumps raises TypeError on numpy booleans; coerce to bool.
        return bool(obj)
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    if isinstance(obj, (pd.Series, pd.DataFrame)):
        # to_dict() leaves numpy scalars inside — recurse to clean them.
        return convert_to_serializable(obj.to_dict())
    if isinstance(obj, dict):
        # Keys from a DataFrame index may be np.int64; convert them as well.
        return {convert_to_serializable(k): convert_to_serializable(v)
                for k, v in obj.items()}
    if isinstance(obj, (list, tuple)):
        # Tuples become lists, which serialize identically in JSON.
        return [convert_to_serializable(i) for i in obj]
    return obj
138
+
139
# LLM Section: question input plus a cached loader for the Mistral pipeline.
st.subheader("Ask Mistral About the Data", anchor="insights")
user_query = st.text_input("Enter your question:", "Summarize why these are the top 5 suppliers.")


# Load Mistral model from Hugging Face.
@st.cache_resource
def load_mistral_pipeline():
    """Build and cache a text-generation pipeline for Mistral-7B-Instruct.

    Cached with st.cache_resource so the (large) weights are loaded once
    per process, not once per rerun.
    """
    model_id = "mistralai/Mistral-7B-Instruct-v0.1"
    return pipeline(
        "text-generation",
        model=AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", torch_dtype="auto"),
        tokenizer=AutoTokenizer.from_pretrained(model_id),
        max_new_tokens=512,
    )
151
+
152
# Query handling: build a prompt grounded in the filtered data and ask the LLM.
if user_query:
    with st.spinner("Generating response..."):
        pipe = load_mistral_pipeline()

        # Prepare prompt: top-5 table plus active filters as JSON context.
        top_sellers_json = json.dumps(convert_to_serializable(top_sellers), indent=2)
        filters_applied = {
            "product_category": selected_categories,
            "grade": selected_grades,
            "ratings": selected_ratings
        }
        prompt = f"""You are a helpful assistant. Based on the dataset below and filters, answer the following user question.

Top 5 sellers:
{top_sellers_json}

Filters applied:
{json.dumps(filters_applied, indent=2)}

Question:
{user_query}
"""
        response = pipe(prompt)[0]['generated_text']
        # text-generation pipelines echo the prompt by default. The previous
        # `split("Question:")[-1]` trim still left the user's question in the
        # displayed output; stripping the exact prompt prefix shows only the
        # model's answer. Keep the split as a fallback if the echo differs.
        if response.startswith(prompt):
            answer = response[len(prompt):].strip()
        else:
            answer = response.split("Question:")[-1].strip()
        st.markdown("**Mistral LLM Response:**")
        st.write(answer)
else:
    st.info("Enter a question to ask Mistral about the bidding data.")