Balaprime committed on
Commit
8d52236
·
verified ·
1 Parent(s): 217c2e0

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +121 -114
src/streamlit_app.py CHANGED
@@ -52,115 +52,120 @@ st.markdown("""
52
  </div>
53
  """, unsafe_allow_html=True)
54
 
55
- # Load dataset
56
- @st.cache_data
57
- def load_data():
58
- return pd.read_csv("construction_materials_dataset_3000.csv")
59
-
60
- df = load_data()
61
-
62
- # Filter section
63
- st.subheader("Filters", anchor="filters")
64
- with st.container():
65
- st.markdown('<div class="filter-container">', unsafe_allow_html=True)
66
- col1, col2, col3 = st.columns(3)
67
-
68
- with col1:
69
- product_categories = sorted(df['product_category'].dropna().unique())
70
- selected_categories = st.multiselect("Product Category", product_categories, default=product_categories)
71
-
72
- with col2:
73
- grades = sorted(df['grade'].dropna().unique())
74
- selected_grades = st.multiselect("Grade", grades, default=grades)
75
-
76
- with col3:
77
- ratings = sorted(df['ratings'].dropna().astype(str).unique())
78
- selected_ratings = st.multiselect("Ratings", ratings, default=ratings)
79
-
80
- st.markdown('</div>', unsafe_allow_html=True)
81
-
82
- # Apply filters
83
- filtered_df = df[
84
- (df['product_category'].isin(selected_categories)) &
85
- (df['grade'].isin(selected_grades)) &
86
- (df['ratings'].astype(str).isin(selected_ratings))
87
- ]
88
-
89
- # Aggregate by supplier
90
- seller_data = filtered_df.groupby("supplier_name").agg({
91
- "bidding_amount": "sum"
92
- }).reset_index()
93
-
94
- # Top 5 sellers
95
- top_sellers = seller_data.sort_values("bidding_amount", ascending=False).head(5)
96
-
97
- # Overview section
98
- st.subheader("Overview", anchor="overview")
99
- st.write("**Top 5 Suppliers by Total Bidding Amount**")
100
- fig_bar = px.bar(
101
- top_sellers,
102
- x="supplier_name",
103
- y="bidding_amount",
104
- labels={"supplier_name": "Supplier", "bidding_amount": "Bidding Amount"},
105
- title="Top 5 Suppliers",
106
- color="supplier_name"
107
- )
108
- fig_bar.update_layout(showlegend=False)
109
- st.plotly_chart(fig_bar, use_container_width=True)
110
-
111
- st.write("**Bidding Distribution (Top 5)**")
112
- fig_pie = px.pie(
113
- top_sellers,
114
- names="supplier_name",
115
- values="bidding_amount",
116
- title="Bidding Amount by Supplier"
117
- )
118
- st.plotly_chart(fig_pie, use_container_width=True)
119
-
120
- total_bidding = seller_data["bidding_amount"].sum()
121
- st.write(f"**Total Bidding Amount (All Suppliers):** ${total_bidding:,.2f}")
122
-
123
- # Convert complex types to simple Python types
124
- def convert_to_serializable(obj):
125
- if isinstance(obj, np.integer):
126
- return int(obj)
127
- elif isinstance(obj, np.floating):
128
- return float(obj)
129
- elif isinstance(obj, np.ndarray):
130
- return obj.tolist()
131
- elif isinstance(obj, (pd.Series, pd.DataFrame)):
132
- return obj.to_dict()
133
- elif isinstance(obj, dict):
134
- return {k: convert_to_serializable(v) for k, v in obj.items()}
135
- elif isinstance(obj, list):
136
- return [convert_to_serializable(i) for i in obj]
137
- return obj
138
-
139
- # LLM Section
140
- st.subheader("Ask Mistral About the Data", anchor="insights")
141
- user_query = st.text_input("Enter your question:", "Summarize why these are the top 5 suppliers.")
142
-
143
- # Load Mistral model from Hugging Face
144
- @st.cache_resource
145
- def load_mistral_pipeline():
146
- model_id = "mistralai/Mistral-7B-Instruct-v0.1"
147
- tokenizer = AutoTokenizer.from_pretrained(model_id)
148
- model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", torch_dtype="auto")
149
- pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=512)
150
- return pipe
151
-
152
- if user_query:
153
- with st.spinner("Generating response..."):
154
- pipe = load_mistral_pipeline()
155
-
156
- # Prepare prompt
157
- top_sellers_json = json.dumps(convert_to_serializable(top_sellers), indent=2)
158
- filters_applied = {
159
- "product_category": selected_categories,
160
- "grade": selected_grades,
161
- "ratings": selected_ratings
162
- }
163
- prompt = f"""You are the helpful assistant. Based on the dataset below and filters, answer the following user question.
 
 
 
 
 
164
 
165
  Top 5 sellers:
166
  {top_sellers_json}
@@ -171,9 +176,11 @@ Filters applied:
171
  Question:
172
  {user_query}
173
  """
174
- response = pipe(prompt)[0]['generated_text']
175
- # Display only the assistant's answer (trim prompt if echoed)
176
- st.markdown("**Mistral LLM Response:**")
177
- st.write(response.split("Question:")[-1].strip())
 
 
178
  else:
179
- st.info("Enter a question to ask Mistral about the bidding data.")
 
52
  </div>
53
  """, unsafe_allow_html=True)
54
 
55
# ---- Dataset upload ----
# The rest of the dashboard only renders once a CSV has been provided.
st.subheader("Upload Dataset")
uploaded_file = st.file_uploader("Choose a CSV file", type="csv")

if uploaded_file is not None:
    # st.cache_data keys the cache on the uploaded file's content, so
    # re-uploading the same CSV skips the re-parse.
    @st.cache_data
    def load_data(uploaded_file):
        """Parse the uploaded CSV into a DataFrame.

        Shows a user-visible Streamlit error (instead of an uncaught
        traceback) and halts the script when the file is not valid CSV.
        """
        try:
            return pd.read_csv(uploaded_file)
        except (pd.errors.ParserError, UnicodeDecodeError) as exc:
            st.error(f"Could not parse the uploaded file as CSV: {exc}")
            st.stop()
64
+
65
+ df = load_data(uploaded_file)
66
+
67
+ # Filter section
68
+ st.subheader("Filters", anchor="filters")
69
+ with st.container():
70
+ st.markdown('<div class="filter-container">', unsafe_allow_html=True)
71
+ col1, col2, col3 = st.columns(3)
72
+
73
+ with col1:
74
+ product_categories = sorted(df['product_category'].dropna().unique())
75
+ selected_categories = st.multiselect("Product Category", product_categories, default=product_categories)
76
+
77
+ with col2:
78
+ grades = sorted(df['grade'].dropna().unique())
79
+ selected_grades = st.multiselect("Grade", grades, default=grades)
80
+
81
+ with col3:
82
+ ratings = sorted(df['ratings'].dropna().astype(str).unique())
83
+ selected_ratings = st.multiselect("Ratings", ratings, default=ratings)
84
+
85
+ st.markdown('</div>', unsafe_allow_html=True)
86
+
87
+ # Apply filters
88
+ filtered_df = df[
89
+ (df['product_category'].isin(selected_categories)) &
90
+ (df['grade'].isin(selected_grades)) &
91
+ (df['ratings'].astype(str).isin(selected_ratings))
92
+ ]
93
+
94
+ # Aggregate by supplier
95
+ seller_data = filtered_df.groupby("supplier_name").agg({
96
+ "bidding_amount": "sum"
97
+ }).reset_index()
98
+
99
+ # Top 5 sellers
100
+ top_sellers = seller_data.sort_values("bidding_amount", ascending=False).head(5)
101
+
102
+ # Overview section
103
+ st.subheader("Overview", anchor="overview")
104
+ st.write("**Top 5 Suppliers by Total Bidding Amount**")
105
+ fig_bar = px.bar(
106
+ top_sellers,
107
+ x="supplier_name",
108
+ y="bidding_amount",
109
+ labels={"supplier_name": "Supplier", "bidding_amount": "Bidding Amount"},
110
+ title="Top 5 Suppliers",
111
+ color="supplier_name"
112
+ )
113
+ fig_bar.update_layout(showlegend=False)
114
+ st.plotly_chart(fig_bar, use_container_width=True)
115
+
116
+ st.write("**Bidding Distribution (Top 5)**")
117
+ fig_pie = px.pie(
118
+ top_sellers,
119
+ names="supplier_name",
120
+ values="bidding_amount",
121
+ title="Bidding Amount by Supplier"
122
+ )
123
+ st.plotly_chart(fig_pie, use_container_width=True)
124
+
125
+ total_bidding = seller_data["bidding_amount"].sum()
126
+ st.write(f"**Total Bidding Amount (All Suppliers):** ${total_bidding:,.2f}")
127
+
128
def convert_to_serializable(obj):
    """Recursively convert numpy/pandas objects into JSON-serializable
    built-in Python types.

    Handles numpy scalars and arrays, pandas Series/DataFrames, and nested
    dicts/lists; any other object is returned unchanged.
    """
    if isinstance(obj, np.integer):
        return int(obj)
    if isinstance(obj, np.floating):
        return float(obj)
    if isinstance(obj, np.bool_):
        return bool(obj)
    if isinstance(obj, np.ndarray):
        # tolist() yields native scalars for numeric dtypes; recurse anyway
        # to cover object arrays holding numpy/pandas values.
        return [convert_to_serializable(i) for i in obj.tolist()]
    if isinstance(obj, (pd.Series, pd.DataFrame)):
        # BUG FIX: to_dict() can still contain numpy scalars as values and
        # numpy integers as keys, which json.dumps rejects — convert the
        # resulting dict recursively instead of returning it as-is.
        return convert_to_serializable(obj.to_dict())
    if isinstance(obj, dict):
        # Convert keys too: np.int64 index labels are not valid JSON keys.
        return {convert_to_serializable(k): convert_to_serializable(v)
                for k, v in obj.items()}
    if isinstance(obj, list):
        return [convert_to_serializable(i) for i in obj]
    return obj
143
# ---- LLM section ----
st.subheader("Ask Mistral About the Data", anchor="insights")
user_query = st.text_input("Enter your question:", "Summarize why these are the top 5 suppliers.")

# Build the Mistral text-generation pipeline once per session.
@st.cache_resource
def load_mistral_pipeline():
    """Return a cached transformers text-generation pipeline backed by
    Mistral-7B-Instruct, letting transformers choose device and dtype."""
    model_id = "mistralai/Mistral-7B-Instruct-v0.1"
    return pipeline(
        "text-generation",
        model=AutoModelForCausalLM.from_pretrained(
            model_id, device_map="auto", torch_dtype="auto"
        ),
        tokenizer=AutoTokenizer.from_pretrained(model_id),
        max_new_tokens=512,
    )
156
+
157
+ if user_query:
158
+ with st.spinner("Generating response..."):
159
+ pipe = load_mistral_pipeline()
160
+
161
+ # Prepare prompt
162
+ top_sellers_json = json.dumps(convert_to_serializable(top_sellers), indent=2)
163
+ filters_applied = {
164
+ "product_category": selected_categories,
165
+ "grade": selected_grades,
166
+ "ratings": selected_ratings
167
+ }
168
+ prompt = f"""You are the helpful assistant. Based on the dataset below and filters, answer the following user question.
169
 
170
  Top 5 sellers:
171
  {top_sellers_json}
 
176
  Question:
177
  {user_query}
178
  """
179
+ response = pipe(prompt)[0]['generated_text']
180
+ # Display only the assistant's answer (trim prompt if echoed)
181
+ st.markdown("**Mistral LLM Response:**")
182
+ st.write(response.split("Question:")[-1].strip())
183
+ else:
184
+ st.info("Enter a question to ask Mistral about the bidding data.")
185
  else:
186
+ st.warning("Please upload a CSV file to proceed.")