rairo committed on
Commit
d1fb2e6
·
verified ·
1 Parent(s): 068c1f6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +275 -128
app.py CHANGED
@@ -46,16 +46,18 @@ FONT_REG = FONT_DIR / "NotoSans-Regular.ttf"
46
  FONT_BLD = FONT_DIR / "NotoSans-Bold.ttf"
47
  FONT_FAM = "NotoSans"
48
  SLIDES = 7
49
- API_KEY = os.getenv("GEMINI_API_KEY")
50
 
 
 
51
  if not API_KEY:
52
- st.error("Error: GEMINI_API_KEY environment variable is not set.")
 
53
  st.stop()
54
 
55
  try:
56
  GEM = genai.Client(api_key=API_KEY)
57
  except Exception as e:
58
- st.error(f"Failed to initialize Google GenAI Client: {e}")
59
  st.stop()
60
 
61
  # ─────────────────────────────────────────────────────────────────────────────
@@ -67,15 +69,87 @@ if "slide_idx" not in st.session_state:
67
  st.session_state.slide_idx = 0
68
  if "active_bundle_key" not in st.session_state:
69
  st.session_state.active_bundle_key = None
 
 
70
 
71
  # ─────────────────────────────────────────────────────────────────────────────
72
  # HELPERS
73
  # ─────────────────────────────────────────────────────────────────────────────
74
  sha1_bytes = lambda b: hashlib.sha1(b).hexdigest()
75
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
 
77
  def fix_bullet(text: str) -> str:
78
  """Replace Windows-1252 bullets/dashes/quotes and strip other bad chars."""
 
 
 
79
  subs = {
80
  "\x95": "β€’",
81
  "\x96": "-",
@@ -89,7 +163,6 @@ def fix_bullet(text: str) -> str:
89
  text = text.replace(bad, good)
90
  return re.sub(r"[\x80-\x9f]", "", text)
91
 
92
-
93
  def convert_pcm_to_wav(pcm_data: bytes, sample_rate=24000, channels=1, sample_width=2) -> bytes:
94
  """Wrap raw PCM data in a WAV container (in-memory)."""
95
  buf = io.BytesIO()
@@ -101,7 +174,6 @@ def convert_pcm_to_wav(pcm_data: bytes, sample_rate=24000, channels=1, sample_wi
101
  buf.seek(0)
102
  return buf.getvalue()
103
 
104
-
105
  # ─── Gemini TTS ──────────────────────────────────────────────────────────────
106
  @st.cache_data(show_spinner=False)
107
  def generate_tts_audio(text_to_speak: str):
@@ -127,97 +199,102 @@ def generate_tts_audio(text_to_speak: str):
127
  except Exception:
128
  return None, None
129
 
130
-
131
  # ─── Chart-tag regex (single source of truth) ────────────────────────────────
132
  TAG_RE = re.compile(
133
  r'[<\[]\s*generate_?chart\s*[:=]?\s*["\']?\s*([^>\]"\']+?)\s*["\']?\s*[>\]]',
134
  flags=re.IGNORECASE,
135
  )
136
 
137
-
138
  def extract_chart_tags(text: str) -> list[str]:
139
  """Return unique chart descriptors, order-preserved."""
 
 
140
  return list(dict.fromkeys(TAG_RE.findall(text)))
141
 
142
-
143
  def replace_chart_tags(text: str, chart_map: dict[str, str], repl_func) -> str:
144
  """Replace chart placeholders with `repl_func(path)` if tag in chart_map."""
 
 
145
  return TAG_RE.sub(
146
  lambda m: repl_func(chart_map[m.group(1)]) if m.group(1) in chart_map else m.group(0),
147
  text,
148
  )
149
 
150
-
151
  # ─────────────────────────────────────────────────────────────────────────────
152
  # PDF & PPTX BUILDERS
153
  # ─────────────────────────────────────────────────────────────────────────────
154
  class PDF(FPDF, HTMLMixin):
155
  pass
156
 
157
-
158
  def build_pdf(markdown_src: str, chart_map: dict[str, str]) -> bytes:
159
- markdown_src = fix_bullet(markdown_src).replace("β€’", "*")
160
- markdown_src = replace_chart_tags(markdown_src, chart_map, lambda p: f'<img src="{p}">')
161
- html = MarkdownIt("commonmark", {"breaks": True}).enable("table").render(markdown_src)
162
-
163
- pdf = PDF()
164
- pdf.set_auto_page_break(True, margin=15)
165
- fonts_added = False
166
- for style, ttf in [("", FONT_REG), ("B", FONT_BLD)]:
167
- if ttf.exists():
168
- try:
169
- pdf.add_font(FONT_FAM, style, str(ttf), uni=True)
170
- fonts_added = True
171
- except Exception:
172
- pass
173
- if fonts_added:
174
- pdf.set_fallback_fonts([FONT_FAM])
175
-
176
- pdf.add_page()
177
- pdf.set_font(FONT_FAM if fonts_added else "Arial", "B", 18)
178
- pdf.cell(0, 12, "AI-Generated Business Report", ln=True)
179
- pdf.ln(3)
180
- pdf.set_font(FONT_FAM if fonts_added else "Arial", "", 11)
181
- pdf.write_html(html)
182
- return bytes(pdf.output(dest="S"))
183
-
184
-
185
- def build_pptx(slides: tuple[str, ...], chart_map: dict[str, str]) -> bytes:
186
- prs = Presentation()
187
- layout = prs.slide_layouts[1]
188
-
189
- for raw in slides:
190
- if not raw.strip():
191
- continue
192
- raw_clean = fix_bullet(raw)
193
- chart_tags = extract_chart_tags(raw_clean)
194
-
195
- title, *body_lines = [ln.strip(" β€’-") for ln in raw_clean.splitlines() if ln.strip()]
196
- slide = prs.slides.add_slide(layout)
197
- slide.shapes.title.text = title or "Slide"
198
-
199
- tf = slide.shapes.placeholders[1].text_frame
200
- tf.clear()
201
- tf.word_wrap = True
202
- for line in body_lines:
203
- if "generate_chart" in line.lower():
204
- continue
205
- p = tf.add_paragraph()
206
- p.text = line
207
- p.font.size = Pt(20)
208
-
209
- for tag in chart_tags:
210
- if tag in chart_map:
211
  try:
212
- slide.shapes.add_picture(chart_map[tag], Inches(1), Inches(3.5), width=Inches(8))
213
- break
214
  except Exception:
215
  pass
 
 
 
 
 
 
 
 
 
 
 
 
 
216
 
217
- bio = io.BytesIO()
218
- prs.save(bio)
219
- return bio.getvalue()
 
220
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
221
 
222
  # ─────────────────────────────────────────────────────────────────────────────
223
  # SIMPLIFIED GENERATION LOGIC (without complex agents)
@@ -225,32 +302,41 @@ def build_pptx(slides: tuple[str, ...], chart_map: dict[str, str]) -> bytes:
225
  @st.cache_data(show_spinner=False)
226
  def generate_assets(_input_key, file_bytes, upl_name, mode, ctx):
227
  """Generate business assets using direct LLM calls and LangChain."""
228
- # 1) Load data
 
 
 
 
 
 
 
229
  try:
230
- df = (
231
- pd.read_excel(io.BytesIO(file_bytes))
232
- if upl_name.lower().endswith(".xlsx")
233
- else pd.read_csv(io.BytesIO(file_bytes))
234
  )
235
  except Exception as e:
236
- st.error(f"Failed to load data: {e}")
237
  return None
238
 
239
- # 2) Initialize LLM
240
- llm = ChatGoogleGenerativeAI(
241
- model="gemini-2.5-flash",
242
- google_api_key=API_KEY,
243
- temperature=0.1
244
- )
245
-
246
- # 3) Data context
247
- data_ctx = {
248
- "shape": df.shape,
249
- "columns": list(df.columns),
250
- "dtypes": df.dtypes.astype(str).to_dict(),
251
- "sample": df.head(3).to_dict(),
252
- "user_ctx": ctx or "General business analysis",
253
- }
 
 
254
 
255
  # 4) Generate content based on mode
256
  outputs = {}
@@ -266,6 +352,7 @@ def generate_assets(_input_key, file_bytes, upl_name, mode, ctx):
266
  <generate_chart: "description of chart">
267
 
268
  Structure the report with clear sections and professional formatting.
 
269
  """
270
  try:
271
  report_response = llm.invoke(report_prompt)
@@ -297,55 +384,65 @@ def generate_assets(_input_key, file_bytes, upl_name, mode, ctx):
297
  st.error("No content was generated.")
298
  return None
299
 
300
- # 5) Chart generation
301
  chart_descs = extract_chart_tags("\n".join(outputs.values()))
302
  chart_paths = {}
303
 
304
  if chart_descs:
305
- chart_agent = create_pandas_dataframe_agent(
306
- llm=llm,
307
- df=df,
308
- verbose=False,
309
- allow_dangerous_code=True,
310
- )
311
- for desc in chart_descs:
312
- with plt.ioff(): # Turn off interactive plotting
313
- try:
314
- chart_agent.run(
315
- f"Create a {desc} with matplotlib. Use clear labels & title; call plt.savefig when done."
316
- )
317
- fig = plt.gcf()
318
- if fig.get_axes():
319
- path = Path(tempfile.gettempdir()) / f"chart_{uuid.uuid4()}.png"
320
- fig.savefig(path, dpi=300, bbox_inches="tight", facecolor="white")
321
- chart_paths[desc] = str(path)
322
- plt.close("all")
323
- except Exception as e:
324
- print(f"Chart generation for '{desc}' failed: {e}") # For debugging
325
- plt.close("all") # Ensure figure is closed even on error
326
- pass
 
 
 
 
327
 
328
  # 6) Assemble outputs
329
  pdf_bytes = pptx_bytes = preview_md = None
330
  slides = []
331
 
332
  if "ReportAgent" in outputs:
333
- md_raw = fix_bullet(outputs["ReportAgent"])
334
- pdf_bytes = build_pdf(md_raw, chart_paths)
335
- preview_md = replace_chart_tags(
336
- md_raw,
337
- chart_paths,
338
- lambda p: f'<img src="data:image/png;base64,{base64.b64encode(open(p,"rb").read()).decode()}" style="max-width:100%;">',
339
- )
 
 
 
340
 
341
  if "PresentationAgent" in outputs:
342
- raw_slides_text = fix_bullet(outputs["PresentationAgent"])
343
- # robust splitter – starts at lines beginning with "Slide n"
344
- parts = re.split(r"(?im)^\s*slide\s+\d+\s*-?", raw_slides_text)[1:]
345
- slides = [p.strip() for p in parts if p.strip()]
 
346
 
347
- if slides:
348
- pptx_bytes = build_pptx(tuple(slides), chart_paths)
 
 
349
 
350
  return {
351
  "preview_md": preview_md,
@@ -356,19 +453,67 @@ def generate_assets(_input_key, file_bytes, upl_name, mode, ctx):
356
  "chart_count": len(chart_paths),
357
  }
358
 
359
-
360
  # ─────────────────────────────────────────────────────────────────────────────
361
  # UI
362
  # ─────────────────────────────────────────────────────────────────────────────
363
  mode = st.radio("Choose output format:", ["Report", "Presentation", "Both"], horizontal=True, index=2)
364
- upl = st.file_uploader("Upload business data", ["csv", "xlsx"])
365
- ctx = st.text_area("Business context (optional)")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
366
 
367
  if not st.button("πŸš€ Generate Narrative", type="primary"):
368
  st.stop()
369
 
370
  if not upl:
371
- st.warning("Please upload a CSV or XLSX file.")
372
  st.stop()
373
 
374
  # Generate unique bundle key for caching
@@ -377,6 +522,7 @@ bundle_key = sha1_bytes(b"".join([upl.getvalue(), mode.encode(), ctx.encode()]))
377
  # Check if we already have this bundle cached
378
  if bundle_key in st.session_state.bundles:
379
  bundle = st.session_state.bundles[bundle_key]
 
380
  else:
381
  with st.spinner("πŸ€– AI is analyzing your data and generating content..."):
382
  bundle = generate_assets(bundle_key, upl.getvalue(), upl.name, mode, ctx)
@@ -384,6 +530,7 @@ else:
384
  st.session_state.bundles[bundle_key] = bundle
385
 
386
  if not bundle:
 
387
  st.stop()
388
 
389
  if bundle.get("chart_count"):
 
46
  FONT_BLD = FONT_DIR / "NotoSans-Bold.ttf"
47
  FONT_FAM = "NotoSans"
48
  SLIDES = 7
 
49
 
50
+ # Enhanced API key handling
51
+ API_KEY = os.getenv("GEMINI_API_KEY")
52
  if not API_KEY:
53
+ st.error("⚠️ Error: GEMINI_API_KEY environment variable is not set.")
54
+ st.info("Please set your Google Gemini API key in the environment variables.")
55
  st.stop()
56
 
57
  try:
58
  GEM = genai.Client(api_key=API_KEY)
59
  except Exception as e:
60
+ st.error(f"❌ Failed to initialize Google GenAI Client: {e}")
61
  st.stop()
62
 
63
  # ─────────────────────────────────────────────────────────────────────────────
 
69
  st.session_state.slide_idx = 0
70
  if "active_bundle_key" not in st.session_state:
71
  st.session_state.active_bundle_key = None
72
+ if "upload_errors" not in st.session_state:
73
+ st.session_state.upload_errors = []
74
 
75
  # ─────────────────────────────────────────────────────────────────────────────
76
  # HELPERS
77
  # ─────────────────────────────────────────────────────────────────────────────
78
  sha1_bytes = lambda b: hashlib.sha1(b).hexdigest()
79
 
80
def validate_file_upload(uploaded_file, max_size_mb=50,
                         allowed_extensions=(".csv", ".xlsx", ".xls")):
    """Check an uploaded file object and collect human-readable problems.

    Args:
        uploaded_file: Object exposing ``name`` and ``size`` attributes,
            or ``None`` when nothing was uploaded.
        max_size_mb: Largest accepted size, in units of 1024 * 1024 bytes.
            Defaults to 50.
        allowed_extensions: Accepted lower-case suffixes, each including
            the leading dot.

    Returns:
        list[str]: One message per failed check; empty when every check
        passes. A missing file short-circuits with a single message.
    """
    if uploaded_file is None:
        return ["No file uploaded"]

    errors = []

    # Size ceiling.
    if uploaded_file.size > max_size_mb * 1024 * 1024:
        errors.append(f"File size exceeds {max_size_mb}MB limit")

    # Suffix is compared case-insensitively.
    file_ext = Path(uploaded_file.name).suffix.lower()
    if file_ext not in allowed_extensions:
        errors.append(f"File type '{file_ext}' not supported. Please upload CSV or Excel files.")

    # Zero-byte uploads are reported in addition to any problem above.
    if uploaded_file.size == 0:
        errors.append("File is empty")

    return errors
103
+
104
def load_dataframe_safely(file_bytes, filename):
    """Parse uploaded CSV/Excel bytes into a cleaned DataFrame.

    The format is chosen from the suffix of ``filename``. After loading,
    column names are converted to stripped strings and rows whose cells
    are all missing are dropped.

    Args:
        file_bytes: Raw content of the uploaded file.
        filename: Original file name; only its suffix is inspected.

    Returns:
        tuple: ``(df, None)`` on success, or ``(None, message)`` on any
        failure (unsupported suffix, parser error, or no usable data).
    """
    try:
        file_ext = Path(filename).suffix.lower()

        if file_ext == ".csv":
            # Strict encodings first. latin-1 maps every byte to a character,
            # so it has to come last: tried earlier it shadows cp1252 and
            # turns Windows-1252 punctuation into C1 control characters.
            for encoding in ("utf-8", "cp1252", "latin-1"):
                try:
                    df = pd.read_csv(io.BytesIO(file_bytes), encoding=encoding)
                    break
                except UnicodeDecodeError:
                    continue
            else:
                # Defensive only, since latin-1 cannot raise a decode error.
                # read_csv takes ``encoding_errors``; it has no ``errors``
                # keyword.
                df = pd.read_csv(
                    io.BytesIO(file_bytes),
                    encoding="utf-8",
                    encoding_errors="replace",
                )

        elif file_ext in (".xlsx", ".xls"):
            df = pd.read_excel(io.BytesIO(file_bytes))

        else:
            raise ValueError(f"Unsupported file format: {file_ext}")

        # Basic DataFrame validation
        if df.empty:
            raise ValueError("The uploaded file contains no data")

        if len(df.columns) == 0:
            raise ValueError("The uploaded file has no columns")

        # Normalize column labels to stripped strings.
        df.columns = df.columns.astype(str).str.strip()

        # Drop rows in which every cell is missing.
        df = df.dropna(how="all")

        if df.empty:
            raise ValueError("All rows in the file are empty")

        return df, None

    except Exception as e:
        # Callers expect an error string rather than an exception.
        return None, str(e)
147
 
148
  def fix_bullet(text: str) -> str:
149
  """Replace Windows-1252 bullets/dashes/quotes and strip other bad chars."""
150
+ if not isinstance(text, str):
151
+ return ""
152
+
153
  subs = {
154
  "\x95": "β€’",
155
  "\x96": "-",
 
163
  text = text.replace(bad, good)
164
  return re.sub(r"[\x80-\x9f]", "", text)
165
 
 
166
  def convert_pcm_to_wav(pcm_data: bytes, sample_rate=24000, channels=1, sample_width=2) -> bytes:
167
  """Wrap raw PCM data in a WAV container (in-memory)."""
168
  buf = io.BytesIO()
 
174
  buf.seek(0)
175
  return buf.getvalue()
176
 
 
177
  # ─── Gemini TTS ──────────────────────────────────────────────────────────────
178
  @st.cache_data(show_spinner=False)
179
  def generate_tts_audio(text_to_speak: str):
 
199
  except Exception:
200
  return None, None
201
 
 
202
  # ─── Chart-tag regex (single source of truth) ────────────────────────────────
203
TAG_RE = re.compile(
    r'[<\[]\s*generate_?chart\s*[:=]?\s*["\']?\s*([^>\]"\']+?)\s*["\']?\s*[>\]]',
    flags=re.IGNORECASE,
)


def extract_chart_tags(text: str) -> list[str]:
    """Collect the chart descriptors captured by ``TAG_RE`` in ``text``.

    Each descriptor appears once, in order of first occurrence. Non-string
    input yields an empty list.
    """
    if not isinstance(text, str):
        return []

    # A dict keeps insertion order, which gives first-seen de-duplication.
    first_seen = {}
    for match in TAG_RE.finditer(text):
        first_seen.setdefault(match.group(1), None)
    return list(first_seen)
213
 
 
214
def replace_chart_tags(text: str, chart_map: dict[str, str], repl_func) -> str:
    """Substitute chart placeholders whose descriptor is a key of ``chart_map``.

    For each ``TAG_RE`` match, the captured descriptor is looked up in
    ``chart_map``; when present, the whole placeholder is replaced by
    ``repl_func(chart_map[descriptor])``. Placeholders with unknown
    descriptors are left as they are. Non-string input yields ``""``.
    """
    if not isinstance(text, str):
        return ""

    def _swap(match):
        descriptor = match.group(1)
        if descriptor in chart_map:
            return repl_func(chart_map[descriptor])
        return match.group(0)

    return TAG_RE.sub(_swap, text)
222
 
 
223
  # ─────────────────────────────────────────────────────────────────────────────
224
  # PDF & PPTX BUILDERS
225
  # ─────────────────────────────────────────────────────────────────────────────
226
class PDF(FPDF, HTMLMixin):
    """FPDF document combined with HTMLMixin; adds no members of its own."""
228
 
 
229
def build_pdf(markdown_src: str, chart_map: dict[str, str]) -> bytes:
    """Render the Markdown report to PDF and return the document bytes.

    Steps: clean the text with ``fix_bullet``, turn Unicode bullets into
    ``*`` markers, replace chart placeholders with ``<img>`` tags pointing
    at the paths in ``chart_map``, convert the Markdown to HTML, and write
    that HTML into a one-document PDF under a fixed title line.

    Args:
        markdown_src: Report text in Markdown.
        chart_map: Chart descriptor -> image file path.

    Returns:
        bytes: The PDF content, or ``b""`` if any step raised (the error is
        also shown via ``st.error``).
    """
    try:
        markdown_src = fix_bullet(markdown_src).replace("\u2022", "*")
        markdown_src = replace_chart_tags(markdown_src, chart_map, lambda p: f'<img src="{p}">')
        html = MarkdownIt("commonmark", {"breaks": True}).enable("table").render(markdown_src)

        pdf = PDF()
        pdf.set_auto_page_break(True, margin=15)

        # Track each style separately: requesting the bold style of a family
        # that only has its regular face registered would abort the whole PDF.
        registered = set()
        for style, ttf in (("", FONT_REG), ("B", FONT_BLD)):
            if not ttf.exists():
                continue
            try:
                pdf.add_font(FONT_FAM, style, str(ttf), uni=True)
            except Exception as e:
                # Best effort: fall back to the built-in font for this style.
                print(f"Could not register font '{ttf}': {e}")
            else:
                registered.add(style)
        if registered:
            pdf.set_fallback_fonts([FONT_FAM])

        title_family = FONT_FAM if "B" in registered else "Arial"
        body_family = FONT_FAM if "" in registered else "Arial"

        pdf.add_page()
        pdf.set_font(title_family, "B", 18)
        pdf.cell(0, 12, "AI-Generated Business Report", ln=True)
        pdf.ln(3)
        pdf.set_font(body_family, "", 11)
        pdf.write_html(html)
        return bytes(pdf.output(dest="S"))
    except Exception as e:
        st.error(f"PDF generation failed: {e}")
        return b""
258
 
259
def build_pptx(slides: tuple[str, ...], chart_map: dict[str, str]) -> bytes:
    """Build a PowerPoint deck from slide texts and return the file bytes.

    For each non-blank entry, the first non-empty line becomes the slide
    title and the remaining lines become body paragraphs; lines holding a
    chart placeholder are left out of the text. The first chart descriptor
    on the slide that is present in ``chart_map`` and can be inserted is
    added as a picture.

    Args:
        slides: Raw text of each slide.
        chart_map: Chart descriptor -> image file path.

    Returns:
        bytes: The .pptx content, or ``b""`` if building failed (the error
        is also shown via ``st.error``).
    """
    try:
        prs = Presentation()
        layout = prs.slide_layouts[1]

        for raw in slides:
            if not raw.strip():
                continue
            raw_clean = fix_bullet(raw)
            chart_tags = extract_chart_tags(raw_clean)

            title, *body_lines = [ln.strip(" \u2022-") for ln in raw_clean.splitlines() if ln.strip()]
            slide = prs.slides.add_slide(layout)
            slide.shapes.title.text = title or "Slide"

            tf = slide.shapes.placeholders[1].text_frame
            tf.clear()
            tf.word_wrap = True

            # clear() keeps one empty paragraph; reuse it for the first line
            # so the body does not start with a blank bullet.
            first_paragraph_free = True
            for line in body_lines:
                # TAG_RE also covers the "generatechart" spelling; the plain
                # substring test keeps dropping malformed placeholders.
                if TAG_RE.search(line) or "generate_chart" in line.lower():
                    continue
                if first_paragraph_free:
                    p = tf.paragraphs[0]
                    first_paragraph_free = False
                else:
                    p = tf.add_paragraph()
                p.text = line
                p.font.size = Pt(20)

            # One picture per slide: stop at the first chart that inserts.
            for tag in chart_tags:
                if tag in chart_map:
                    try:
                        slide.shapes.add_picture(chart_map[tag], Inches(1), Inches(3.5), width=Inches(8))
                        break
                    except Exception as e:
                        # Best effort: try the next chart, but leave a trace.
                        print(f"Could not add chart '{tag}' to slide: {e}")

        bio = io.BytesIO()
        prs.save(bio)
        return bio.getvalue()
    except Exception as e:
        st.error(f"PowerPoint generation failed: {e}")
        return b""
298
 
299
  # ─────────────────────────────────────────────────────────────────────────────
300
  # SIMPLIFIED GENERATION LOGIC (without complex agents)
 
302
  @st.cache_data(show_spinner=False)
303
  def generate_assets(_input_key, file_bytes, upl_name, mode, ctx):
304
  """Generate business assets using direct LLM calls and LangChain."""
305
+
306
+ # 1) Load data with error handling
307
+ df, load_error = load_dataframe_safely(file_bytes, upl_name)
308
+ if load_error:
309
+ st.error(f"Failed to load data: {load_error}")
310
+ return None
311
+
312
+ # 2) Initialize LLM with error handling
313
  try:
314
+ llm = ChatGoogleGenerativeAI(
315
+ model="gemini-2.5-flash",
316
+ google_api_key=API_KEY,
317
+ temperature=0.1
318
  )
319
  except Exception as e:
320
+ st.error(f"Failed to initialize LLM: {e}")
321
  return None
322
 
323
+ # 3) Data context with error handling
324
+ try:
325
+ # Get basic stats safely
326
+ numeric_cols = df.select_dtypes(include=['number']).columns.tolist()
327
+ sample_data = df.head(3).fillna("N/A").to_dict()
328
+
329
+ data_ctx = {
330
+ "shape": df.shape,
331
+ "columns": list(df.columns),
332
+ "dtypes": df.dtypes.astype(str).to_dict(),
333
+ "sample": sample_data,
334
+ "numeric_columns": numeric_cols,
335
+ "user_ctx": ctx or "General business analysis",
336
+ }
337
+ except Exception as e:
338
+ st.error(f"Failed to analyze data structure: {e}")
339
+ return None
340
 
341
  # 4) Generate content based on mode
342
  outputs = {}
 
352
  <generate_chart: "description of chart">
353
 
354
  Structure the report with clear sections and professional formatting.
355
+ Keep the analysis relevant to the business context provided.
356
  """
357
  try:
358
  report_response = llm.invoke(report_prompt)
 
384
  st.error("No content was generated.")
385
  return None
386
 
387
+ # 5) Chart generation with enhanced error handling
388
  chart_descs = extract_chart_tags("\n".join(outputs.values()))
389
  chart_paths = {}
390
 
391
  if chart_descs:
392
+ try:
393
+ chart_agent = create_pandas_dataframe_agent(
394
+ llm=llm,
395
+ df=df,
396
+ verbose=False,
397
+ allow_dangerous_code=True,
398
+ )
399
+
400
+ for desc in chart_descs:
401
+ with plt.ioff(): # Turn off interactive plotting
402
+ try:
403
+ chart_agent.run(
404
+ f"Create a {desc} with matplotlib. Use clear labels & title; call plt.savefig when done."
405
+ )
406
+ fig = plt.gcf()
407
+ if fig.get_axes():
408
+ path = Path(tempfile.gettempdir()) / f"chart_{uuid.uuid4()}.png"
409
+ fig.savefig(path, dpi=300, bbox_inches="tight", facecolor="white")
410
+ chart_paths[desc] = str(path)
411
+ plt.close("all")
412
+ except Exception as e:
413
+ print(f"Chart generation for '{desc}' failed: {e}")
414
+ plt.close("all")
415
+ pass
416
+ except Exception as e:
417
+ st.warning(f"Chart generation system failed: {e}")
418
 
419
  # 6) Assemble outputs
420
  pdf_bytes = pptx_bytes = preview_md = None
421
  slides = []
422
 
423
  if "ReportAgent" in outputs:
424
+ try:
425
+ md_raw = fix_bullet(outputs["ReportAgent"])
426
+ pdf_bytes = build_pdf(md_raw, chart_paths)
427
+ preview_md = replace_chart_tags(
428
+ md_raw,
429
+ chart_paths,
430
+ lambda p: f'<img src="data:image/png;base64,{base64.b64encode(open(p,"rb").read()).decode()}" style="max-width:100%;">',
431
+ )
432
+ except Exception as e:
433
+ st.error(f"Report processing failed: {e}")
434
 
435
  if "PresentationAgent" in outputs:
436
+ try:
437
+ raw_slides_text = fix_bullet(outputs["PresentationAgent"])
438
+ # robust splitter – starts at lines beginning with "Slide n"
439
+ parts = re.split(r"(?im)^\s*slide\s+\d+\s*-?", raw_slides_text)[1:]
440
+ slides = [p.strip() for p in parts if p.strip()]
441
 
442
+ if slides:
443
+ pptx_bytes = build_pptx(tuple(slides), chart_paths)
444
+ except Exception as e:
445
+ st.error(f"Presentation processing failed: {e}")
446
 
447
  return {
448
  "preview_md": preview_md,
 
453
  "chart_count": len(chart_paths),
454
  }
455
 
 
456
  # ─────────────────────────────────────────────────────────────────────────────
457
  # UI
458
  # ─────────────────────────────────────────────────────────────────────────────
459
  mode = st.radio("Choose output format:", ["Report", "Presentation", "Both"], horizontal=True, index=2)
460
+
461
+ # Enhanced file uploader with validation
462
+ st.subheader("πŸ“ Upload Your Business Data")
463
+ upl = st.file_uploader(
464
+ "Choose a CSV or Excel file",
465
+ type=["csv", "xlsx", "xls"],
466
+ help="Supported formats: CSV, Excel (.xlsx, .xls). Maximum file size: 50MB"
467
+ )
468
+
469
+ # File validation
470
+ if upl is not None:
471
+ upload_errors = validate_file_upload(upl)
472
+
473
+ if upload_errors:
474
+ for error in upload_errors:
475
+ st.error(f"❌ {error}")
476
+ st.stop()
477
+ else:
478
+ # Show file info if valid
479
+ st.success(f"βœ… File '{upl.name}' uploaded successfully ({upl.size:,} bytes)")
480
+
481
+ # Preview data
482
+ try:
483
+ df_preview, preview_error = load_dataframe_safely(upl.getvalue(), upl.name)
484
+ if preview_error:
485
+ st.error(f"❌ Error reading file: {preview_error}")
486
+ st.stop()
487
+ else:
488
+ with st.expander("πŸ“Š Data Preview", expanded=False):
489
+ st.write(f"**Shape:** {df_preview.shape[0]} rows Γ— {df_preview.shape[1]} columns")
490
+ st.write("**Sample Data:**")
491
+ st.dataframe(df_preview.head())
492
+
493
+ # Show column info
494
+ col_info = pd.DataFrame({
495
+ 'Column': df_preview.columns,
496
+ 'Type': df_preview.dtypes,
497
+ 'Non-Null Count': df_preview.count(),
498
+ 'Null Count': df_preview.isnull().sum()
499
+ })
500
+ st.write("**Column Information:**")
501
+ st.dataframe(col_info)
502
+ except Exception as e:
503
+ st.error(f"❌ Error previewing file: {e}")
504
+ st.stop()
505
+
506
+ ctx = st.text_area(
507
+ "Business context (optional)",
508
+ placeholder="e.g., This is sales data for Q4 2024, focusing on regional performance...",
509
+ help="Provide context about your data to get more relevant insights"
510
+ )
511
 
512
  if not st.button("πŸš€ Generate Narrative", type="primary"):
513
  st.stop()
514
 
515
  if not upl:
516
+ st.warning("⚠️ Please upload a CSV or Excel file to continue.")
517
  st.stop()
518
 
519
  # Generate unique bundle key for caching
 
522
  # Check if we already have this bundle cached
523
  if bundle_key in st.session_state.bundles:
524
  bundle = st.session_state.bundles[bundle_key]
525
+ st.info("πŸ”„ Using cached results for this configuration.")
526
  else:
527
  with st.spinner("πŸ€– AI is analyzing your data and generating content..."):
528
  bundle = generate_assets(bundle_key, upl.getvalue(), upl.name, mode, ctx)
 
530
  st.session_state.bundles[bundle_key] = bundle
531
 
532
  if not bundle:
533
+ st.error("❌ Failed to generate content. Please try again.")
534
  st.stop()
535
 
536
  if bundle.get("chart_count"):