rairo committed on
Commit
2fa1ed7
·
verified ·
1 Parent(s): 6a5d04e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +137 -138
app.py CHANGED
@@ -1,13 +1,5 @@
1
- # ────────────────────────────────────────────────────────────────────────────────
2
- # app.py – Streamlit AI Business-Report demo (Google Gemini + ADK)
3
- # Requires: fpdf2 >= 2.7, streamlit, pandas, matplotlib, google-adk,
4
- # langchain-google-genai, langchain-experimental
5
- # Fonts: NotoSans-Regular.ttf & NotoSans-Bold.ttf are expected to live in the
6
- # same folder as this file (or adjust FONT_DIR below).
7
- # ────────────────────────────────────────────────────────────────────────────────
8
  import os, re, json, uuid, tempfile, asyncio
9
  from pathlib import Path
10
- import contextlib
11
 
12
  import pandas as pd
13
  import streamlit as st
@@ -15,43 +7,52 @@ import matplotlib
15
  matplotlib.use("Agg")
16
  import matplotlib.pyplot as plt
17
 
18
- from fpdf import FPDF, HTMLMixin # ➜ use HTML engine
19
- from markdown import markdown # ➜ md → html
 
20
 
21
- # Google AI & ADK
22
- from google import genai
23
- from google.adk.agents import LlmAgent, SequentialAgent
24
- from google.adk.runners import Runner
25
- from google.adk.sessions import InMemorySessionService
26
- from google.genai import types
27
-
28
- # LangChain helper
29
- from langchain_experimental.agents import create_pandas_dataframe_agent
30
- from langchain_google_genai import ChatGoogleGenerativeAI
31
-
32
- # ──────────────────────────────────────────────────────────
33
  # 1️⃣ Environment & font setup
34
- # ──────────────────────────────────────────────────────────
35
  os.environ["STREAMLIT_CONFIG_DIR"] = tempfile.gettempdir()
36
  os.environ["MPLCONFIGDIR"] = tempfile.gettempdir()
37
 
38
- HERE = Path(__file__).parent
39
- FONT_NAME = "NotoSans"
40
- FONT_REGULAR_TTF = HERE / "NotoSans-Regular.ttf"
41
- FONT_BOLD_TTF = HERE / "NotoSans-Bold.ttf"
42
 
43
- # ──────────────────────────────────────────────────────────
44
- # 2️⃣ Little helpers
45
- # ──────────────────────────────────────────────────────────
46
- class PDF(FPDF, HTMLMixin):
47
- """FPDF subclass with HTML support and sensible defaults."""
48
- pass
49
 
 
 
 
 
50
 
51
- CHART_TAG_REGEX = r'<generate_chart:\s*"([^"]+)"\s*>'
 
 
 
 
 
 
52
 
 
 
 
 
 
 
 
 
 
 
 
53
 
54
- async def run_with_runner_async(root_agent, context):
55
  svc = InMemorySessionService()
56
  app_name = "business_report_app"
57
  user_id = "user1"
@@ -66,56 +67,22 @@ async def run_with_runner_async(root_agent, context):
66
  session_service=svc)
67
 
68
  content = types.Content(role="user",
69
- parts=[types.Part(text=json.dumps(context))])
70
 
71
  async for event in runner.run_async(user_id=user_id,
72
  session_id=session_id,
73
  new_message=content):
74
  if event.is_final_response():
75
  return event.content.parts[0].text
76
- return None
77
-
78
-
79
- def add_markdown_fragment(pdf: PDF, md_text: str):
80
- """Render a chunk of Markdown into the PDF using HTMLMixin."""
81
- html = markdown(md_text, extensions=["tables", "fenced_code"])
82
- pdf.write_html(html)
83
-
84
-
85
- def save_current_figure() -> Path:
86
- """Save the current matplotlib figure to a PNG and return the path."""
87
- path = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.png"
88
- plt.tight_layout()
89
- plt.savefig(path, dpi=300, bbox_inches="tight")
90
- return path
91
-
92
-
93
- # ──────────────────────────────────────────────────────────
94
- # 3️⃣ Streamlit UI
95
- # ──────────────────────────────────────────────────────────
96
- st.set_page_config(page_title="📊 AI Business Report", layout="wide")
97
- st.title("📊 AI-Generated Business Report")
98
-
99
- api_key = os.getenv("GEMINI_API_KEY")
100
- if not api_key:
101
- st.error("❌ Environment variable `GEMINI_API_KEY` not found.")
102
- st.stop()
103
-
104
- # Quick client check
105
- with contextlib.suppress(Exception):
106
- genai.Client(api_key=api_key)
107
 
108
- uploaded = st.file_uploader("Upload CSV or XLSX dataset", ["csv", "xlsx"])
109
- user_ctx = st.text_area("Optional additional business context")
110
- run_button = st.button("🚀 Generate Report")
111
-
112
- # ──────────────────────────────────────────────────────────
113
- # 4️⃣ Main logic
114
- # ──────────────────────────────────────────────────────────
115
  if run_button:
116
- # ›› 4.1 read file
117
  if not uploaded:
118
- st.warning("⚠️ Please upload a dataset first.")
119
  st.stop()
120
 
121
  try:
@@ -127,24 +94,25 @@ if run_button:
127
  st.error(f"Failed to read file: {e}")
128
  st.stop()
129
 
130
- # ›› 4.2 build agents
 
131
  instruction = """
132
- You are a senior business analyst. Write an executive-level **Markdown** report
133
  covering descriptive statistics, key insights, and recommendations.
134
-
135
- When you need a visual, insert a placeholder exactly like:
136
- <generate_chart: "bar chart of total_sales by region">
137
  """
138
  report_agent = LlmAgent(
139
  name="ReportAgent",
140
  model="gemini-2.5-flash",
141
  description="Creates an executive business analysis report in Markdown",
142
- instruction=instruction,
143
  )
144
- root_agent = SequentialAgent("ReportPipeline", [report_agent])
 
145
 
146
- # ›› 4.3 context
147
- ctx = {
148
  "dataset_info": {
149
  "shape": df.shape,
150
  "columns": list(df.columns),
@@ -152,69 +120,100 @@ When you need a visual, insert a placeholder exactly like:
152
  "missing": df.isna().sum().to_dict(),
153
  },
154
  "user_context": user_ctx,
155
- "preview": df.head().to_dict(),
156
  }
157
 
158
- with st.spinner("🔍 Running AI analysis …"):
159
- report_md = asyncio.run(run_with_runner_async(root_agent, ctx))
 
 
 
160
 
161
  if not report_md:
162
- st.error("⚠️ No response from AI.")
163
  st.stop()
164
 
165
- # ›› 4.4 initialise helpers
166
- pandas_agent = create_pandas_dataframe_agent(
167
- llm=ChatGoogleGenerativeAI(model="gemini-2.0-flash",
168
- google_api_key=api_key),
169
- df=df,
170
- verbose=False,
171
- allow_dangerous_code=True,
172
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
173
 
174
  pdf = PDF()
175
- pdf.set_auto_page_break(True, margin=15)
176
- pdf.add_font(FONT_NAME, "", str(FONT_REGULAR_TTF), uni=True)
177
  pdf.add_font(FONT_NAME, "B", str(FONT_BOLD_TTF), uni=True)
178
  pdf.set_fallback_fonts([FONT_NAME])
179
  pdf.add_page()
180
-
181
- # ›› 4.5 write title
182
- pdf.set_font(FONT_NAME, "B", 16)
183
- pdf.cell(0, 10, "AI-Generated Business Report", ln=True)
184
  pdf.ln(4)
185
 
186
- # ›› 4.6 walk through the Markdown, replacing chart tags in-place
187
- segments = re.split(CHART_TAG_REGEX, report_md)
188
-
189
- for idx, segment in enumerate(segments):
190
- # even indices → text, odd → chart description
191
- if idx % 2 == 0:
192
- if segment.strip():
193
- st.markdown(segment, unsafe_allow_html=True)
194
- add_markdown_fragment(pdf, segment)
195
- else:
196
- desc = segment.strip()
197
- # Generate the chart
198
- _ = pandas_agent.run(f"Create a {desc} using matplotlib")
199
- fig = plt.gcf()
200
- st.pyplot(fig, use_container_width=True)
201
-
202
- # Save & insert into PDF
203
- img_path = save_current_figure()
204
- pdf.image(str(img_path), w=pdf.epw)
205
- pdf.ln(4)
206
- plt.close()
207
-
208
- # ›› 4.7 stream PDF to user
209
- raw_pdf = pdf.output(dest="S")
210
- pdf_bytes = (
211
- raw_pdf.encode("latin-1", "replace") # PyFPDF 1.x
212
- if isinstance(raw_pdf, str) else raw_pdf # fpdf2 2.x
213
- )
214
-
215
- st.download_button(
216
- "⬇️ Download PDF",
217
- pdf_bytes,
218
- file_name="business_report.pdf",
219
- mime="application/pdf",
220
- )
 
 
 
 
 
 
 
 
1
  import os, re, json, uuid, tempfile, asyncio
2
  from pathlib import Path
 
3
 
4
  import pandas as pd
5
  import streamlit as st
 
7
  matplotlib.use("Agg")
8
  import matplotlib.pyplot as plt
9
 
10
+ from fpdf import FPDF, HTMLMixin
11
+ from markdown import markdown # NEW
12
+ from markdown_it import MarkdownIt # prettier HTML (optional)
13
 
14
+ # ──────────────────────────────────────────────────────────────────────────────
 
 
 
 
 
 
 
 
 
 
 
15
  # 1️⃣ Environment & font setup
16
+ # ──────────────────────────────────────────────────────────────────────────────
17
  os.environ["STREAMLIT_CONFIG_DIR"] = tempfile.gettempdir()
18
  os.environ["MPLCONFIGDIR"] = tempfile.gettempdir()
19
 
20
+ FONT_DIR = Path(__file__).parent
21
+ FONT_NAME = "NotoSans"
22
+ FONT_REGULAR_TTF = FONT_DIR / "NotoSans-Regular.ttf"
23
+ FONT_BOLD_TTF = FONT_DIR / "NotoSans-Bold.ttf"
24
 
25
+ # ──────────────────────────────────────────────────────────────────────────────
26
+ # 2️⃣ Streamlit UI
27
+ # ──────────────────────────────────────────────────────────────────────────────
28
+ st.set_page_config(page_title="AI Business Report", layout="wide")
29
+ st.title("📊 AI-Generated Business Report")
 
30
 
31
+ api_key = os.getenv("GEMINI_API_KEY")
32
+ if not api_key:
33
+ st.error("❌ Environment variable `GEMINI_API_KEY` not found.")
34
+ st.stop()
35
 
36
+ # Quick Gemini client check
37
+ from google import genai
38
+ try:
39
+ genai.Client(api_key=api_key)
40
+ except Exception as e:
41
+ st.exception(e)
42
+ st.stop()
43
 
44
+ uploaded = st.file_uploader("Upload CSV or XLSX dataset", ["csv", "xlsx"])
45
+ user_ctx = st.text_area("Optional additional business context")
46
+ run_button = st.button("🚀 Generate Report")
47
+
48
+ # ──────────────────────────────────────────────────────────────────────────────
49
+ # 3️⃣ Async helper to drive ADK Runner (unchanged)
50
+ # ──────────────────────────────────────────────────────────────────────────────
51
+ async def run_with_runner_async(root_agent, context_data):
52
+ from google.adk.sessions import InMemorySessionService
53
+ from google.adk.runners import Runner
54
+ from google.genai import types
55
 
 
56
  svc = InMemorySessionService()
57
  app_name = "business_report_app"
58
  user_id = "user1"
 
67
  session_service=svc)
68
 
69
  content = types.Content(role="user",
70
+ parts=[types.Part(text=json.dumps(context_data))])
71
 
72
  async for event in runner.run_async(user_id=user_id,
73
  session_id=session_id,
74
  new_message=content):
75
  if event.is_final_response():
76
  return event.content.parts[0].text
77
+ return None # should not occur
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
 
79
+ # ──────────────────────────────────────────────────────────────────────────────
80
+ # 4️⃣ Main execution block
81
+ # ──────────────────────────────────────────────────────────────────────────────
 
 
 
 
82
  if run_button:
83
+ # 4.1 Load dataset
84
  if not uploaded:
85
+ st.warning("⚠️ Please upload a dataset first.")
86
  st.stop()
87
 
88
  try:
 
94
  st.error(f"Failed to read file: {e}")
95
  st.stop()
96
 
97
+ # 4.2 Build the report-writing agent (unchanged)
98
+ from google.adk.agents import LlmAgent, SequentialAgent
99
  instruction = """
100
+ You are a senior business analyst. Write an executive-level Markdown report
101
  covering descriptive statistics, key insights, and recommendations.
102
+ Insert placeholder tags for visualisations like:
103
+ <generate_chart: "bar chart of total_sales by region">
 
104
  """
105
  report_agent = LlmAgent(
106
  name="ReportAgent",
107
  model="gemini-2.5-flash",
108
  description="Creates an executive business analysis report in Markdown",
109
+ instruction=instruction
110
  )
111
+ root_agent = SequentialAgent(name="ReportPipeline",
112
+ sub_agents=[report_agent])
113
 
114
+ # 4.3 Context passed to Gemini
115
+ context_data = {
116
  "dataset_info": {
117
  "shape": df.shape,
118
  "columns": list(df.columns),
 
120
  "missing": df.isna().sum().to_dict(),
121
  },
122
  "user_context": user_ctx,
123
+ "preview": df.head().to_dict()
124
  }
125
 
126
+ # 4.4 Run the AI pipeline
127
+ with st.spinner("🔍 Running AI analysis…"):
128
+ report_md = asyncio.run(
129
+ run_with_runner_async(root_agent, context_data)
130
+ )
131
 
132
  if not report_md:
133
+ st.error("⚠️ No response from AI.")
134
  st.stop()
135
 
136
+ # 4.5 Parse chart placeholders & generate charts ★ REWORKED ★
137
+ chart_tags = re.findall(r'<generate_chart:\s*"([^"]+)"\s*>', report_md)
138
+ tag_to_path = {} # desc → temp PNG path
139
+ chart_html_snippets = {} # desc → <img …>
140
+
141
+ if chart_tags:
142
+ from langchain_experimental.agents import create_pandas_dataframe_agent
143
+ from langchain_google_genai import ChatGoogleGenerativeAI
144
+
145
+ pandas_agent = create_pandas_dataframe_agent(
146
+ llm=ChatGoogleGenerativeAI(model="gemini-2.0-flash",
147
+ google_api_key=api_key),
148
+ df=df,
149
+ verbose=False,
150
+ allow_dangerous_code=True
151
+ )
152
+ st.subheader("📈 Generated Charts")
153
+
154
+ for desc in chart_tags:
155
+ # LLM draws the chart with matplotlib
156
+ _ = pandas_agent.run(f"Create a {desc} using matplotlib")
157
+ fig = plt.gcf()
158
+
159
+ tmp_path = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.png"
160
+ fig.savefig(tmp_path, dpi=300, bbox_inches="tight")
161
+ plt.close()
162
+
163
+ tag_to_path[desc] = str(tmp_path)
164
+ chart_html_snippets[desc] = (
165
+ f'<img src="{tmp_path}" width="100%" style="margin:10px 0;" />'
166
+ )
167
+
168
+ # Immediate visual feedback in Streamlit
169
+ st.image(tmp_path, caption=desc, use_column_width=True)
170
+
171
+ # 4.6 Replace tags with <img> HTML
172
+ for desc, html_img in chart_html_snippets.items():
173
+ report_md = re.sub(
174
+ rf'<generate_chart:\s*"{re.escape(desc)}"\s*>',
175
+ html_img,
176
+ report_md
177
+ )
178
+
179
+ # 4.7 Show nicely-rendered preview in Streamlit
180
+ st.subheader("🔖 Draft Report")
181
+ st.markdown(report_md, unsafe_allow_html=True)
182
+
183
+ # 4.8 Build PDF with HTML rendering (Markdown → HTML) ★ NEW SECTION ★
184
+ class PDF(FPDF, HTMLMixin):
185
+ pass
186
 
187
  pdf = PDF()
188
+ pdf.set_auto_page_break(auto=True, margin=15)
189
+ pdf.add_font(FONT_NAME, "", str(FONT_REGULAR_TTF), uni=True)
190
  pdf.add_font(FONT_NAME, "B", str(FONT_BOLD_TTF), uni=True)
191
  pdf.set_fallback_fonts([FONT_NAME])
192
  pdf.add_page()
193
+ pdf.set_font(FONT_NAME, "B", 18)
194
+ pdf.cell(0, 12, "AI-Generated Business Report", ln=True)
 
 
195
  pdf.ln(4)
196
 
197
+ # Markdown → HTML
198
+ # (use markdown-it-py if available – better table support)
199
+ if 'MarkdownIt' in globals():
200
+ md2html = MarkdownIt("commonmark", {"breaks": True}).enable("table")
201
+ html_body = md2html.render(report_md)
202
+ else:
203
+ html_body = markdown(report_md, extensions=["tables"])
204
+
205
+ # For safety: replace bullets “•” with unicode bullet char if markdown() escaped it
206
+ html_body = html_body.replace("&bull;", "•")
207
+
208
+ pdf.set_font(FONT_NAME, "", 11)
209
+ pdf.write_html(html_body)
210
+
211
+ # 4.9 Produce bytes without re-encoding to latin-1
212
+ pdf_bytes = pdf.output(dest="S")
213
+ if not isinstance(pdf_bytes, (bytes, bytearray)):
214
+ pdf_bytes = bytes(pdf_bytes, "utf-8") # never latin-1!
215
+
216
+ st.download_button("⬇️ Download PDF",
217
+ pdf_bytes,
218
+ file_name="business_report.pdf",
219
+ mime="application/pdf")