rairo committed on
Commit
3f375e9
·
verified ·
1 Parent(s): f6ef3b3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +121 -91
app.py CHANGED
@@ -7,6 +7,7 @@
7
  # ────────────────────────────────────────────────────────────────────────────────
8
  import os, re, json, uuid, tempfile, asyncio
9
  from pathlib import Path
 
10
 
11
  import pandas as pd
12
  import streamlit as st
@@ -14,7 +15,8 @@ import matplotlib
14
  matplotlib.use("Agg")
15
  import matplotlib.pyplot as plt
16
 
17
- from fpdf import FPDF
 
18
 
19
  # Google AI & ADK
20
  from google import genai
@@ -23,47 +25,33 @@ from google.adk.runners import Runner
23
  from google.adk.sessions import InMemorySessionService
24
  from google.genai import types
25
 
26
- # LangChain helper for pandas + Gemini
27
  from langchain_experimental.agents import create_pandas_dataframe_agent
28
  from langchain_google_genai import ChatGoogleGenerativeAI
29
 
30
- # ------------------------------------------------------------------------------
31
  # 1️⃣ Environment & font setup
32
- # ------------------------------------------------------------------------------
33
  os.environ["STREAMLIT_CONFIG_DIR"] = tempfile.gettempdir()
34
  os.environ["MPLCONFIGDIR"] = tempfile.gettempdir()
35
 
36
- FONT_DIR = Path(__file__).parent # adjust if fonts in sub-dir
37
- FONT_NAME = "NotoSans"
38
- FONT_REGULAR_TTF = FONT_DIR / "NotoSans-Regular.ttf"
39
- FONT_BOLD_TTF = FONT_DIR / "NotoSans-Bold.ttf"
40
 
41
- # ------------------------------------------------------------------------------
42
- # 2️⃣ Streamlit UI
43
- # ------------------------------------------------------------------------------
44
- st.set_page_config(page_title="AI Business Report", layout="wide")
45
- st.title("📊 AI-Generated Business Report")
 
46
 
47
- api_key = os.getenv("GEMINI_API_KEY")
48
- if not api_key:
49
- st.error("❌ Environment variable `GEMINI_API_KEY` not found.")
50
- st.stop()
51
 
52
- # Quick client check
53
- try:
54
- genai.Client(api_key=api_key)
55
- except Exception as e:
56
- st.exception(e)
57
- st.stop()
58
 
59
- uploaded = st.file_uploader("Upload CSV or XLSX dataset", ["csv", "xlsx"])
60
- user_ctx = st.text_area("Optional additional business context")
61
- run_button = st.button("🚀 Generate Report")
62
 
63
- # ------------------------------------------------------------------------------
64
- # 3️⃣ Async helper to drive ADK Runner
65
- # ------------------------------------------------------------------------------
66
- async def run_with_runner_async(root_agent, context_data):
67
  svc = InMemorySessionService()
68
  app_name = "business_report_app"
69
  user_id = "user1"
@@ -78,24 +66,56 @@ async def run_with_runner_async(root_agent, context_data):
78
  session_service=svc)
79
 
80
  content = types.Content(role="user",
81
- parts=[types.Part(text=json.dumps(context_data))])
82
 
83
  async for event in runner.run_async(user_id=user_id,
84
  session_id=session_id,
85
  new_message=content):
86
  if event.is_final_response():
87
  return event.content.parts[0].text
88
- return None # fall-through (shouldn’t happen)
 
89
 
 
 
 
 
90
 
91
 
92
- # ------------------------------------------------------------------------------
93
- # 4️⃣ Main execution block
94
- # ------------------------------------------------------------------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
  if run_button:
96
- # 4.1 Load dataset
97
  if not uploaded:
98
- st.warning("⚠️ Please upload a dataset first.")
99
  st.stop()
100
 
101
  try:
@@ -107,24 +127,24 @@ if run_button:
107
  st.error(f"Failed to read file: {e}")
108
  st.stop()
109
 
110
- # 4.2 Build the report-writing agent
111
  instruction = """
112
- You are a senior business analyst. Write an executive-level Markdown report
113
  covering descriptive statistics, key insights, and recommendations.
114
- Insert placeholder tags for visualisations like:
115
- <generate_chart: "bar chart of total_sales by region">
116
- """
 
117
  report_agent = LlmAgent(
118
  name="ReportAgent",
119
  model="gemini-2.5-flash",
120
  description="Creates an executive business analysis report in Markdown",
121
- instruction=instruction
122
  )
123
- root_agent = SequentialAgent(name="ReportPipeline",
124
- sub_agents=[report_agent])
125
 
126
- # 4.3 Context passed to Gemini
127
- context_data = {
128
  "dataset_info": {
129
  "shape": df.shape,
130
  "columns": list(df.columns),
@@ -132,59 +152,69 @@ Insert placeholder tags for visualisations like:
132
  "missing": df.isna().sum().to_dict(),
133
  },
134
  "user_context": user_ctx,
135
- "preview": df.head().to_dict()
136
  }
137
 
138
- # 4.4 Run the AI pipeline
139
- with st.spinner("🔍 Running AI analysis…"):
140
- report_md = asyncio.run(
141
- run_with_runner_async(root_agent, context_data)
142
- )
143
 
144
  if not report_md:
145
- st.error("⚠️ No response from AI.")
146
  st.stop()
147
 
148
- st.subheader("🔖 Draft Report")
149
- st.markdown(report_md, unsafe_allow_html=True)
150
-
151
- # 4.5 Parse chart placeholders & generate with pandas agent
152
- chart_prompts = re.findall(r'<generate_chart:\s*"([^"]+)"\s*>', report_md)
153
- if chart_prompts:
154
- st.subheader("📈 Generated Charts")
155
- pandas_agent = create_pandas_dataframe_agent(
156
- llm=ChatGoogleGenerativeAI(model="gemini-2.0-flash",
157
- google_api_key=api_key),
158
- df=df,
159
- verbose=False,
160
- allow_dangerous_code=True
161
- )
162
- for desc in chart_prompts:
163
- _ = pandas_agent.run(f"Create a {desc} using matplotlib")
164
- fig = plt.gcf()
165
- st.pyplot(fig, use_container_width=True)
166
- plt.close()
167
-
168
- # 4.6 Build PDF with Noto Sans (Unicode-safe)
169
- pdf = FPDF()
170
- pdf.set_auto_page_break(auto=True, margin=15)
171
 
172
- pdf.add_font(FONT_NAME, "", str(FONT_REGULAR_TTF), uni=True)
 
 
173
  pdf.add_font(FONT_NAME, "B", str(FONT_BOLD_TTF), uni=True)
174
  pdf.set_fallback_fonts([FONT_NAME])
175
-
176
  pdf.add_page()
 
 
177
  pdf.set_font(FONT_NAME, "B", 16)
178
  pdf.cell(0, 10, "AI-Generated Business Report", ln=True)
179
- pdf.ln(5)
180
-
181
- pdf.set_font(FONT_NAME, "", 11)
182
- pdf.multi_cell(0, 5, report_md)
183
-
184
- pdf_bytes = pdf.output(dest="S") # returns bytearray / bytes
185
- # If you insist on a bytes object:
186
- pdf_bytes = bytes(pdf_bytes) # no-op when it’s already bytes
187
- st.download_button("⬇️ Download PDF",
188
- pdf_bytes,
189
- file_name="business_report.pdf",
190
- mime="application/pdf")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  # ────────────────────────────────────────────────────────────────────────────────
8
  import os, re, json, uuid, tempfile, asyncio
9
  from pathlib import Path
10
+ import contextlib
11
 
12
  import pandas as pd
13
  import streamlit as st
 
15
  matplotlib.use("Agg")
16
  import matplotlib.pyplot as plt
17
 
18
+ from fpdf import FPDF, HTMLMixin # ➜ use HTML engine
19
+ from markdown import markdown # ➜ md → html
20
 
21
  # Google AI & ADK
22
  from google import genai
 
25
  from google.adk.sessions import InMemorySessionService
26
  from google.genai import types
27
 
28
+ # LangChain helper
29
  from langchain_experimental.agents import create_pandas_dataframe_agent
30
  from langchain_google_genai import ChatGoogleGenerativeAI
31
 
32
+ # ──────────────────────────────────────────────────────────
33
  # 1️⃣ Environment & font setup
34
+ # ──────────────────────────────────────────────────────────
35
  os.environ["STREAMLIT_CONFIG_DIR"] = tempfile.gettempdir()
36
  os.environ["MPLCONFIGDIR"] = tempfile.gettempdir()
37
 
38
+ HERE = Path(__file__).parent
39
+ FONT_NAME = "NotoSans"
40
+ FONT_REGULAR_TTF = HERE / "NotoSans-Regular.ttf"
41
+ FONT_BOLD_TTF = HERE / "NotoSans-Bold.ttf"
42
 
43
+ # ──────────────────────────────────────────────────────────
44
+ # 2️⃣ Little helpers
45
+ # ──────────────────────────────────────────────────────────
46
+ class PDF(FPDF, HTMLMixin):
47
+ """FPDF subclass with HTML support and sensible defaults."""
48
+ pass
49
 
 
 
 
 
50
 
51
+ CHART_TAG_REGEX = r'<generate_chart:\s*"([^"]+)"\s*>'
 
 
 
 
 
52
 
 
 
 
53
 
54
+ async def run_with_runner_async(root_agent, context):
 
 
 
55
  svc = InMemorySessionService()
56
  app_name = "business_report_app"
57
  user_id = "user1"
 
66
  session_service=svc)
67
 
68
  content = types.Content(role="user",
69
+ parts=[types.Part(text=json.dumps(context))])
70
 
71
  async for event in runner.run_async(user_id=user_id,
72
  session_id=session_id,
73
  new_message=content):
74
  if event.is_final_response():
75
  return event.content.parts[0].text
76
+ return None
77
+
78
 
79
+ def add_markdown_fragment(pdf: PDF, md_text: str):
80
+ """Render a chunk of Markdown into the PDF using HTMLMixin."""
81
+ html = markdown(md_text, extensions=["tables", "fenced_code"])
82
+ pdf.write_html(html)
83
 
84
 
85
+ def save_current_figure() -> Path:
86
+ """Save the current matplotlib figure to a PNG and return the path."""
87
+ path = Path(tempfile.gettempdir()) / f"{uuid.uuid4()}.png"
88
+ plt.tight_layout()
89
+ plt.savefig(path, dpi=300, bbox_inches="tight")
90
+ return path
91
+
92
+
93
+ # ──────────────────────────────────────────────────────────
94
+ # 3️⃣ Streamlit UI
95
+ # ──────────────────────────────────────────────────────────
96
+ st.set_page_config(page_title="📊 AI Business Report", layout="wide")
97
+ st.title("📊 AI-Generated Business Report")
98
+
99
+ api_key = os.getenv("GEMINI_API_KEY")
100
+ if not api_key:
101
+ st.error("❌ Environment variable `GEMINI_API_KEY` not found.")
102
+ st.stop()
103
+
104
+ # Quick client check
105
+ with contextlib.suppress(Exception):
106
+ genai.Client(api_key=api_key)
107
+
108
+ uploaded = st.file_uploader("Upload CSV or XLSX dataset", ["csv", "xlsx"])
109
+ user_ctx = st.text_area("Optional additional business context")
110
+ run_button = st.button("🚀 Generate Report")
111
+
112
+ # ──────────────────────────────────────────────────────────
113
+ # 4️⃣ Main logic
114
+ # ──────────────────────────────────────────────────────────
115
  if run_button:
116
+ # ›› 4.1 read file
117
  if not uploaded:
118
+ st.warning("⚠️ Please upload a dataset first.")
119
  st.stop()
120
 
121
  try:
 
127
  st.error(f"Failed to read file: {e}")
128
  st.stop()
129
 
130
+ # ›› 4.2 build agents
131
  instruction = """
132
+ You are a senior business analyst. Write an executive-level **Markdown** report
133
  covering descriptive statistics, key insights, and recommendations.
134
+
135
+ When you need a visual, insert a placeholder exactly like:
136
+ <generate_chart: "bar chart of total_sales by region">
137
+ """
138
  report_agent = LlmAgent(
139
  name="ReportAgent",
140
  model="gemini-2.5-flash",
141
  description="Creates an executive business analysis report in Markdown",
142
+ instruction=instruction,
143
  )
144
+ root_agent = SequentialAgent("ReportPipeline", [report_agent])
 
145
 
146
+ # ›› 4.3 context
147
+ ctx = {
148
  "dataset_info": {
149
  "shape": df.shape,
150
  "columns": list(df.columns),
 
152
  "missing": df.isna().sum().to_dict(),
153
  },
154
  "user_context": user_ctx,
155
+ "preview": df.head().to_dict(),
156
  }
157
 
158
+ with st.spinner("🔍 Running AI analysis …"):
159
+ report_md = asyncio.run(run_with_runner_async(root_agent, ctx))
 
 
 
160
 
161
  if not report_md:
162
+ st.error("⚠️ No response from AI.")
163
  st.stop()
164
 
165
+ # ›› 4.4 initialise helpers
166
+ pandas_agent = create_pandas_dataframe_agent(
167
+ llm=ChatGoogleGenerativeAI(model="gemini-2.0-flash",
168
+ google_api_key=api_key),
169
+ df=df,
170
+ verbose=False,
171
+ allow_dangerous_code=True,
172
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
173
 
174
+ pdf = PDF()
175
+ pdf.set_auto_page_break(True, margin=15)
176
+ pdf.add_font(FONT_NAME, "", str(FONT_REGULAR_TTF), uni=True)
177
  pdf.add_font(FONT_NAME, "B", str(FONT_BOLD_TTF), uni=True)
178
  pdf.set_fallback_fonts([FONT_NAME])
 
179
  pdf.add_page()
180
+
181
+ # ›› 4.5 write title
182
  pdf.set_font(FONT_NAME, "B", 16)
183
  pdf.cell(0, 10, "AI-Generated Business Report", ln=True)
184
+ pdf.ln(4)
185
+
186
+ # ›› 4.6 walk through the Markdown, replacing chart tags in-place
187
+ segments = re.split(CHART_TAG_REGEX, report_md)
188
+
189
+ for idx, segment in enumerate(segments):
190
+ # even indices → text, odd → chart description
191
+ if idx % 2 == 0:
192
+ if segment.strip():
193
+ st.markdown(segment, unsafe_allow_html=True)
194
+ add_markdown_fragment(pdf, segment)
195
+ else:
196
+ desc = segment.strip()
197
+ # Generate the chart
198
+ _ = pandas_agent.run(f"Create a {desc} using matplotlib")
199
+ fig = plt.gcf()
200
+ st.pyplot(fig, use_container_width=True)
201
+
202
+ # Save & insert into PDF
203
+ img_path = save_current_figure()
204
+ pdf.image(str(img_path), w=pdf.epw)
205
+ pdf.ln(4)
206
+ plt.close()
207
+
208
+ # ›› 4.7 stream PDF to user
209
+ raw_pdf = pdf.output(dest="S")
210
+ pdf_bytes = (
211
+ raw_pdf.encode("latin-1", "replace") # PyFPDF 1.x
212
+ if isinstance(raw_pdf, str) else raw_pdf # fpdf2 2.x
213
+ )
214
+
215
+ st.download_button(
216
+ "⬇️ Download PDF",
217
+ pdf_bytes,
218
+ file_name="business_report.pdf",
219
+ mime="application/pdf",
220
+ )