devesh1011 commited on
Commit
b694626
·
1 Parent(s): 2797026

Initial Commit

Browse files
.gitignore ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ __pycache__
2
+ .venv
3
+ .env
.python-version ADDED
@@ -0,0 +1 @@
 
 
1
+ 3.12
agents/info_extractor_agent.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from agno.agent import Agent
2
+ from agno.models.google import Gemini
3
+ from textwrap import dedent
4
+
5
# Agent that turns a timestamped demo-call transcription into structured JSON
# (product, prospect, pain points, demonstrated features with timestamps,
# next steps, unanswered questions) for downstream microsite generation.
info_extractor = Agent(
    model=Gemini(id="gemini-2.0-flash-001", response_modalities=["text"]),
    description=dedent(
        """\
        Extracts key information from product demo call transcriptions.
        Analyzes conversation context to identify product details, prospect pain points,
        demonstrated features with timestamps, and actionable next steps, structuring
        the output for microsite generation."""
    ),
    instructions=dedent(
        """\
        Given a timestamped product demo call transcription, extract the following information.
        Format your response strictly as a JSON object validated by the `DemoSummary` Pydantic model.

        **Extraction Rules:**
        1. **Product Name:** Identify the primary product or solution discussed.
        2. **Prospect Company:** Determine the name of the prospective customer's organization.
        3. **Sales Rep:** Identify the name of the sales representative.
        4. **Summary Points:** Provide 3-5 concise, high-level bullet points summarizing the entire demo.
        5. **Pain Points Discussed:** List specific challenges or problems the prospect mentioned.
        6. **Features Demonstrated:** For each feature explicitly shown or discussed in detail, provide a dictionary with 'name' (the feature name), 'timestamp_start' (start time, e.g., '00:05:10'), and 'timestamp_end' (end time, e.g., '00:08:45'). If a feature is mentioned but not demonstrated, do not include timestamps.
        7. **Next Steps:** List any clear action items or agreed-upon follow-ups for either party.
        8. **Unanswered Questions:** List any specific questions posed by the prospect that were not fully resolved during the call.
        9. **Strict JSON Output:** Ensure the output is valid JSON and perfectly matches the structure defined by the `DemoSummary` model. Do not include any extra text or conversational filler outside the JSON.
        """
    ),
    # NOTE(review): structured output is disabled; the workflow strips the
    # markdown ```json wrapper and parses the content manually instead.
    # response_model=DemoSummary,
)
agents/site_builder_agent.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from agno.agent import Agent
2
+ from agno.models.google import Gemini
3
+ from textwrap import dedent
4
+ from pydantic import BaseModel, Field
5
+
6
+
7
class HtmlContent(BaseModel):
    """Structured response wrapper for the site-builder agent's HTML output."""

    # The complete single-page HTML document produced by the agent.
    content: str = Field(
        ..., description="The generated HTML content for the microsite."
    )
11
+
12
+
13
# Agent that renders the extracted demo data plus the raw transcription into a
# single, self-contained, Tailwind-styled HTML recap microsite.
#
# Fix: the instructions literal was an f-string with zero interpolations; the
# {{...}} pairs existed only to escape braces for the f-prefix. It is now a
# plain string with single braces, which yields the exact same text.
microsite_builder_agent = Agent(
    model=Gemini(id="gemini-2.0-flash-001", response_modalities=["text"]),
    description=dedent(
        """\
        Generates a personalized, interactive HTML microsite from demo call data.
        It combines structured extracted information with raw transcription details
        to create a visually appealing and informative recap page for prospects."""
    ),
    instructions=dedent(
        """\
        You are an expert web developer specializing in creating concise, engaging, and personalized microsites for product demo recaps.

        **Your Task:**
        Generate a complete, single-page HTML document for a product demo recap microsite.
        The HTML should be fully self-contained (no external CSS files, use Tailwind CSS CDN).
        It must be responsive, visually appealing, and **have clean, minimal formatting (avoid excessive newlines or unnecessary whitespace)**.

        **Inputs:**
        - `extracted_info_json`: A JSON string containing structured data about the demo (product, prospect, features, pain points, next steps, etc.).
        - `raw_transcription`: The full, verbatim transcription of the demo call, including timestamps and speaker identification. This is crucial for creating "Watch this moment" links.

        **Microsite Structure & Content Requirements:**

        1. **HTML Boilerplate:** Include `<!DOCTYPE html>`, `<html>`, `<head>`, `<body>`.
        2. **Meta Tags:** Include `viewport` for responsiveness.
        3. **Title:** Use the `product_name` and `prospect_company` for the page title.
        4. **Tailwind CSS:** Load from CDN: `<script src="https://cdn.tailwindcss.com"></script>`.
        5. **Font:** Load Inter font via Google Fonts CDN in `<head>` and apply `font-family: 'Inter', sans-serif;` via a `<style>` block.
        6. **Overall Styling:**
            * Use a clean, modern design with `bg-gray-100` for the body.
            * Content should be in a white card (`bg-white rounded-lg shadow-md`) with good padding.
            * Apply rounded corners to elements.
            * Ensure appropriate spacing (padding, margin classes).
            * Center text for headers and CTAs.
        7. **Header Section:**
            * Prominent `<h1>` for the recap title (e.g., "Recap for [Prospect Company] - [Product Name] Demo").
            * `<p>` tag for "Presented by [Sales Rep's Name] ([Product Name])".
        8. **Summary Section (`<section>`):**
            * `<h2>` title: "Key Summary Points".
            * Unordered list (`<ul>`) with `list-disc list-inside` for `summary_points`.
        9. **Pain Points Discussed Section (`<section>`):**
            * `<h2>` title: "Pain Points Discussed".
            * Unordered list (`<ul>`) with `list-disc list-inside` for `pain_points_discussed`.
        10. **Features Demonstrated Section (`<section>`):**
            * `<h2>` title: "Features Demonstrated".
            * If `features_demonstrated` is empty, use a `<p>` tag: "No features were explicitly demonstrated in this call."
            * If features exist, use an unordered list (`<ul>`). For each feature:
                * Display `name`.
                * Create a button/link `<a>` with Tailwind classes (e.g., `inline-block bg-blue-500 hover:bg-blue-600 text-white text-xs font-semibold py-1 px-2 rounded ml-2`) labeled "Watch this moment".
                * The `href` for this link MUST be `{demo_recording_url}#t={timestamp_start_in_seconds}`. Convert `HH:MM:SS` to total seconds for the hash (e.g., 00:00:30 becomes 30).
        11. **Next Steps Section (`<section>`):**
            * `<h2>` title: "Next Steps".
            * Unordered list (`<ul>`) with `list-disc list-inside` for `next_steps`.
        12. **Call to Action (CTA) (`<div>`):**
            * Centered `<div>`.
            * A prominent button `<a>` with Tailwind classes (e.g., `bg-blue-500 hover:bg-blue-700 text-white font-bold py-2 px-4 rounded`) labeled "Schedule a Follow-Up". This can point to a placeholder link (`#`).
        13. **Strict HTML Output:** Output ONLY the complete HTML document. Do not include any other text, preambles, explanations, or conversational filler outside the HTML. **Ensure minimal newlines and whitespace within the HTML for a compact output.**
        """
    ),
    response_model=HtmlContent,  # Agent will return an HtmlContent object containing the raw HTML string
)
agents/transcription_agent.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from agno.agent import Agent
2
+ from agno.models.google import Gemini
3
+ from textwrap import dedent
4
+ from pydantic import BaseModel
5
+
6
+
7
class Transcription(BaseModel):
    """Structured response for the transcription agent."""

    # Full transcription text: one "[HH:MM:SS - HH:MM:SS] Speaker: text"
    # segment per line, as specified in the agent's instructions.
    transcription: str
9
+
10
+
11
# Agent that converts demo-call audio into a verbatim, timestamped,
# speaker-attributed transcription, returned as a Transcription model.
transcription_agent = Agent(
    model=Gemini(id="gemini-2.0-flash-lite", response_modalities=["text"]),
    description=dedent(
        """\
        Highly accurate, verbatim audio-to-text transcription service.
        Converts spoken words into a detailed textual record, preserving crucial temporal context and speaker identification."""
    ),
    instructions=dedent(
        """\
        Strictly follow these rules for verbatim transcription with timestamps and speaker identification.
        Output the transcription as a continuous string, with each segment on a new line.

        **Output Format:**
        [HH:MM:SS - HH:MM:SS] Speaker Name: Transcribed verbatim speech

        **Transcription Rules (Strictly Adhere to All):**

        1. **Verbatim Accuracy:** Transcribe every single word exactly as heard.
        2. **No Interpretation/Summarization:** Do not summarize, interpret, or rephrase speech. Transcribe only what is explicitly said.
        3. **Unclear Speech:** Use '[inaudible]' for any speech that cannot be clearly understood.
        4. **Pauses:** Indicate pauses longer than 2 seconds with '...' (three periods) directly within the transcribed text.
        5. **No Punctuation/Formatting:** Do not add any punctuation (commas, periods, question marks, etc.) or apply any text formatting (bold, italics).
        6. **Preserve Filler Words:** Include all filler words (e.g., 'um', 'uh', 'like', 'you know').

        **Example of Desired Output:**
        [00:00:00 - 00:00:05] Sales Rep: Good morning Jane thanks for joining the call
        [00:00:05 - 00:00:12] Prospect: Hi Alice excited to learn more about the Microsite Pilot
        [00:00:12 - 00:00:25] Sales Rep: Great today we're going to focus on how we automate post-demo follow-ups
        [00:00:25 - 00:00:30] Prospect: My biggest pain point is the time spent summarizing
        [00:00:30 - 00:00:45] Sales Rep: Exactly our key feature is the 'Instant Microsite Generation' let me show you that
        """
    ),
    # Structured output: the model response is parsed into a Transcription object.
    response_model=Transcription,
)
app.py ADDED
@@ -0,0 +1,248 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from workflow import (
3
+ MicroSiteGenerator,
4
+ ) # Make sure workflow.py is in the same directory or accessible
5
+ from agno.workflow import RunEvent
6
+ import os
7
+ from dotenv import load_dotenv
8
+ import traceback # Import traceback for detailed error logging
9
+
10
# Load environment variables from .env file, if present.
load_dotenv()

# Instantiate the workflow (this also initializes the agents defined in it).
# Kept as None on failure so the UI can degrade gracefully instead of
# crashing at import time.
microsite_workflow: MicroSiteGenerator | None = None
try:
    microsite_workflow = MicroSiteGenerator()
except Exception as e:
    print(f"Error initializing MicroSiteGenerator: {e}")
    traceback.print_exc()
    # Gradio UI will show a message if microsite_workflow is None
22
+
23
+
24
def generate_microsite_app(audio_file_obj, audio_format_str, use_cache_bool):
    """
    Gradio handler: process an uploaded audio file and generate a microsite
    via the local MicroSiteGenerator workflow.

    Args:
        audio_file_obj: Output from gr.Audio (type="filepath") — a path string,
            or None if nothing was uploaded.
        audio_format_str: The user-selected audio format (dropdown value).
        use_cache_bool: Whether to reuse a cached transcription.

    Returns:
        tuple[str, str]: (processing log text, result markdown) — bound to the
        log textbox and the markdown output component respectively.
    """
    # Guard: the workflow may have failed to construct at import time.
    if microsite_workflow is None:
        return (
            "Critical Error: Workflow failed to initialize. Check console logs.",
            "App is not functional. Please ensure all configurations (like API keys) are correctly set.",
        )

    if audio_file_obj is None:
        return "Status: Idle", "Please upload an audio file to begin. 🎤"

    audio_source_path = audio_file_obj  # This is already the path string
    processing_log_entries = ["🚀 Starting microsite generation..."]
    final_result_markdown = "⏳ Processing... please wait."

    try:
        # The run method is a generator; iterate until the final
        # workflow_completed event and render its content.
        for response in microsite_workflow.run(
            audio_source=audio_source_path,
            audio_format=audio_format_str.lower(),  # Ensure format is lowercase
            use_transcription_cache=use_cache_bool,
        ):
            processing_log_entries.append(f"🔄 Workflow event: {response.event.value}")

            if response.event == RunEvent.workflow_completed:
                content = response.content  # This is site_details from the workflow
                if isinstance(content, dict):  # Expected site_details dictionary
                    if content.get("success"):
                        site_url = content.get("site", {}).get("url")
                        site_name = content.get("site", {}).get("name", "N/A")
                        admin_url = content.get("site", {}).get("admin_url", "#")
                        if site_url:
                            final_result_markdown = (
                                f"🎉 **Microsite '{site_name}' Deployed!** 🎉\n\n"
                                f"🔗 **Access it here:** [{site_url}]({site_url})\n\n"
                                f"<details><summary>ℹ️ Deployment Details (Admin Link)</summary>"
                                f"<p>Admin URL: <a href='{admin_url}' target='_blank' rel='noopener noreferrer'>{admin_url}</a></p>"
                                f"</details>"
                            )
                            processing_log_entries.append("✅ Deployment successful.")
                        else:
                            final_result_markdown = "⚠️ Microsite generated, but deployment URL was not found in the response."
                            processing_log_entries.append("❌ Deployment URL missing.")
                    else:  # Deployment failed or error reported by deploy_html_file_with_digest
                        error_msg = content.get("message", "Unknown deployment error.")
                        final_result_markdown = (
                            f"❌ **Microsite Generation Failed:** {error_msg}"
                        )
                        processing_log_entries.append(
                            f"❌ Deployment/Generation failed: {error_msg}"
                        )
                elif isinstance(content, str) and "Site was not generated" in content:  # Workflow specific error string
                    final_result_markdown = f"❌ **Site Not Generated:** {content}. This often indicates a transcription failure or issue with the audio."
                    processing_log_entries.append(
                        f"❌ Site generation aborted: {content}"
                    )
                else:
                    final_result_markdown = f"🤔 Workflow completed with unexpected content: {str(content)[:200]}..."  # Truncate long content
                    processing_log_entries.append(
                        f"⚠️ Unexpected content: {str(content)[:200]}..."
                    )
                break  # Critical final event processed
            else:
                # Handle other potential intermediate events if the workflow is
                # updated to yield more (e.g. progress updates).
                if hasattr(response.content, 'get') and response.content.get('progress_message'):
                    processing_log_entries.append(f"⏳ {response.content['progress_message']}")
                elif isinstance(response.content, str):
                    processing_log_entries.append(f"ℹ️ {response.content[:100]}...")


    except Exception as e:
        # Top-level UI boundary: surface the error to the user and the console
        # rather than letting Gradio swallow it.
        tb_str = traceback.format_exc()
        processing_log_entries.append(
            f"💥 Critical error during workflow execution: {str(e)}"
        )
        print(f"Error in generate_microsite_app: {e}\n{tb_str}")  # Log to console for debugging
        final_result_markdown = (
            f"💥 **An Unexpected Error Occurred!** 💥\n\n"
            f"Details: `{str(e)}`\n\n"
            f"Please check the console logs for more information or try again. "
            f"If the problem persists, ensure all configurations and API keys are correctly set."
        )

    return "\n".join(processing_log_entries), final_result_markdown
114
+
115
+
116
# Audio container formats offered in the UI dropdown.
COMMON_AUDIO_FORMATS = [
    "wav", "mp3", "m4a", "flac", "ogg", "aac", "opus", "amr", "webm",
]

# Collect human-readable notes for any required environment variables that are
# missing, so the UI can warn the user up front.
missing_env_vars_messages = []
if not os.getenv("NETLIFY_PERSONAL_ACCESS_TOKEN"):
    missing_env_vars_messages.append("`NETLIFY_PERSONAL_ACCESS_TOKEN` (for deploying the site to Netlify)")
if not os.getenv("GOOGLE_API_KEY"):  # Common for Gemini models
    missing_env_vars_messages.append("`GOOGLE_API_KEY` (for AI models like Google Gemini)")
# Add other critical env vars checks here if your workflow agents need them

# Render the missing-variable warning as an HTML banner (empty if all set).
env_warning_html = ""
if missing_env_vars_messages:
    vars_list_html = "".join([f"<li>{var}</li>" for var in missing_env_vars_messages])
    env_warning_html = (
        f"<div style='background-color: #332200; color: #FFDDAA; border: 1px solid #553300; padding: 15px; margin-bottom:20px; border-radius: 5px;'>"
        f"<strong>⚠️ Heads up!</strong> The application might be missing the following environment variable(s):"
        f"<ul style='margin-top: 10px; margin-bottom: 0; padding-left: 20px;'>{vars_list_html}</ul>"
        f"This could affect its functionality. Please ensure they are set in your environment or `.env` file."
        f"</div>"
    )

# Collapsible workflow description shown in the intro; falls back to an
# error/notice message when the workflow failed to initialize.
workflow_desc_html = ""
if microsite_workflow and hasattr(microsite_workflow, "description"):
    escaped_description = microsite_workflow.description.replace("\n", "<br>")
    workflow_desc_html = f"""
    <details style="margin-top:15px; margin-bottom:15px; padding:10px; background-color:#2a2a2a; border-radius:5px; border: 1px solid #444;">
        <summary style="font-weight:bold; cursor:pointer;">📖 Click to see Workflow Details</summary>
        <p style="margin-top:10px;"><em>{escaped_description}</em></p>
    </details>
    """
elif microsite_workflow is None:
    workflow_desc_html = "<p style='color:red; font-weight:bold;'>WORKFLOW INITIALIZATION FAILED. Please check console logs for errors. API keys might be missing or other configurations might be incorrect.</p>"
else:
    workflow_desc_html = "<p style='color:orange;'>Workflow description not available.</p>"


app_title = "MicrositePilot 🎙️➡️🌐"
# Intro block shown at the top of the page (warning banner + description).
app_intro_markdown = f"""
{env_warning_html}
Welcome to **MicrositePilot**! Upload a product demo call recording (audio file).
The AI will transcribe it, extract key information, and generate a personalized recap microsite, automatically deployed to Netlify.
{workflow_desc_html}
"""

# Page-level CSS tweaks (font, max width, hide the default Gradio footer).
custom_css = """
body { font-family: 'Inter', sans-serif; }
.gradio-container { max-width: 900px !important; margin: auto !important; }
footer { display: none !important; } /* Hide default Gradio footer */
h1 { text-align: center; }
.gr-button { box-shadow: 0 1px 3px 0 rgba(0,0,0,.1), 0 1px 2px 0 rgba(0,0,0,.06); }
"""
171
+
172
# Assemble the Gradio UI: left column = inputs/config, right column = results.
with gr.Blocks(theme="dark_default", css=custom_css) as demo:
    gr.Markdown(f"<h1>{app_title}</h1>")
    gr.HTML(app_intro_markdown)

    with gr.Row(equal_height=False):
        with gr.Column(scale=1, min_width=300):
            gr.Markdown("### 📤 Step 1: Upload Audio")
            audio_input = gr.Audio(
                type="filepath",
                label="Product Demo Audio File (WAV, MP3, M4A, etc.)",
            )

            gr.Markdown("### ⚙️ Step 2: Configure Options")
            audio_format_input = gr.Dropdown(
                choices=COMMON_AUDIO_FORMATS,
                label="Original Audio Format (Crucial)",
                value="mp3",  # Default common format
            )
            cache_checkbox = gr.Checkbox(
                label="Use Transcription Cache ⚡ (Speeds up re-runs)",
                value=True,  # Default to using cache
            )

            gr.Markdown("### ✨ Step 3: Generate!")
            submit_button = gr.Button(
                "Generate Microsite", variant="primary", elem_id="submit_button_custom"
            )

        with gr.Column(scale=2, min_width=400):
            gr.Markdown("### 📊 Results")
            log_output = gr.Textbox(
                label="⚙️ Processing Log",
                lines=12,
                interactive=False,
                placeholder="Workflow updates and logs will appear here...",
            )
            microsite_link_output = gr.Markdown(
                label="🔗 Microsite Output",
                value="Your deployed microsite link and details will appear here once generated.",
            )

    # Offer a clickable example only when the bundled sample audio exists on disk.
    example_audio_file = "Listen to an A.I. sales rep cold call (and close) a prospect. #ai #sales.mp3"
    if os.path.exists(example_audio_file):
        gr.Examples(
            examples=[[example_audio_file, "mp3", True]],
            inputs=[audio_input, audio_format_input, cache_checkbox],
            outputs=[log_output, microsite_link_output],
            fn=generate_microsite_app,
            cache_examples=False,
            label="📋 Example (click to run)",
        )
    else:
        gr.Markdown(
            "<p style='text-align:center; font-style:italic; color:grey;'>Note: Example audio file 'Listen to an A.I. sales rep cold call (and close) a prospect. #ai #sales.mp3' not found. Examples disabled.</p>"
        )

    # Wire the button to the handler; api_name also exposes it via the Gradio API.
    submit_button.click(
        fn=generate_microsite_app,
        inputs=[audio_input, audio_format_input, cache_checkbox],
        outputs=[log_output, microsite_link_output],
        api_name="generate_microsite",
    )
234
+
235
if __name__ == "__main__":
    # Surface configuration problems on the console before launching the UI.
    if microsite_workflow is None:
        print("CRITICAL: MicroSiteGenerator workflow failed to initialize. The Gradio app might not function correctly.")
        print("Please check for errors above, ensure API keys (e.g., GOOGLE_API_KEY, NETLIFY_PERSONAL_ACCESS_TOKEN) are set in your .env file or environment, and all dependencies are installed.")
    else:
        print("MicroSiteGenerator workflow initialized successfully.")

    # Reminders only — the app still launches so the UI banner can explain.
    if not os.getenv("NETLIFY_PERSONAL_ACCESS_TOKEN"):
        print("CONSOLE REMINDER: NETLIFY_PERSONAL_ACCESS_TOKEN is not set. Deployment to Netlify will fail.")
    if not os.getenv("GOOGLE_API_KEY"):
        print("CONSOLE REMINDER: GOOGLE_API_KEY is not set. AI agent calls may fail.")

    print("Gradio app starting...")
    demo.launch()
pyproject.toml ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [project]
2
+ name = "micrositepilot"
3
+ version = "0.1.0"
4
+ description = "MicrositePilot: transforms product demo call recordings into personalized recap microsites deployed to Netlify"
5
+ readme = "README.md"
6
+ requires-python = ">=3.12"
7
+ dependencies = [
8
+ "agno>=1.5.10",
9
+ "fastapi>=0.115.12",
10
+ "google>=3.0.0",
11
+ "google-genai>=1.19.0",
12
+ "gradio>=5.33.0",
13
+ "netlify-python>=0.3.2",
14
+ "python-dotenv>=1.1.0",
15
+ ]
utils/netlify_deployment.py ADDED
@@ -0,0 +1,134 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import uuid
3
+ import requests
4
+ import hashlib
5
+
6
+
7
def deploy_html_file_with_digest(title, html_file_path, access_token=None):
    """
    Deploy a single HTML file to Netlify using the file-digest deploy method.

    Flow: create a site, announce the file by its SHA1 digest, upload the file
    only if Netlify reports the digest as "required", then read back the final
    deploy state.

    Args:
        title (str): The title/name for the site (used to derive the site name).
        html_file_path (str): Path to the HTML file to deploy.
        access_token (str, optional): Netlify personal access token; falls back
            to the NETLIFY_PERSONAL_ACCESS_TOKEN environment variable.

    Returns:
        dict: On success {"success": True, "site": {...}, "deploy": {...}};
        on failure {"success": False, "error": ..., "message": ...}.

    Raises:
        ValueError: If no access token is provided or found in the environment.
    """
    # Use provided token or fall back to the environment.
    token = access_token or os.getenv("NETLIFY_PERSONAL_ACCESS_TOKEN")
    if not token:
        raise ValueError("No Netlify access token provided")

    # Unique site name: slugified title plus a short random suffix to avoid
    # collisions across repeated deployments of the same product.
    site_name = f"{title.lower().replace(' ', '-')}-{uuid.uuid4().hex[:8]}"

    api_base = "https://api.netlify.com/api/v1"

    headers = {
        "Authorization": f"Bearer {token}",
        "User-Agent": "MicrositePilot-Deployer",
        "Content-Type": "application/json",
    }

    try:
        # Step 1: Create a new site.
        site_data = {
            "name": site_name,
            "processing_settings": {"html": {"pretty_urls": True}},
        }
        site_response = requests.post(
            f"{api_base}/sites", headers=headers, json=site_data
        )
        site_response.raise_for_status()
        site_info = site_response.json()

        site_id = site_info["id"]
        site_url = site_info["url"]
        admin_url = site_info["admin_url"]

        # Step 2: Read the HTML file and compute its SHA1 digest (Netlify's
        # digest-deploy protocol identifies files by SHA1).
        with open(html_file_path, "rb") as f:
            html_content = f.read()
        sha1_hash = hashlib.sha1(html_content).hexdigest()

        # Step 3: Create the deployment, announcing the file by digest.
        deploy_data = {"files": {"/index.html": sha1_hash}}
        deploy_response = requests.post(
            f"{api_base}/sites/{site_id}/deploys",
            headers=headers,
            json=deploy_data,
        )
        deploy_response.raise_for_status()
        deploy_info = deploy_response.json()

        deploy_id = deploy_info["id"]
        required_files = deploy_info.get("required", [])

        # Step 4: Upload the file only if Netlify does not already have a copy
        # with this digest.
        if sha1_hash in required_files:
            file_headers = {
                "Authorization": f"Bearer {token}",
                "Content-Type": "text/html",
                "User-Agent": "MicrositePilot-Deployer",
            }
            upload_response = requests.put(
                f"{api_base}/deploys/{deploy_id}/files/index.html",
                headers=file_headers,
                data=html_content,
            )
            upload_response.raise_for_status()
            print("✅ File uploaded successfully!")
        else:
            print("ℹ️ File already exists on Netlify, no upload needed")

        # Step 5: Read back the final deployment status (previously fetched
        # but discarded — now surfaced in the return value).
        status_response = requests.get(
            f"{api_base}/deploys/{deploy_id}", headers=headers
        )
        status_response.raise_for_status()
        status_info = status_response.json()

        return {
            "success": True,
            "site": {
                "id": site_id,
                "name": site_name,
                "url": site_url,
                "admin_url": admin_url,
            },
            # Backward-compatible extra key with deploy details.
            "deploy": {
                "id": deploy_id,
                "state": status_info.get("state", "unknown"),
                "url": status_info.get("deploy_url", ""),
            },
        }

    except requests.exceptions.RequestException as e:
        return {
            "success": False,
            "error": str(e),
            "message": f"Failed to deploy {title}",
        }
    except FileNotFoundError:
        return {
            "success": False,
            "error": "File not found",
            "message": f"HTML file {html_file_path} not found",
        }
    except Exception as e:
        # Best-effort error envelope: callers inspect the dict rather than
        # handling exceptions.
        return {
            "success": False,
            "error": str(e),
            "message": f"Unexpected error during deployment of {title}",
        }
uv.lock ADDED
The diff for this file is too large to render. See raw diff
 
workflow.py ADDED
@@ -0,0 +1,375 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from agno.workflow import Workflow, RunResponse, RunEvent
2
+ from agents.transcription_agent import transcription_agent, Transcription
3
+ from agents.site_builder_agent import microsite_builder_agent
4
+ from agents.info_extractor_agent import info_extractor
5
+ from utils.netlify_deployment import deploy_html_file_with_digest
6
+ from textwrap import dedent
7
+ from agno.agent import Agent
8
+ from typing import Iterator, Union, Optional
9
+ from logging import Logger
10
+ from pathlib import Path
11
+ from agno.media import Audio
12
+ from dotenv import load_dotenv
13
+ import requests
14
+ import json
15
+ from datetime import datetime
16
+
17
+ load_dotenv()
18
+
19
# Module-level logger obtained via logging.getLogger so it participates in the
# standard logging hierarchy (handlers/levels configured by the application).
# Instantiating `Logger` directly — as before — bypasses that configuration;
# the logging docs state Loggers should never be instantiated directly.
import logging

logger = logging.getLogger(__name__)
21
+
22
+
23
class MicroSiteGenerator(Workflow):
    # Pipeline: transcribe audio -> extract structured info -> build HTML
    # microsite -> deploy to Netlify.
    description: str = dedent(
        """\
        An intelligent AI agent that seamlessly transforms product demo call recordings into personalized, interactive recap websites. This workflow orchestrates multiple AI agents to transcribe the demo, intelligently extract key discussion points and features, and dynamically assemble compelling, shareable microsites.
        """
    )

    # Collaborating agents (module-level singletons shared across runs).
    transcriber: Agent = transcription_agent
    info_extractor: Agent = info_extractor
    microsite_builder: Agent = microsite_builder_agent
33
+
34
+ def save_html_to_file(self, html_content: str) -> str:
35
+ """
36
+ Manually save HTML content to the microsites directory.
37
+
38
+ Args:
39
+ html_content: The HTML content to save
40
+
41
+ Returns:
42
+ str: The full path to the saved HTML file
43
+ """
44
+ # Create microsites directory if it doesn't exist
45
+ microsites_dir = Path(__file__).parent.parent / "microsites"
46
+ microsites_dir.mkdir(exist_ok=True)
47
+
48
+ # Generate filename with timestamp
49
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
50
+ filename = f"demo_{timestamp}.html"
51
+ file_path = microsites_dir / filename
52
+
53
+ try:
54
+ # Write HTML content to file
55
+ with open(file_path, "w", encoding="utf-8") as f:
56
+ f.write(html_content)
57
+
58
+ logger.info(f"HTML saved successfully to: {file_path}")
59
+ return str(file_path)
60
+
61
+ except Exception as e:
62
+ logger.error(f"Failed to save HTML file: {e}")
63
+ raise Exception(f"Could not save HTML file: {e}")
64
+
65
+ def run(
66
+ self,
67
+ audio_source: str,
68
+ audio_format: str,
69
+ use_transcription_cache: bool = True,
70
+ ) -> Iterator[RunResponse]:
71
+ logger.info("Microsite generation initiated.")
72
+
73
+ transcription_results: Optional[Transcription] = None
74
+ if use_transcription_cache:
75
+ transcription_results = self.get_cached_transcription(audio_source)
76
+ if transcription_results:
77
+ logger.info(f"Using cached transcription for {audio_source}")
78
+ else:
79
+ logger.info(
80
+ f"No cached transcription found for {audio_source}, transcribing now."
81
+ )
82
+ transcription_results = self.transcribe_audio(
83
+ audio_source, audio_format
84
+ )
85
+ if transcription_results:
86
+ self._add_transcription_to_cache(audio_source, transcription_results)
87
+ extracted_info: RunResponse = self.info_extractor.run(
88
+ message=transcription_results.transcription
89
+ )
90
+ extracted_info = self.remove_markdown_json_wrapper(extracted_info.content)
91
+ print(extracted_info)
92
+
93
+ microsite_builder_input = {
94
+ "extracted_info_json": extracted_info,
95
+ "raw_transcription": transcription_results.transcription,
96
+ }
97
+ site_html: RunResponse = microsite_builder_agent.run(
98
+ json.dumps(microsite_builder_input)
99
+ )
100
+
101
+ # Save HTML to filesystem using manual function
102
+ html_file_path = self.save_html_to_file(site_html.content.content)
103
+ logger.info(f"HTML saved to: {html_file_path}")
104
+
105
+ product_name = json.loads(extracted_info)["product_name"]
106
+
107
+ site_details = deploy_html_file_with_digest(
108
+ title=product_name,
109
+ html_file_path=html_file_path,
110
+ )
111
+
112
+ yield RunResponse(
113
+ content=site_details,
114
+ event=RunEvent.workflow_completed,
115
+ )
116
+ else:
117
+ yield RunResponse(
118
+ content="Site was not generated",
119
+ event=RunEvent.workflow_completed,
120
+ )
121
+
122
+ # transcription_results = self.transcribe_audio(audio_source, audio_format)
123
+ # if transcription_results:
124
+ # yield RunResponse(
125
+ # content=transcription_results.transcription, # The transcription text
126
+ # event=RunEvent.workflow_completed,
127
+ # )
128
+ # else:
129
+ # yield RunResponse(
130
+ # content="Transcription failed.", event=RunEvent.workflow_completed
131
+ # )
132
+ # extracted_info: RunResponse = self.info_extractor.run(
133
+ # message=transcription_results.transcription
134
+ # )
135
+ # print(self.remove_markdown_json_wrapper(extracted_info.content))
136
+
137
def get_cached_transcription(
    self, audio_source: Union[str, Path, bytes]
) -> Optional[Transcription]:
    """
    Look up a previously stored transcription for *audio_source*.

    Returns the rehydrated `Transcription` model on a cache hit, or ``None``
    when nothing (or something non-dict) is stored for this source.
    """
    # Cache keys must be hashable strings: paths/URLs are used verbatim,
    # raw bytes are reduced to their hash.
    if isinstance(audio_source, (str, Path)):
        cache_key = str(audio_source)
    else:
        # NOTE(review): hash() of bytes is salted per interpreter process
        # (PYTHONHASHSEED), so these keys are only stable within a single
        # run — confirm the session cache never outlives the process.
        cache_key = f"bytes_hash_{hash(audio_source)}"

    logger.info(f"Checking if cached transcription exists for {cache_key}.")
    cached = self.session_state.get("transcription_cache", {}).get(cache_key)

    # Entries are stored as plain dicts; convert back into the Pydantic model.
    if cached and isinstance(cached, dict):
        return Transcription.model_validate(cached)
    return None
159
+
160
def _add_transcription_to_cache(
    self, audio_source: Union[str, Path, bytes], transcription_result: Transcription
):
    """
    Store *transcription_result* in the session cache, keyed by audio source.

    The key scheme mirrors `get_cached_transcription`: string/Path sources
    are used verbatim, raw bytes are reduced to their hash.
    """
    if isinstance(audio_source, (str, Path)):
        cache_key = str(audio_source)
    else:
        cache_key = f"bytes_hash_{hash(audio_source)}"

    logger.info(f"Saving transcription results for audio source: {cache_key}")
    # Persist the Pydantic model as a plain dict so the session state stays
    # serializable.
    cache = self.session_state.setdefault("transcription_cache", {})
    cache[cache_key] = transcription_result.model_dump()
177
+
178
def remove_markdown_json_wrapper(self, json_string_with_markdown: str) -> str:
    """
    Strip a Markdown code-fence wrapper from an LLM JSON response.

    Handles the variants models commonly emit, not just the exact
    ```` ```json\\n...\\n``` ```` shape: a bare ``` fence, an opening fence
    without the ``json`` tag, a closing fence not preceded by a newline,
    and stray leading/trailing whitespace around the fenced block.

    Args:
        json_string_with_markdown: Raw model output, possibly wrapped in a
            Markdown code block.

    Returns:
        The inner JSON string with any fence removed; input without a fence
        is returned with only surrounding whitespace trimmed.
    """
    cleaned = json_string_with_markdown.strip()

    # Remove the opening fence: "```" optionally followed by a "json" tag,
    # then any newlines before the payload.
    if cleaned.startswith("```"):
        cleaned = cleaned[3:]
        if cleaned.startswith("json"):
            cleaned = cleaned[4:]
        cleaned = cleaned.lstrip("\n")

    # Remove the closing fence, tolerating a missing newline before it.
    if cleaned.endswith("```"):
        cleaned = cleaned[:-3].rstrip("\n")

    return cleaned
201
+
202
# NOTE: Removed duplicated definitions of get_cached_transcription and
# _add_transcription_to_cache. They were byte-for-byte copies of the methods
# defined earlier in this class; because Python binds the last definition,
# the later copies silently shadowed the earlier ones while adding no
# behavior.
243
+
244
+ # --- Audio Handling Function ---
245
def _download_audio(self, url: str, timeout: float = 30.0) -> bytes:
    """
    Download audio content from *url* and return it as raw bytes.

    Args:
        url: HTTP(S) URL of the audio file.
        timeout: Seconds to wait for the connection/response before giving
            up. Defaults to 30 so existing callers are unaffected.

    Returns:
        The full response body as bytes.

    Raises:
        ValueError: If the request fails for any network or HTTP reason.
    """
    logger.info(f"Attempting to download audio from URL: {url}")
    try:
        # A timeout is essential: without one, requests.get can block the
        # whole workflow indefinitely on an unresponsive host.
        response = requests.get(url, stream=True, timeout=timeout)
        response.raise_for_status()  # Raise an exception for HTTP errors
        return response.content
    except requests.exceptions.RequestException as e:
        logger.error(f"Failed to download audio from {url}: {e}")
        # Chain the original exception so the root cause stays visible.
        raise ValueError(f"Could not download audio from URL: {e}") from e
257
+
258
+ def _get_audio_bytes(self, source: Union[str, Path, bytes]) -> bytes:
259
+ """
260
+ Retrieves audio content as bytes from various sources (path, URL, or raw bytes).
261
+ """
262
+ if isinstance(source, bytes):
263
+ return source
264
+ elif isinstance(source, (str, Path)):
265
+ str_source = str(source)
266
+ if str_source.startswith(("http://", "https://")):
267
+ return self._download_audio(str_source)
268
+ return Path(str_source).read_bytes()
269
+ raise ValueError("Unsupported audio source type.")
270
+
271
+ # --- Transcription Execution Functions ---
272
def _run_transcription_agent(
    self,
    audio_source_bytes: bytes,
    audio_format: str,
):
    """
    Invoke the transcription agent on the given audio bytes.

    Returns the agent response content on success, or ``None`` if the agent
    raises — failures are logged and swallowed so the caller's retry loop
    can decide what to do.
    """
    logger.info(f"Running transcription agent for audio format: {audio_format}")
    try:
        response: RunResponse = self.transcriber.run(
            input="Transcribe this audio exactly as heard",
            audio=[Audio(content=audio_source_bytes, format=audio_format)],
        )
        return response.content
    except Exception as e:
        # Best-effort: report the failure and let the retry loop continue.
        logger.error(f"Transcription agent failed: {str(e)}")
        return None
290
+
291
def transcribe_audio(
    self,
    audio_source: Union[str, Path, bytes],
    audio_format: str = "wav",
    num_attempts: int = 3,
):
    """
    Transcribe an audio source, retrying the agent up to *num_attempts* times.

    First resolves the source to bytes, then repeatedly invokes the
    transcription agent until it yields a result. Returns the transcription
    response on success, or ``None`` if the audio cannot be loaded or every
    attempt fails.
    """
    logger.info("Initiating audio transcription process.")
    try:
        audio_bytes = self._get_audio_bytes(audio_source)
    except (ValueError, NotImplementedError) as e:
        logger.error(f"Failed to get audio bytes: {str(e)}")
        return None

    attempt = 0
    while attempt < num_attempts:
        result = self._run_transcription_agent(audio_bytes, audio_format)
        if result:
            logger.info(f"Transcription successful after {attempt + 1} attempt(s).")
            return result
        logger.warning(
            f"Transcription attempt {attempt + 1}/{num_attempts} failed."
        )
        attempt += 1

    logger.error(
        f"Transcription failed after {num_attempts} attempts for {audio_source}."
    )
    return None
322
+
323
+ # # --- Transcription Phase ---
324
+ # transcription_results: Optional[Transcription] = None
325
+ # if use_transcription_cache:
326
+ # transcription_results = self.get_cached_transcription(audio_source)
327
+ # if transcription_results:
328
+ # logger.info(f"Using cached transcription for {audio_source}")
329
+ # # Yield cached transcription as RunResponse
330
+ # yield RunResponse(
331
+ # content=f"Using cached transcription: {transcription_results.transcription}",
332
+ # event=RunEvent.workflow_completed,
333
+ # )
334
+ # return
335
+ # else:
336
+ # logger.info(
337
+ # f"No cached transcription found for {audio_source}, transcribing now."
338
+ # )
339
+ # transcription_results = self.transcribe_audio(
340
+ # audio_source, audio_format
341
+ # )
342
+ # if transcription_results:
343
+ # self._add_transcription_to_cache(
344
+ # audio_source, transcription_results
345
+ # )
346
+ # else:
347
+ # logger.info(
348
+ # f"Transcription cache disabled, transcribing {audio_source} now."
349
+ # )
350
+ # transcription_results = self.transcribe_audio(audio_source, audio_format)
351
+ # if transcription_results:
352
+ # self._add_transcription_to_cache(audio_source, transcription_results)
353
+
354
+ # if transcription_results is None:
355
+ # logger.error("Transcription was not successful. Workflow halted.")
356
+ # yield RunResponse(
357
+ # content="Transcription failed. Workflow halted.",
358
+ # event=RunEvent.workflow_completed,
359
+ # )
360
+ # return
361
+
362
+ # # --- Information Extraction Phase ---
363
+ # logger.info("Transcription successful. Proceeding to information extraction.")
364
+ # # Run the info_extractor agent and yield its response
365
+ # yield from self.info_extractor.run(
366
+ # input=transcription_results.transcription, # Pass the raw string transcription to the extractor
367
+ # stream=True,
368
+ # )
369
+
370
+ # # Cache the final result
371
+ # if (
372
+ # self.info_extractor.run_response
373
+ # and self.info_extractor.run_response.content
374
+ # ):
375
+ # logger.info("Information extraction successful. Workflow completed.")