Spaces:
Sleeping
Sleeping
updated completely with open source code
Browse files
app.py
CHANGED
|
@@ -1,485 +1,206 @@
|
|
| 1 |
-
# ==============================================================================
|
| 2 |
-
# PitchPerfect AI: Enterprise-Grade Sales Coach (Single File Application)
|
| 3 |
-
#
|
| 4 |
-
# This single file contains the complete application code, enhanced with
|
| 5 |
-
# YouTube support, JAX-based quantitative analysis, and a more robust
|
| 6 |
-
# agentic architecture.
|
| 7 |
-
# ==============================================================================
|
| 8 |
-
|
| 9 |
-
# ==============================================================================
|
| 10 |
-
# File: README.md (Instructions)
|
| 11 |
-
# ==============================================================================
|
| 12 |
"""
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
This
|
| 16 |
-
|
| 17 |
-
This advanced version includes:
|
| 18 |
-
- Support for local video uploads and YouTube URLs.
|
| 19 |
-
- Quantitative vocal analysis powered by JAX for high performance.
|
| 20 |
-
- An agentic architecture where specialized tools (YouTube Downloader, JAX Analyzer) work in concert with the Gemini 1.5 Pro model.
|
| 21 |
-
|
| 22 |
-
## π Prerequisites
|
| 23 |
-
|
| 24 |
-
1. A Google Cloud Platform (GCP) project with billing enabled.
|
| 25 |
-
2. The Vertex AI API and Cloud Storage API enabled in your GCP project.
|
| 26 |
-
3. The `gcloud` CLI installed and authenticated on your local machine.
|
| 27 |
|
| 28 |
-
|
|
|
|
|
|
|
| 29 |
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
* **Example name:** `your-project-id-pitch-videos`
|
| 33 |
-
|
| 34 |
-
2. **Authenticate with Google Cloud:**
|
| 35 |
-
Run the following command in your terminal and follow the prompts. This sets up Application Default Credentials (ADC).
|
| 36 |
-
```bash
|
| 37 |
-
gcloud auth application-default login
|
| 38 |
-
```
|
| 39 |
-
*Note: The user/principal needs `Storage Object Admin` and `Vertex AI User` roles.*
|
| 40 |
-
|
| 41 |
-
3. **Install Dependencies:**
|
| 42 |
-
Create a `requirements.txt` file with the content below and run `pip install -r requirements.txt`.
|
| 43 |
-
```
|
| 44 |
-
gradio
|
| 45 |
-
google-cloud-aiplatform
|
| 46 |
-
google-cloud-storage
|
| 47 |
-
moviepy
|
| 48 |
-
# For JAX and Quantitative Analysis
|
| 49 |
-
jax
|
| 50 |
-
jaxlib
|
| 51 |
-
librosa
|
| 52 |
-
speechrecognition
|
| 53 |
-
openai-whisper
|
| 54 |
-
# For YouTube support
|
| 55 |
-
yt-dlp
|
| 56 |
-
```
|
| 57 |
-
|
| 58 |
-
4. **Configure Project Details:**
|
| 59 |
-
* In this file, scroll down to the "CONFIGURATION" section.
|
| 60 |
-
* Set your `GCP_PROJECT_ID`, `GCP_LOCATION`, and `GCS_BUCKET_NAME`.
|
| 61 |
-
|
| 62 |
-
5. **Run the Application:**
|
| 63 |
-
```bash
|
| 64 |
-
python app.py
|
| 65 |
-
```
|
| 66 |
-
This will launch a Gradio web server. **Look for a public URL ending in `.gradio.live` in the output and open it in your browser.**
|
| 67 |
"""
|
| 68 |
|
| 69 |
-
# ==============================================================================
|
| 70 |
-
# IMPORTS
|
| 71 |
-
# ==============================================================================
|
| 72 |
-
import logging
|
| 73 |
-
import json
|
| 74 |
-
import uuid
|
| 75 |
import os
|
| 76 |
import re
|
| 77 |
-
import sys
|
| 78 |
-
import subprocess
|
| 79 |
-
from typing import Dict, Any, List
|
| 80 |
-
|
| 81 |
-
# Setup Logging
|
| 82 |
-
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
def check_gcloud_auth():
    """Return True if GCP Application Default Credentials are available.

    Probes for active credentials by asking gcloud for an access token.
    On failure a remediation message is printed to stderr and False is
    returned so the caller can abort before importing heavy cloud libraries.
    """
    try:
        # 'gcloud auth application-default print-access-token' succeeds only
        # when ADC are configured.  FIX: pass an argument list with the
        # default shell=False instead of a shell string — avoids shell
        # interpretation entirely.  stderr is suppressed to keep startup quiet.
        subprocess.check_output(
            ["gcloud", "auth", "application-default", "print-access-token"],
            stderr=subprocess.DEVNULL,
        )
        logging.info("GCP authentication credentials found.")
        return True
    except (subprocess.CalledProcessError, FileNotFoundError, OSError):
        # CalledProcessError: gcloud ran but no credentials are configured.
        # FileNotFoundError/OSError: the gcloud CLI itself is not installed
        # (previously masked by shell=True, which reported it as a nonzero
        # shell exit instead).
        error_message = """
================================================================================
CRITICAL ERROR: Google Cloud Authentication Not Found!
================================================================================
This application requires authentication to access Google Cloud services (Vertex AI, Cloud Storage).

To fix this, please run the following command in your terminal and follow the prompts to log in with your Google account:

    gcloud auth application-default login

After authenticating, please restart this Python script.
================================================================================
"""
        print(error_message, file=sys.stderr)
        return False
|
| 111 |
-
|
| 112 |
-
# Run the auth check before trying to import heavy libraries
|
| 113 |
-
if not check_gcloud_auth():
|
| 114 |
-
sys.exit(1)
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
from typing import Dict, Any
|
| 119 |
import gradio as gr
|
| 120 |
-
import
|
| 121 |
-
|
| 122 |
-
from
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
#
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
#
|
| 141 |
-
#
|
| 142 |
-
#
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
# ---
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
"
|
| 162 |
-
"
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
}
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
"
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
PROMPT_HOLISTIC_VIDEO_ANALYSIS = """
|
| 189 |
-
You are an expert sales coach. Analyze the provided video and the supplementary quantitative metrics to generate a structured, holistic feedback report. Your output MUST strictly conform to the provided JSON schema, including the 1-10 score range.
|
| 190 |
-
|
| 191 |
-
**Quantitative Metrics (for additional context):**
|
| 192 |
-
{quantitative_metrics_json}
|
| 193 |
-
|
| 194 |
-
**Evaluation Framework (Analyze the video directly):**
|
| 195 |
-
1. **Content & Structure:** Analyze clarity, flow, value proposition, and the call to action.
|
| 196 |
-
2. **Vocal Delivery:** Analyze pacing, vocal variety, confidence, energy, and enunciation. Use the quantitative metrics to inform your qualitative assessment.
|
| 197 |
-
3. **Visual Delivery:** Analyze eye contact, body language, and facial expressions.
|
| 198 |
-
|
| 199 |
-
Provide specific examples from the video to support your points.
|
| 200 |
-
"""
|
| 201 |
-
|
| 202 |
-
PROMPT_FINAL_SYNTHESIS = """
|
| 203 |
-
You are a senior executive coach. Synthesize the provided detailed analysis data into a high-level summary. Your output MUST strictly conform to the provided JSON schema.
|
| 204 |
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 208 |
|
| 209 |
-
|
| 210 |
-
---
|
| 211 |
-
{full_analysis_json}
|
| 212 |
-
---
|
| 213 |
-
"""
|
| 214 |
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
if not os.path.exists(output_dir):
|
| 222 |
-
os.makedirs(output_dir)
|
| 223 |
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
}
|
| 230 |
-
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
|
| 231 |
-
ydl.download([url])
|
| 232 |
-
return filepath
|
| 233 |
-
|
| 234 |
-
class QuantitativeAudioTool:
|
| 235 |
-
"""A tool for performing objective, numerical analysis on an audio track."""
|
| 236 |
-
class JAXAudioProcessor:
|
| 237 |
-
"""A nested class demonstrating JAX for high-performance audio processing."""
|
| 238 |
-
def __init__(self):
|
| 239 |
-
self.jit_rms_energy = jax.jit(self._calculate_rms_energy)
|
| 240 |
-
@staticmethod
|
| 241 |
-
@jax.jit
|
| 242 |
-
def _calculate_rms_energy(waveform: jnp.ndarray) -> jnp.ndarray:
|
| 243 |
-
return jnp.sqrt(jnp.mean(jnp.square(waveform)))
|
| 244 |
-
def analyze_energy_variation(self, waveform_np):
|
| 245 |
-
if waveform_np is None or waveform_np.size == 0: return 0.0
|
| 246 |
-
waveform_jnp = jnp.asarray(waveform_np)
|
| 247 |
-
frame_length, hop_length = 2048, 512
|
| 248 |
-
num_frames = (waveform_jnp.shape[0] - frame_length) // hop_length
|
| 249 |
-
start_positions = jnp.arange(num_frames) * hop_length
|
| 250 |
-
offsets = jnp.arange(frame_length)
|
| 251 |
-
frame_indices = start_positions[:, None] + offsets[None, :]
|
| 252 |
-
frames = waveform_jnp[frame_indices]
|
| 253 |
-
frame_energies = jax.vmap(self.jit_rms_energy)(frames)
|
| 254 |
-
return float(jnp.std(frame_energies))
|
| 255 |
-
|
| 256 |
-
def __init__(self):
|
| 257 |
-
self.jax_processor = self.JAXAudioProcessor()
|
| 258 |
-
self.whisper_model = whisper.load_model("base.en")
|
| 259 |
-
|
| 260 |
-
def run(self, video_path: str, output_dir: str = "temp_output"):
|
| 261 |
-
if not os.path.exists(output_dir): os.makedirs(output_dir)
|
| 262 |
-
video = None
|
| 263 |
-
try:
|
| 264 |
-
video = VideoFileClip(video_path)
|
| 265 |
-
|
| 266 |
-
if video.audio is None:
|
| 267 |
-
raise ValueError("The provided video file does not contain an audio track, or it could not be decoded. Analysis cannot proceed.")
|
| 268 |
-
|
| 269 |
-
audio_path = os.path.join(output_dir, f"audio_{uuid.uuid4()}.wav")
|
| 270 |
-
video.audio.write_audiofile(audio_path, codec='pcm_s16le', fps=16000)
|
| 271 |
-
|
| 272 |
-
transcript_result = self.whisper_model.transcribe(audio_path, fp16=False)
|
| 273 |
-
word_count = len(transcript_result['text'].split())
|
| 274 |
-
duration = video.duration
|
| 275 |
-
pace = (word_count / duration) * 60 if duration > 0 else 0
|
| 276 |
-
|
| 277 |
-
y, sr = librosa.load(audio_path, sr=16000)
|
| 278 |
-
energy_variation = self.jax_processor.analyze_energy_variation(y)
|
| 279 |
-
|
| 280 |
-
os.remove(audio_path)
|
| 281 |
-
|
| 282 |
-
return {
|
| 283 |
-
"speaking_pace_wpm": round(pace, 2),
|
| 284 |
-
"vocal_energy_variation": round(energy_variation, 4),
|
| 285 |
-
}
|
| 286 |
-
finally:
|
| 287 |
-
if video:
|
| 288 |
-
video.close()
|
| 289 |
-
|
| 290 |
-
# ==============================================================================
|
| 291 |
-
# VERTEX AI MANAGER CLASS
|
| 292 |
-
# ==============================================================================
|
| 293 |
-
class VertexAIManager:
|
| 294 |
-
def __init__(self):
|
| 295 |
-
vertexai.init(project=GCP_PROJECT_ID, location=GCP_LOCATION)
|
| 296 |
-
self.model = GenerativeModel(MODEL_GEMINI_PRO)
|
| 297 |
-
|
| 298 |
-
def run_multimodal_analysis(self, video_gcs_uri: str, prompt: str) -> dict:
|
| 299 |
-
video_part = Part.from_uri(uri=video_gcs_uri, mime_type="video/mp4")
|
| 300 |
-
contents = [video_part, prompt]
|
| 301 |
-
config = GenerationConfig(response_schema=HOLISTIC_ANALYSIS_SCHEMA, temperature=0.2, response_mime_type="application/json")
|
| 302 |
-
response = self.model.generate_content(contents, generation_config=config)
|
| 303 |
-
return json.loads(response.text)
|
| 304 |
-
|
| 305 |
-
def run_synthesis(self, prompt: str) -> dict:
|
| 306 |
-
config = GenerationConfig(response_schema=FINAL_SYNTHESIS_SCHEMA, temperature=0.3, response_mime_type="application/json")
|
| 307 |
-
response = self.model.generate_content(prompt, generation_config=config)
|
| 308 |
-
return json.loads(response.text)
|
| 309 |
-
|
| 310 |
-
# ==============================================================================
|
| 311 |
-
# AGENT CLASS
|
| 312 |
-
# ==============================================================================
|
| 313 |
-
class PitchAnalyzerAgent:
|
| 314 |
-
def __init__(self):
|
| 315 |
-
self.vertex_manager = VertexAIManager()
|
| 316 |
-
self.storage_client = storage.Client(project=GCP_PROJECT_ID)
|
| 317 |
-
self.youtube_tool = YouTubeDownloaderTool()
|
| 318 |
-
self.quant_tool = QuantitativeAudioTool()
|
| 319 |
-
self._check_bucket()
|
| 320 |
-
|
| 321 |
-
def _check_bucket(self):
|
| 322 |
-
self.storage_client.get_bucket(GCS_BUCKET_NAME)
|
| 323 |
-
|
| 324 |
-
def _upload_to_gcs(self, path: str) -> str:
|
| 325 |
-
bucket = self.storage_client.bucket(GCS_BUCKET_NAME)
|
| 326 |
-
blob_name = f"pitch-videos/{uuid.uuid4()}.mp4"
|
| 327 |
-
blob = bucket.blob(blob_name)
|
| 328 |
-
blob.upload_from_filename(path)
|
| 329 |
-
return f"gs://{GCS_BUCKET_NAME}/{blob_name}"
|
| 330 |
-
|
| 331 |
-
def _delete_from_gcs(self, gcs_uri: str):
|
| 332 |
-
bucket_name, blob_name = gcs_uri.replace("gs://", "").split("/", 1)
|
| 333 |
-
self.storage_client.bucket(bucket_name).blob(blob_name).delete()
|
| 334 |
-
|
| 335 |
-
def run_analysis_pipeline(self, video_path_or_url: str, progress_callback):
|
| 336 |
-
local_video_path = None
|
| 337 |
-
video_gcs_uri = None
|
| 338 |
-
try:
|
| 339 |
-
if re.match(r"^(https?://)?(www\.)?(youtube\.com|youtu\.?be)/.+$", video_path_or_url):
|
| 340 |
-
progress_callback(0.1, "Downloading video from YouTube...")
|
| 341 |
-
local_video_path = self.youtube_tool.run(video_path_or_url)
|
| 342 |
-
else:
|
| 343 |
-
local_video_path = video_path_or_url
|
| 344 |
-
|
| 345 |
-
progress_callback(0.3, "Performing JAX-based quantitative analysis...")
|
| 346 |
-
quant_metrics = self.quant_tool.run(local_video_path)
|
| 347 |
-
|
| 348 |
-
progress_callback(0.5, "Uploading video to secure Cloud Storage...")
|
| 349 |
-
video_gcs_uri = self._upload_to_gcs(local_video_path)
|
| 350 |
-
|
| 351 |
-
progress_callback(0.7, "Gemini 1.5 Pro is analyzing the video...")
|
| 352 |
-
analysis_prompt = PROMPT_HOLISTIC_VIDEO_ANALYSIS.format(quantitative_metrics_json=json.dumps(quant_metrics, indent=2))
|
| 353 |
-
multimodal_analysis = self.vertex_manager.run_multimodal_analysis(video_gcs_uri, analysis_prompt)
|
| 354 |
-
|
| 355 |
-
progress_callback(0.9, "Synthesizing final report...")
|
| 356 |
-
synthesis_prompt = PROMPT_FINAL_SYNTHESIS.format(full_analysis_json=json.dumps(multimodal_analysis, indent=2))
|
| 357 |
-
final_summary = self.vertex_manager.run_synthesis(synthesis_prompt)
|
| 358 |
-
|
| 359 |
-
return {"quantitative_metrics": quant_metrics, "multimodal_analysis": multimodal_analysis, "executive_summary": final_summary}
|
| 360 |
-
except Exception as e:
|
| 361 |
-
logging.error(f"Analysis pipeline failed: {e}", exc_info=True)
|
| 362 |
-
return {"error": str(e)}
|
| 363 |
-
finally:
|
| 364 |
-
if video_gcs_uri:
|
| 365 |
-
try: self._delete_from_gcs(video_gcs_uri)
|
| 366 |
-
except Exception as e: logging.warning(f"Failed to delete GCS object {video_gcs_uri}: {e}")
|
| 367 |
-
if local_video_path and video_path_or_url != local_video_path:
|
| 368 |
-
if os.path.exists(local_video_path): os.remove(local_video_path)
|
| 369 |
-
|
| 370 |
-
# ==============================================================================
|
| 371 |
-
# UI FORMATTING HELPER
|
| 372 |
-
# ==============================================================================
|
| 373 |
-
def format_feedback_markdown(analysis: dict) -> str:
|
| 374 |
-
if not analysis or "error" in analysis:
|
| 375 |
-
return f"## Analysis Failed π\n\n**Reason:** {analysis.get('error', 'Unknown error.')}"
|
| 376 |
|
| 377 |
-
|
| 378 |
-
metrics = analysis.get('quantitative_metrics', {})
|
| 379 |
-
ai_analysis = analysis.get('multimodal_analysis', {})
|
| 380 |
|
| 381 |
-
|
| 382 |
-
|
| 383 |
-
if
|
| 384 |
-
|
| 385 |
-
|
| 386 |
-
|
| 387 |
-
def get_energy_rating(variation):
|
| 388 |
-
if variation == 0: return "N/A"
|
| 389 |
-
if variation < 0.02: return "Consistent / Monotonous"
|
| 390 |
-
if variation <= 0.05: return "Moderately Dynamic"
|
| 391 |
-
return "Highly Dynamic & Engaging"
|
| 392 |
-
|
| 393 |
-
wpm = metrics.get('speaking_pace_wpm', 0)
|
| 394 |
-
energy_var = metrics.get('vocal_energy_variation', 0)
|
| 395 |
-
pace_rating = get_pace_rating(wpm)
|
| 396 |
-
energy_rating = get_energy_rating(energy_var)
|
| 397 |
-
|
| 398 |
-
metrics_md = f"""
|
| 399 |
-
- **Speaking Pace:** **{wpm} WPM** *(Rating: {pace_rating})*
|
| 400 |
-
- *This measures the number of words spoken per minute. A typical conversational pace is between 120-160 WPM.*
|
| 401 |
-
- **Vocal Energy Variation:** **{energy_var:.4f}** *(Rating: {energy_rating})*
|
| 402 |
-
- *This measures the standard deviation of your vocal loudness. A higher value indicates a more dynamic and engaging vocal range, while a very low value suggests a monotonous delivery.*
|
| 403 |
-
"""
|
| 404 |
-
|
| 405 |
-
# --- FIX: Revert to using bold text instead of headers for consistency ---
|
| 406 |
-
def format_ai_item(title, data):
|
| 407 |
-
if not data or "score" not in data: return f"**{title}:**\n> Analysis not available.\n\n"
|
| 408 |
-
raw_score = data.get('score', 0); score = max(1, min(10, raw_score))
|
| 409 |
-
stars = "π’" * score + "βͺοΈ" * (10 - score)
|
| 410 |
-
feedback = data.get('feedback', 'No feedback.').replace('\n', '\n> ')
|
| 411 |
-
return f"**{title}:** `{stars} [{score}/10]`\n\n> {feedback}\n\n"
|
| 412 |
|
| 413 |
-
|
| 414 |
|
| 415 |
-
# --- FIX: Use a more consistent structure for the final report ---
|
| 416 |
-
return f"""
|
| 417 |
-
# PitchPerfect AI Analysis Report π
|
| 418 |
-
## π Executive Summary
|
| 419 |
-
### Key Strengths
|
| 420 |
-
{summary.get('key_strengths', '- N/A')}
|
| 421 |
-
### High-Leverage Growth Opportunities
|
| 422 |
-
{summary.get('growth_opportunities', '- N/A')}
|
| 423 |
-
### Final Verdict
|
| 424 |
-
> {summary.get('executive_summary', 'N/A')}
|
| 425 |
-
---
|
| 426 |
-
## π Quantitative Metrics Explained (via JAX)
|
| 427 |
-
{metrics_md}
|
| 428 |
-
---
|
| 429 |
-
## π§ AI Multimodal Analysis (via Gemini 1.5 Pro)
|
| 430 |
-
### I. Content & Structure
|
| 431 |
-
{format_ai_item("Clarity", content.get('clarity'))}
|
| 432 |
-
{format_ai_item("Structure & Flow", content.get('structure'))}
|
| 433 |
-
{format_ai_item("Value Proposition", content.get('value_proposition'))}
|
| 434 |
-
{format_ai_item("Call to Action (CTA)", content.get('cta'))}
|
| 435 |
-
<hr style="border:1px solid #ddd">
|
| 436 |
|
| 437 |
-
|
| 438 |
-
|
| 439 |
-
|
| 440 |
-
|
| 441 |
-
|
| 442 |
-
<hr style="border:1px solid #ddd">
|
| 443 |
|
| 444 |
-
|
| 445 |
-
{format_ai_item("Eye Contact", visual.get('eye_contact'))}
|
| 446 |
-
{format_ai_item("Body Language", visual.get('body_language'))}
|
| 447 |
-
{format_ai_item("Facial Expressions", visual.get('facial_expressions'))}
|
| 448 |
-
"""
|
| 449 |
|
| 450 |
-
|
| 451 |
-
|
| 452 |
-
|
| 453 |
-
|
| 454 |
-
|
| 455 |
-
|
| 456 |
-
try:
|
| 457 |
-
pitch_agent = PitchAnalyzerAgent()
|
| 458 |
-
except Exception as e:
|
| 459 |
-
logging.fatal(f"Failed to initialize agent during startup: {e}", exc_info=True)
|
| 460 |
|
| 461 |
-
|
| 462 |
-
|
| 463 |
-
|
| 464 |
-
|
| 465 |
-
|
| 466 |
-
|
| 467 |
-
|
| 468 |
-
|
| 469 |
-
|
| 470 |
-
|
| 471 |
-
|
| 472 |
-
|
| 473 |
-
|
| 474 |
-
|
| 475 |
-
|
| 476 |
-
|
| 477 |
-
|
| 478 |
-
|
| 479 |
-
|
| 480 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 481 |
|
| 482 |
-
|
| 483 |
-
|
| 484 |
-
|
| 485 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
"""
|
| 2 |
+
π The Content Creator's Journey (AI-Powered Edition)
|
| 3 |
+
==================================================
|
| 4 |
+
This version replaces all mocked functions with real generative AI models
|
| 5 |
+
for image analysis, content generation, and translation.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
|
| 7 |
+
- Stage 1: Inspiration Hub (Summarization AI)
|
| 8 |
+
- Stage 2: Creative Studio (Image-to-Text and Text Generation AI)
|
| 9 |
+
- Stage 3: Globalization Suite (Translation AI)
|
| 10 |
|
| 11 |
+
Author: Gemini
|
| 12 |
+
Date: July 4, 2025
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
"""
|
| 14 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
import os
|
| 16 |
import re
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
import gradio as gr
|
| 18 |
+
import arxiv
|
| 19 |
+
import nltk
|
| 20 |
+
from transformers import pipeline
|
| 21 |
+
from youtube_transcript_api import YouTubeTranscriptApi, NoTranscriptFound
|
| 22 |
+
from youtube_transcript_api.formatters import TextFormatter
|
| 23 |
+
from PIL import Image
|
| 24 |
+
|
| 25 |
+
# --- Model & Pipeline Setup (Lazy Loading) ---
|
| 26 |
+
|
| 27 |
+
# Dictionary to hold our models, loaded only when needed.
|
| 28 |
+
# Cache of constructed Hugging Face pipelines, keyed by model name, so each
# model is loaded from disk at most once per process.
models = {}

def get_pipeline(task, model_name):
    """Return a cached transformers pipeline, constructing it on first use."""
    # EAFP: the common case after warm-up is a cache hit.
    try:
        return models[model_name]
    except KeyError:
        print(f"π Initializing {task} pipeline with model {model_name}...")
        loaded = pipeline(task, model=model_name)
        models[model_name] = loaded
        print(f"β {model_name} loaded.")
        return loaded
|
| 37 |
+
|
| 38 |
+
# --- Stage 1: The Spark (Inspiration Hub) ---
|
| 39 |
+
# This section already uses a real summarization model, so no changes are needed here.
|
| 40 |
+
# (Functions search_arxiv_papers and summarize_youtube_from_url are omitted for brevity but remain the same)
|
| 41 |
+
|
| 42 |
+
# ... (Previous code for Stage 1 remains here) ...
|
| 43 |
+
|
| 44 |
+
# --- Stage 2: The Craft (Creative Studio) ---
|
| 45 |
+
|
| 46 |
+
def analyze_image_with_ai(image: Image.Image) -> (str, dict):
    """Describe *image* with a BLIP captioning model.

    Returns a (markdown_report, analysis_dict) pair; the dict carries the raw
    caption under the "description" key for downstream prompt building.
    """
    # Fetch the shared captioning pipeline (cached across calls).
    captioner = get_pipeline("image-to-text", "Salesforce/blip-image-captioning-large")
    caption = captioner(image)[0]["generated_text"]

    report = "**π¨ AI Vision Analysis:**\n\n" + f"- **Image Content:** {caption}"
    return report, {"description": caption}
|
| 57 |
+
|
| 58 |
+
def generate_creative_content_with_ai(style: str, audience: str, image_analysis: dict, custom_prompt: str) -> (str, str):
    """Generate creative content with a local LLM (gpt2).

    Builds a structured prompt from the requested style/audience, the image
    caption produced by the analysis step, and an optional user instruction,
    then returns (generated_text, analytics_markdown).
    """
    generator = get_pipeline("text-generation", "gpt2")
    image_desc = image_analysis.get("description", "a visual scene")

    # Create a detailed prompt for the LLM
    prompt = (
        f"Create a '{style}' for a '{audience}' audience. "
        f"The content should be inspired by the following scene: '{image_desc}'. "
        f"Follow this specific instruction: '{custom_prompt if custom_prompt else 'Be creative and engaging'}'.\n\n"
        f"Here is the content:"
    )

    # FIX: use max_new_tokens rather than max_length.  max_length caps the
    # *total* sequence (prompt + completion), so a long prompt could exceed
    # 150 tokens and leave no room for generated text (or raise a warning/
    # error).  max_new_tokens always allows 150 fresh tokens.
    generated_outputs = generator(
        prompt,
        max_new_tokens=150,
        num_return_sequences=1,
        pad_token_id=generator.tokenizer.eos_token_id,
    )
    generated_text = generated_outputs[0]['generated_text']

    # Strip the echoed prompt by position when possible; str.replace() would
    # also delete any accidental later repetition of the prompt text.
    if generated_text.startswith(prompt):
        clean_text = generated_text[len(prompt):].strip()
    else:
        clean_text = generated_text.replace(prompt, "").strip()

    # The analytics are informational rather than predictive
    analytics_report = (
        f"**π Generation Details:**\n\n"
        f"- **Model Used:** gpt2\n"
        f"- **Core Prompt:** Based on a photo of '{image_desc[:40]}...'"
    )

    return clean_text, analytics_report
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 86 |
|
| 87 |
+
def run_creative_studio(uploaded_image, style, audience, custom_prompt):
    """Gradio handler for the 'Craft' stage: caption the image, then write copy.

    Returns a 3-tuple of (vision report, generated content, analytics); on
    failure the error text goes in the first slot and the rest are blank.
    """
    # Guard clause: nothing to analyze without an uploaded image.
    if uploaded_image is None:
        return "β Please upload an image.", "", ""
    try:
        vision_report, vision_data = analyze_image_with_ai(uploaded_image)
        content, analytics_md = generate_creative_content_with_ai(
            style, audience, vision_data, custom_prompt
        )
    except Exception as e:
        # Surface model/runtime failures in the UI instead of crashing Gradio.
        return f"β οΈ Error: {e}", "", ""
    return vision_report, content, analytics_md
|
| 98 |
|
| 99 |
+
# --- Stage 3: The Reach (Globalization Suite) ---
|
|
|
|
|
|
|
|
|
|
|
|
|
| 100 |
|
| 101 |
+
def translate_content_with_ai(text: str, languages: list) -> str:
    """Translate *text* into each selected language with Helsinki-NLP models.

    Language labels with no configured model are silently skipped.  Returns a
    single markdown document: a header section followed by one section per
    translation, separated by horizontal rules.
    """
    if not text:
        return "β Please provide text to translate."
    if not languages:
        return "β Please select at least one language."

    # UI language labels mapped to English->X MarianMT checkpoints.
    label_to_checkpoint = {
        "German π©πͺ": "Helsinki-NLP/opus-mt-en-de",
        "Spanish πͺπΈ": "Helsinki-NLP/opus-mt-en-es",
        "Japanese π―π΅": "Helsinki-NLP/opus-mt-en-jap",
    }

    sections = ["### π Translated Content\n"]
    for label in languages:
        checkpoint = label_to_checkpoint.get(label)
        if checkpoint is None:
            continue  # label without a configured model — skip it
        translator = get_pipeline("translation", checkpoint)
        translated = translator(text)[0]["translation_text"]
        sections.append(f"**{label.upper()} VERSION:**\n\n{translated}")

    return "\n\n---\n\n".join(sections)
|
| 124 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 125 |
|
| 126 |
+
# --- Full Gradio UI ---
|
| 127 |
+
# The UI structure remains the same, but the functions it calls are now AI-powered.
|
| 128 |
+
# The code for create_ui(), search_arxiv_papers, and summarize_youtube_from_url is omitted here
|
| 129 |
+
# for brevity, as it doesn't change from the previous version. You can just plug the
|
| 130 |
+
# new functions above into your existing app.py file.
|
|
|
|
| 131 |
|
| 132 |
+
# --- Helper functions from previous version to make the file runnable ---
|
|
|
|
|
|
|
|
|
|
|
|
|
| 133 |
|
| 134 |
+
def search_arxiv_papers(topic: str) -> str:
    """Search arXiv for *topic* and return up to 3 AI-summarized results.

    Each result is a markdown section containing the paper title, a
    distilbart summary of the abstract, and a link to the PDF.
    """
    if not topic:
        return "β Please enter a topic to search."
    summarizer = get_pipeline("summarization", "sshleifer/distilbart-cnn-12-6")
    search = arxiv.Search(query=topic, max_results=3, sort_by=arxiv.SortCriterion.Relevance)
    # FIX: Search.results() is deprecated in the arxiv package (v2+);
    # fetch results through a Client instance instead.
    client = arxiv.Client()
    results = []
    for res in client.results(search):
        # FIX: flatten the newline-wrapped abstract before summarizing;
        # the original .replace(' ', ' ') was a no-op.
        abstract = res.summary.replace("\n", " ")
        summary_text = summarizer(abstract, max_length=80, min_length=20, do_sample=False)[0]["summary_text"]
        results.append(
            f"**π {res.title}**\n\n**Summary:** {summary_text}\n\n**π [Read Paper]({res.pdf_url})**"
        )
    return "\n\n---\n\n".join(results) if results else "No papers found."
|
|
|
|
|
|
|
|
|
|
|
|
|
| 140 |
|
| 141 |
+
def summarize_youtube_from_url(video_url: str) -> str:
    """Fetch a YouTube video's transcript and return an AI-generated summary.

    Validates the URL, extracts the 11-character video id, pulls the
    transcript, and summarizes it with distilbart.  All failure modes are
    reported as user-facing strings rather than raised.
    """
    if not video_url:
        return "β Please enter a YouTube URL."
    video_id_match = re.search(r"(?:v=|\/)([0-9A-Za-z_-]{11}).*", video_url)
    if not video_id_match:
        return "β Invalid YouTube URL."
    video_id = video_id_match.group(1)
    try:
        transcript_list = YouTubeTranscriptApi.get_transcript(video_id)
        transcript_text = " ".join(d['text'] for d in transcript_list)
        if len(transcript_text) < 200:
            return "Transcript too short."
        summarizer = get_pipeline("summarization", "sshleifer/distilbart-cnn-12-6")
        # FIX: distilbart-cnn accepts ~1024 input tokens at most; long video
        # transcripts previously crashed the pipeline.  truncation=True makes
        # the tokenizer clip the input to the model's maximum instead.
        summary = summarizer(transcript_text, max_length=100, min_length=30, do_sample=False, truncation=True)
        return f"**β Summary:**\n\n{summary[0]['summary_text']}"
    except NoTranscriptFound:
        return "β No transcript available."
    except Exception as e:
        return f"β οΈ Error: {e}"
|
| 155 |
+
|
| 156 |
+
def create_ui():
    """Assemble and return the three-stage Gradio Blocks application.

    Stage 1 wires arXiv search and YouTube summarization, stage 2 the
    image-driven creative studio, stage 3 the translation suite.  No model
    is loaded here; get_pipeline loads each lazily on first use.
    """
    # Minimal styling: card-like tabs on a light background, Gradio footer hidden.
    css = """.gradio-container { font-family: 'Inter', sans-serif; background: #f5f7fa; } .tab-item { background: white; border-radius: 12px; padding: 25px; border: 1px solid #e0e0e0; } footer { display: none !important }"""
    with gr.Blocks(theme=gr.themes.Base(), css=css, title="The Content Creator's Journey") as app:
        # Hero banner.
        gr.Markdown("""<div style="text-align: center; padding: 20px; background: #1f2937; color: white; border-radius: 12px;"><h1 style="font-size: 2.5em; margin: 0; font-weight: 700;">π The Content Creator's Journey</h1><p style="font-size: 1.2em; margin-top: 5px;">From a spark of an idea to a global message, in three stages.</p></div>""")
        with gr.Tabs() as tabs:
            # --- Stage 1: research / inspiration ---
            with gr.TabItem("1. The Spark: Inspiration Hub", id=0, elem_classes=["tab-item"]):
                gr.Markdown("### Every great creation starts with an idea. Research any topic to get summarized insights from academia and popular culture.")
                with gr.Row(variant="panel"):
                    with gr.Column(min_width=400):
                        gr.Markdown("#### π¬ Academic Insights (from arXiv)")
                        inspire_topic = gr.Textbox(label="Enter a Topic to Search Papers", placeholder="e.g., 'sustainable technology'")
                        arxiv_btn = gr.Button("Search arXiv")
                        inspire_arxiv_output = gr.Markdown()
                    with gr.Column(min_width=400):
                        gr.Markdown("#### πΊ Video Insights (from YouTube URL)")
                        inspire_yt_url = gr.Textbox(label="Paste a YouTube Video URL", placeholder="e.g., 'https://www.youtube.com/watch?v=...'")
                        yt_btn = gr.Button("Summarize Video")
                        inspire_yt_output = gr.Markdown()
                arxiv_btn.click(fn=search_arxiv_papers, inputs=inspire_topic, outputs=inspire_arxiv_output)
                yt_btn.click(fn=summarize_youtube_from_url, inputs=inspire_yt_url, outputs=inspire_yt_output)
            # --- Stage 2: image-anchored content creation ---
            with gr.TabItem("2. The Craft: Creative Studio", id=1, elem_classes=["tab-item"]):
                gr.Markdown("### Transform your idea into a polished piece of content. Upload a visual anchor and let the AI help you write.")
                with gr.Row(variant="panel"):
                    with gr.Column(scale=1):
                        craft_image = gr.Image(label="πΌοΈ Upload a Visual Anchor", type="pil")
                        craft_style = gr.Dropdown(choices=["βοΈ Blog Post", "π¬ Social Media Caption", "π‘ Video Script Hook"], value="βοΈ Blog Post", label="π Content Format")
                        craft_audience = gr.Dropdown(choices=["π Experts", "π§ General Audience", "π©βπ» Tech Enthusiasts"], value="π§ General Audience", label="π₯ Target Audience")
                        craft_prompt = gr.Textbox(label="π Key Message or Note", placeholder="e.g., 'Focus on the human element...'")
                        craft_btn = gr.Button("π¨ Craft My Content")
                    with gr.Column(scale=2):
                        craft_analysis_output = gr.Markdown(label="AI Vision Analysis")
                        craft_text_output = gr.Textbox(label="βοΈ Generated Content", lines=10)
                        craft_analytics_output = gr.Markdown(label="Performance Analytics")
                craft_btn.click(fn=run_creative_studio, inputs=[craft_image, craft_style, craft_audience, craft_prompt], outputs=[craft_analysis_output, craft_text_output, craft_analytics_output])
            # --- Stage 3: translation / globalization ---
            with gr.TabItem("3. The Reach: Globalization Suite", id=2, elem_classes=["tab-item"]):
                gr.Markdown("### Your masterpiece is ready. Now, adapt it for a global audience with our translation suite.")
                with gr.Row(variant="panel"):
                    with gr.Column(scale=2):
                        reach_text_input = gr.Textbox(label="Paste Content Here (from Stage 2)", lines=8)
                        # NOTE: these labels must stay in sync with the keys in
                        # translate_content_with_ai's language/model map.
                        reach_lang_select = gr.CheckboxGroup(choices=["German π©πͺ", "Spanish πͺπΈ", "Japanese π―π΅"], label="π Select Languages")
                        reach_btn = gr.Button("π Globalize My Content")
                    with gr.Column(scale=3):
                        reach_output = gr.Markdown(label="Adapted for Global Audiences")
                reach_btn.click(fn=translate_content_with_ai, inputs=[reach_text_input, reach_lang_select], outputs=reach_output)
    return app
|
| 201 |
|
| 202 |
+
if __name__ == "__main__":
    # Build the UI first; models are loaded lazily by get_pipeline on first
    # use, so startup stays fast until a feature is actually exercised.
    demo = create_ui()
    demo.launch(debug=True)
|