import os import time import tempfile import logging import json from typing import Dict, Any, List, Literal import pandas as pd import streamlit as st from pydantic import BaseModel, constr from google import genai logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s", handlers=[logging.StreamHandler()]) logger = logging.getLogger(__name__) st.set_page_config(page_title="Video Ad Analyzer", page_icon="🎬", layout="wide") GEMINI_API_KEY = os.getenv("GEMINI_KEY", "") def configure_gemini() -> genai.Client: if not GEMINI_API_KEY: raise RuntimeError("GEMINI_KEY is not set in environment variables.") return genai.Client(api_key=GEMINI_API_KEY) Timestamp = constr(pattern=r'^\d{2}:\d{2}$') RangeTimestamp = constr(pattern=r'^\d{2}:\d{2}-\d{2}:\d{2}$') Score010 = constr(pattern=r'^(?:10|[0-9])\/10$') class Hook(BaseModel): hook_text: str principle: str advantages: List[str] class StoryboardItem(BaseModel): timeline: Timestamp scene: str visuals: str dialogue: str camera: str sound_effects: str class ScriptLine(BaseModel): timeline: Timestamp dialogue: str class VideoMetric(BaseModel): timestamp: RangeTimestamp element: str current_approach: str effectiveness_score: Score010 notes: str class VideoAnalysis(BaseModel): effectiveness_factors: str psychological_triggers: str target_audience: str video_metrics: List[VideoMetric] class TimestampImprovement(BaseModel): timestamp: RangeTimestamp current_element: str improvement_type: str recommended_change: str expected_impact: str priority: Literal["High", "Medium", "Low"] class AdAnalysis(BaseModel): brief: str caption_details: str hook: Hook framework_analysis: str storyboard: List[StoryboardItem] script: List[ScriptLine] video_analysis: VideoAnalysis timestamp_improvements: List[TimestampImprovement] analyser_prompt = """You are an expert video advertisement analyst. Analyze the provided video and give response conforms EXACTLY to the schema below with no extra text or markdown. Populate: 1. **brief** → A concise summary covering visual style, speaker, target audience, and marketing objective. 2. **caption_details** → Description of captions (color/style/position) or exactly the string `"None"` if not visible. 3. **hook** → - `"hook_text"`: Exact opening line or, if no speech, the precise description of the opening visual. - `"principle"`: Psychological/marketing principle that makes this hook effective. - `"advantages"`: ARRAY of 3–6 concise benefit statements tied to the ad’s value proposition. 4. **framework_analysis** → A detailed block identifying copywriting/psychology/storytelling frameworks (e.g., PAS, AIDA). Highlight use of social proof, urgency, fear, authority, scroll-stopping hooks, loop openers, value positioning, and risk reversals. 5. **storyboard** → ARRAY of 4–10 objects. Each must include: - `"timeline"` in `"MM:SS"` (zero-padded) - `"scene"` (brief) - `"visuals"` (detailed) - `"dialogue"` (exact words; use `""` if none) - `"camera"` (shot/angle) - `"sound_effects"` (or `"None"`) 6. **script** → ARRAY of dialogue objects, each with `"timeline"` (`"MM:SS"`) and `"dialogue"` (exact spoken line). 7. **video_analysis** → OBJECT with: - `"effectiveness_factors"`: Key factors that influence effectiveness - `"psychological_triggers"`: Triggers used (e.g., scarcity, authority) - `"target_audience"`: Audience profile inferred - `"video_metrics"`: ARRAY of objects with: - `"timestamp"`: `"MM:SS-MM:SS"` - `"element"`: The aspect being evaluated (e.g., Hook Strategy) - `"current_approach"`: Description of current execution - `"effectiveness_score"`: String score `"X/10"` (integer X) - `"notes"`: Analytical notes 8. **timestamp_improvements** → ARRAY of recommendation objects with: - `"timestamp"`: `"MM:SS-MM:SS"` - `"current_element"`: Current content of the segment - `"improvement_type"`: Category (e.g., Hook Enhancement) - `"recommended_change"`: Specific recommendation - `"expected_impact"`: Projected effect on metrics or perception - `"priority"`: `"High"`, `"Medium"`, or `"Low"` ⚠️ The output must be strictly matching field names and types, no additional keys, and all timestamps must be zero-padded (`"MM:SS"` for single points, `"MM:SS-MM:SS"` for ranges). """ def analyze_video_only(video_path: str) -> Dict[str, Any]: client = configure_gemini() try: video_file = client.files.upload(file=video_path) while getattr(video_file.state, "name", "") == "PROCESSING": time.sleep(2) video_file = client.files.get(name=video_file.name) if getattr(video_file.state, "name", "") == "FAILED": return {} resp = client.models.generate_content( model="gemini-2.0-flash", contents=[analyser_prompt, video_file], config={"response_mime_type": "application/json"} ) raw = getattr(resp, "text", "") or "" try: model_obj = AdAnalysis.model_validate_json(raw) return model_obj.model_dump() except Exception: try: return json.loads(raw) except Exception: return {} except Exception: return {} def _normalize_list(value: Any) -> List[str]: if value is None: return [] if isinstance(value, list): return [str(v) for v in value] return [s for s in str(value).splitlines() if s.strip()] def _to_dataframe(items: Any, columns_map: Dict[str, str]) -> pd.DataFrame: if not isinstance(items, list) or not items: return pd.DataFrame(columns=list(columns_map.values())) df = pd.DataFrame(items) df = df.rename(columns=columns_map) ordered_cols = [columns_map[k] for k in columns_map.keys() if columns_map[k] in df.columns] df = df.reindex(columns=ordered_cols) return df def _mean_effectiveness(metrics: List[Dict[str, Any]]) -> float: if not metrics: return 0.0 scores = [] for m in metrics: s = str(m.get("effectiveness_score", "0/10")).split("/")[0] try: scores.append(int(s)) except Exception: pass return round(sum(scores) / len(scores), 2) if scores else 0.0 def _search_dataframe(df: pd.DataFrame, query: str) -> pd.DataFrame: if not query or df.empty: return df mask = pd.Series([False]*len(df)) for col in df.columns: mask = mask | df[col].astype(str).str.contains(query, case=False, na=False) return df[mask] def render_analyzer_results(analysis: Dict[str, Any]) -> None: if not isinstance(analysis, dict) or not analysis: st.warning("No analysis available.") return st.markdown(""" """, unsafe_allow_html=True) va = analysis.get("video_analysis", {}) or {} storyboard = analysis.get("storyboard", []) or [] script = analysis.get("script", []) or [] metrics = va.get("video_metrics", []) or [] mean_score = _mean_effectiveness(metrics) mcol1, mcol2, mcol3, mcol4 = st.columns([1,1,1,1]) with mcol1: st.markdown(f'