09van committed on
Commit
e2b6d23
·
verified ·
1 Parent(s): d2e6e43

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +239 -230
app.py CHANGED
@@ -1,231 +1,240 @@
1
- import os
2
- import re
3
- import json
4
- import io
5
- import datetime as dt
6
- import pandas as pd
7
- import streamlit as st
8
- from dotenv import load_dotenv
9
- import google.generativeai as genai
10
-
11
- # --- Setup ---
12
- load_dotenv()
13
- DEFAULT_MODEL = "gemini-1.5-flash"
14
-
15
- def configure_gemini(api_key: str):
16
- """Initializes the Gemini client with the provided API key."""
17
- genai.configure(api_key=api_key)
18
-
19
- # --- Utilities ---
20
- def extract_json(text: str) -> dict:
21
- """
22
- Pulls a JSON object from a string, even if it's wrapped in markdown code fences.
23
- Returns a Python dictionary or raises an error if parsing fails.
24
- """
25
- if not text:
26
- raise ValueError("Received an empty response from the model.")
27
-
28
- # Look for JSON within ```json ... ``` markdown block
29
- match = re.search(r"```json\s*(.*?)\s*```", text, re.DOTALL)
30
- if match:
31
- json_str = match.group(1)
32
- else:
33
- # Fallback: find the first '{' and last '}'
34
- start = text.find('{')
35
- end = text.rfind('}')
36
- if start == -1 or end == -1:
37
- raise json.JSONDecodeError("No JSON object found in the response text.", text, 0)
38
- json_str = text[start:end+1]
39
-
40
- return json.loads(json_str)
41
-
42
- def seconds_to_ts(s: int) -> str:
43
- """Converts an integer of seconds to a MM:SS timestamp string."""
44
- m, sec = divmod(int(s), 60)
45
- return f"{m:02d}:{sec:02d}"
46
-
47
- def make_prompt(topic: str, idea_count: int, total_seconds: int, scene_count: int) -> str:
48
- """Creates the detailed, structured prompt for the generative model."""
49
- return f"""
50
- You are a YouTube Shorts producer for "Contentmaniacs" (nature, cosmos, paradoxes, AI).
51
- Goal: Create viral, factual, poetic-science Shorts with clear visuals.
52
-
53
- Generate EXACTLY {idea_count} ideas for topic: "{topic}".
54
-
55
- Return ONLY a single, valid JSON object (no markdown). The root object must contain one key, "ideas", which is a list of idea objects.
56
-
57
- The schema for each idea object in the list is:
58
- {{
59
- "title": "string (<= 60 chars, no quotes)",
60
- "keywords": ["kw1","kw2","kw3"],
61
- "description": "1–2 lines, SEO-rich, natural language",
62
- "hashtags": ["Shorts","YouTubeShorts","Contentmaniacs","<up to 7 topical>"],
63
- "thumbnail_prompt": "clear 9:16 visual brief (no text)",
64
- "video_plan": {{
65
- "duration_seconds": {total_seconds},
66
- "scenes_count": {scene_count},
67
- "scenes": [
68
- {{
69
- "scene_no": 1,
70
- "start_sec": 0,
71
- "end_sec": 0,
72
- "voiceover": "1–2 punchy lines, simple language",
73
- "on_screen_text": "few words, optional, no hashtags",
74
- "visual_direction": "what to show (subject, motion, environment, mood, lighting)",
75
- "shot_type": "macro | wide | medium | timelapse | drone | slow-mo | infographic",
76
- "prompt": "text-to-video/image prompt for Canva/Runway (no text overlay)",
77
- "broll_ideas": ["alt idea 1","alt idea 2"],
78
- "sfx_music": "sound design notes (subtle, cinematic, ambient, etc.)"
79
- }}
80
- ]
81
- }},
82
- "full_transcript": "Combine all voiceover lines into a clean 45–60s transcript."
83
- }}
84
-
85
- RULES:
86
- - Factual, inspiring, no clickbait lies.
87
- - Keep each scene's voiceover short (<= 18 words).
88
- - Distribute time evenly across scenes so end_sec of last scene == duration_seconds.
89
- - Output MUST be a single, valid JSON object only.
90
- """
91
-
92
- def idea_json_to_overview_rows(topic: str, idea: dict) -> dict:
93
- """Creates a dictionary for the overview DataFrame from a single idea JSON."""
94
- return {
95
- "Topic": topic,
96
- "Title": (idea.get("title") or "").strip(),
97
- "Keywords": ", ".join(idea.get("keywords") or []),
98
- "Description": (idea.get("description") or "").strip(),
99
- "Hashtags": " ".join(("#" + h.lstrip("#")) for h in (idea.get("hashtags") or [])),
100
- "ThumbnailPrompt": (idea.get("thumbnail_prompt") or "").strip(),
101
- "DurationSec": idea.get("video_plan", {}).get("duration_seconds", "")
102
- }
103
-
104
- def idea_json_to_scenes_df(topic: str, idea: dict) -> pd.DataFrame:
105
- """Creates a DataFrame for the scene-by-scene shot list."""
106
- scenes = idea.get("video_plan", {}).get("scenes", []) or []
107
- rows = []
108
- for sc in scenes:
109
- rows.append({
110
- "Topic": topic,
111
- "Title": idea.get("title", ""),
112
- "SceneNo": sc.get("scene_no", ""),
113
- "Start": seconds_to_ts(sc.get("start_sec", 0)),
114
- "End": seconds_to_ts(sc.get("end_sec", 0)),
115
- "Voiceover": (sc.get("voiceover") or "").strip(),
116
- "OnScreenText": (sc.get("on_screen_text") or "").strip(),
117
- "VisualDirection": (sc.get("visual_direction") or "").strip(),
118
- "ShotType": (sc.get("shot_type") or "").strip(),
119
- "Prompt": (sc.get("prompt") or "").strip(),
120
- "BrollIdeas": ", ".join(sc.get("broll_ideas") or []),
121
- "SFX_Music": (sc.get("sfx_music") or "").strip()
122
- })
123
- return pd.DataFrame(rows)
124
-
125
- def df_to_csv_bytes(df: pd.DataFrame) -> bytes:
126
- """Converts a DataFrame to UTF-8 encoded CSV bytes for downloading."""
127
- return df.to_csv(index=False).encode("utf-8")
128
-
129
- def transcript_bytes(title: str, transcript: str) -> bytes:
130
- """Creates bytes for a simple text file containing the title and transcript."""
131
- content = f"TITLE\n{title}\n\nFULL TRANSCRIPT\n{transcript}\n"
132
- return content.encode("utf-8")
133
-
134
- # --- Streamlit UI ---
135
- st.set_page_config(page_title="Contentmaniacs Producer", page_icon="🎬", layout="wide")
136
- st.title("🎬 Contentmaniacs — Shorts Producer")
137
- st.caption("Generate ideas → transcript → scene prompts with one click.")
138
-
139
- # API key
140
- env_key = os.getenv("GEMINI_API_KEY", "")
141
- api_key = st.text_input("Gemini API Key", value=env_key, type="password", help="Find your key at makersuite.google.com")
142
-
143
- # Inputs
144
- c1, c2, c3, c4 = st.columns([2, 1, 1, 1])
145
- with c1:
146
- topic = st.text_input("Topic", placeholder="Cosmic paradoxes, Deep ocean mysteries, AI vs Humans…")
147
- with c2:
148
- idea_count = st.number_input("Ideas", min_value=1, max_value=5, value=1, step=1)
149
- with c3:
150
- total_seconds = st.number_input("Video length (sec)", min_value=30, max_value=90, value=60, step=5)
151
- with c4:
152
- scene_count = st.number_input("Scenes", min_value=3, max_value=10, value=6, step=1)
153
-
154
- model_name = st.selectbox("Model", [DEFAULT_MODEL, "gemini-1.5-pro"], index=0)
155
- go = st.button("✨ Generate")
156
-
157
- if go:
158
- if not api_key:
159
- st.error("Please paste your Gemini API key.")
160
- st.stop()
161
- if not topic.strip():
162
- st.error("Please enter a topic.")
163
- st.stop()
164
-
165
- try:
166
- configure_gemini(api_key)
167
- model = genai.GenerativeModel(model_name)
168
- prompt = make_prompt(topic.strip(), int(idea_count), int(total_seconds), int(scene_count))
169
-
170
- with st.spinner("Producing ideas, transcript and scenes…"):
171
- resp = model.generate_content(prompt)
172
- data = extract_json(resp.text)
173
-
174
- ideas = data.get("ideas", [])
175
- if not ideas:
176
- st.warning("No ideas returned. Try again with a simpler topic or check the model's response format.")
177
- st.stop()
178
-
179
- # Display each idea in its own tab
180
- tab_names = [f"Idea {i+1}" for i in range(len(ideas))]
181
- tabs = st.tabs(tab_names)
182
- ts = dt.datetime.now().strftime("%Y%m%d_%H%M%S")
183
-
184
- for i, (tab, idea) in enumerate(zip(tabs, ideas), start=1):
185
- with tab:
186
- overview_row = idea_json_to_overview_rows(topic.strip(), idea)
187
- scenes_df = idea_json_to_scenes_df(topic.strip(), idea)
188
- transcript = idea.get("full_transcript", "").strip()
189
- title = overview_row["Title"]
190
-
191
- st.subheader("Overview")
192
- st.dataframe(pd.DataFrame([overview_row]), use_container_width=True)
193
-
194
- st.subheader("Scenes / Shot List")
195
- st.dataframe(scenes_df, use_container_width=True)
196
-
197
- # Download buttons
198
- colA, colB, colC = st.columns(3)
199
- file_prefix = f"idea{i}_{ts}"
200
- with colA:
201
- st.download_button(
202
- "⬇️ Download Scenes CSV",
203
- data=df_to_csv_bytes(scenes_df),
204
- file_name=f"scenes_{file_prefix}.csv",
205
- mime="text/csv"
206
- )
207
- with colB:
208
- st.download_button(
209
- "⬇️ Download Transcript TXT",
210
- data=transcript_bytes(title, transcript),
211
- file_name=f"transcript_{file_prefix}.txt",
212
- mime="text/plain"
213
- )
214
- with colC:
215
- st.download_button(
216
- "⬇️ Download Overview CSV",
217
- data=df_to_csv_bytes(pd.DataFrame([overview_row])),
218
- file_name=f"overview_{file_prefix}.csv",
219
- mime="text/csv"
220
- )
221
-
222
- with st.expander("👀 Quick copy: Transcript"):
223
- st.code(transcript or "No transcript returned.", language="markdown")
224
-
225
- with st.expander("🎯 Thumbnail Prompt"):
226
- st.markdown(overview_row["ThumbnailPrompt"] or "_No prompt returned._")
227
-
228
- except json.JSONDecodeError:
229
- st.error("The model response wasn’t valid JSON. Click 'Generate' again or try a simpler topic.")
230
- except Exception as e:
 
 
 
 
 
 
 
 
 
231
  st.error(f"An unexpected error occurred: {e}")
 
1
+ import os
2
+ import re
3
+ import json
4
+ import io
5
+ import datetime as dt
6
+ import pandas as pd
7
+ import streamlit as st
8
+ from dotenv import load_dotenv
9
+ import google.generativeai as genai
10
+
11
# --- Setup ---
# Pull variables from a local .env file (when present) into the process
# environment so the os.getenv() lookup further down can see them.
load_dotenv()

# Model identifier offered first (and pre-selected) in the model picker.
DEFAULT_MODEL = "gemini-1.5-flash"
14
+
15
def configure_gemini(api_key: str):
    """Register ``api_key`` with the ``google.generativeai`` SDK.

    The key is passed straight through to ``genai.configure``; nothing is
    returned.
    """
    genai.configure(api_key=api_key)
18
+
19
+ # --- Utilities ---
20
def extract_json(text: str) -> dict:
    """
    Pull the first JSON object out of a model response.

    The response may be bare JSON, JSON wrapped in a markdown code fence
    (with or without a ``json`` language tag, in any letter case), or JSON
    surrounded by conversational text.

    Args:
        text: Raw text returned by the model.

    Returns:
        The decoded JSON object as a Python dictionary.

    Raises:
        ValueError: If ``text`` is empty.
        json.JSONDecodeError: If no JSON object is present or it is malformed.
    """
    if not text:
        raise ValueError("Received an empty response from the model.")

    # Prefer the contents of a fenced block, but only when it actually holds
    # an object; otherwise keep searching the whole response.
    candidate = text
    fenced = re.search(r"```(?:json)?\s*(.*?)\s*```", text, re.DOTALL | re.IGNORECASE)
    if fenced and "{" in fenced.group(1):
        candidate = fenced.group(1)

    start = candidate.find("{")
    if start == -1:
        raise json.JSONDecodeError("No JSON object found in the response text.", text, 0)

    # raw_decode stops at the end of the first complete object, so trailing
    # prose (even prose that contains '}') cannot corrupt the parse. Because
    # decoding starts on '{', a successful result is always a dict.
    parsed, _ = json.JSONDecoder().raw_decode(candidate[start:])
    return parsed
41
+
42
def seconds_to_ts(s: int) -> str:
    """Format a number of seconds as a zero-padded ``MM:SS`` timestamp.

    The value comes from model-generated JSON, so it is coerced leniently:
    numeric strings and floats are truncated to whole seconds, while
    ``None``, unparseable values and negative numbers are shown as
    ``00:00`` instead of raising and aborting the whole shot list.

    Args:
        s: Offset in seconds (int, float or numeric string).

    Returns:
        The timestamp string; minutes are not wrapped into hours.
    """
    try:
        total = int(float(s))
    except (TypeError, ValueError, OverflowError):
        # None, "abc", NaN and infinity all land here.
        total = 0

    minutes, seconds = divmod(max(total, 0), 60)
    return f"{minutes:02d}:{seconds:02d}"
46
+
47
def make_prompt(topic: str, idea_count: int, total_seconds: int, scene_count: int) -> str:
    """Build the structured prompt sent to the generative model.

    Args:
        topic: Subject the ideas should be about; inserted verbatim.
        idea_count: Number of ideas the model is asked to return.
        total_seconds: Target video length; used for ``duration_seconds``
            and for the requested transcript length, so the two can no
            longer contradict each other.
        scene_count: Number of scenes requested per idea.

    Returns:
        The prompt text, including the JSON schema the response must follow.
    """
    # Doubled braces are literal braces in the f-string; single braces are
    # the interpolated parameters.
    return f"""
You are a YouTube Shorts producer for "Contentmaniacs" (nature, cosmos, paradoxes, AI).
Goal: Create viral, factual, poetic-science Shorts with clear visuals.

Generate EXACTLY {idea_count} ideas for topic: "{topic}".

Return ONLY a single, valid JSON object (no markdown). The root object must contain one key, "ideas", which is a list of idea objects.

The schema for each idea object in the list is:
{{
  "title": "string (<= 60 chars, no quotes)",
  "keywords": ["kw1","kw2","kw3"],
  "description": "1–2 lines, SEO-rich, natural language",
  "hashtags": ["Shorts","YouTubeShorts","Contentmaniacs","<up to 7 topical>"],
  "thumbnail_prompt": "clear 9:16 visual brief (no text)",
  "video_plan": {{
    "duration_seconds": {total_seconds},
    "scenes_count": {scene_count},
    "scenes": [
      {{
        "scene_no": 1,
        "start_sec": 0,
        "end_sec": 0,
        "voiceover": "1–2 punchy lines, simple language",
        "on_screen_text": "few words, optional, no hashtags",
        "visual_direction": "what to show (subject, motion, environment, mood, lighting)",
        "shot_type": "macro | wide | medium | timelapse | drone | slow-mo | infographic",
        "prompt": "text-to-video/image prompt for Canva/Runway (no text overlay)",
        "broll_ideas": ["alt idea 1","alt idea 2"],
        "sfx_music": "sound design notes (subtle, cinematic, ambient, etc.)"
      }}
    ]
  }},
  "full_transcript": "Combine all voiceover lines into a clean transcript of about {total_seconds} seconds."
}}

RULES:
- Factual, inspiring, no clickbait lies.
- Keep each scene's voiceover short (<= 18 words).
- Distribute time evenly across scenes so end_sec of last scene == duration_seconds.
- Output MUST be a single, valid JSON object only.
"""
91
+
92
def idea_json_to_overview_rows(topic: str, idea: dict) -> dict:
    """Flatten one idea object into a single row for the overview table.

    The idea comes from model-generated JSON, so missing keys, JSON nulls
    and non-string list items are tolerated instead of raising.

    Args:
        topic: Topic the idea was generated for.
        idea: One element of the response's ``ideas`` list.

    Returns:
        A dict with the keys ``Topic``, ``Title``, ``Keywords``,
        ``Description``, ``Hashtags``, ``ThumbnailPrompt`` and
        ``DurationSec``.
    """

    def _text(value) -> str:
        """Return ``value`` as stripped text, mapping falsy values to ''."""
        return str(value).strip() if value else ""

    def _items(value) -> list:
        """Return ``value`` as a list of non-blank strings."""
        if not value:
            return []
        if isinstance(value, str):
            value = [value]
        return [str(item).strip() for item in value if item is not None and str(item).strip()]

    # "video_plan": null yields None from .get(), so a default alone is not enough.
    plan = idea.get("video_plan")
    if not isinstance(plan, dict):
        plan = {}

    # Normalize to exactly one leading '#'; drop tags that are only '#'.
    bare_tags = [tag.lstrip("#") for tag in _items(idea.get("hashtags"))]
    hashtags = " ".join("#" + tag for tag in bare_tags if tag)

    return {
        "Topic": topic,
        "Title": _text(idea.get("title")),
        "Keywords": ", ".join(_items(idea.get("keywords"))),
        "Description": _text(idea.get("description")),
        "Hashtags": hashtags,
        "ThumbnailPrompt": _text(idea.get("thumbnail_prompt")),
        "DurationSec": plan.get("duration_seconds", ""),
    }
103
+
104
def idea_json_to_scenes_df(topic: str, idea: dict) -> pd.DataFrame:
    """Build the scene-by-scene shot list for one idea.

    The idea comes from model-generated JSON, so a missing or null
    ``video_plan``, a non-list ``scenes`` value and non-dict scene entries
    are skipped instead of raising. The returned frame always carries the
    full set of columns, so an empty shot list still exports a CSV header.

    Args:
        topic: Topic the idea was generated for.
        idea: One element of the response's ``ideas`` list.

    Returns:
        A DataFrame with one row per usable scene.
    """
    columns = [
        "Topic", "Title", "SceneNo", "Start", "End", "Voiceover",
        "OnScreenText", "VisualDirection", "ShotType", "Prompt",
        "BrollIdeas", "SFX_Music",
    ]

    def _text(value) -> str:
        """Return ``value`` as stripped text, mapping falsy values to ''."""
        return str(value).strip() if value else ""

    # "video_plan": null yields None from .get(), so a default alone is not enough.
    plan = idea.get("video_plan")
    scenes = plan.get("scenes") if isinstance(plan, dict) else None
    if not isinstance(scenes, list):
        scenes = []

    # Stripped so the title matches the one shown in the overview table.
    title = _text(idea.get("title"))

    rows = []
    for sc in scenes:
        if not isinstance(sc, dict):
            continue

        broll = sc.get("broll_ideas") or []
        if isinstance(broll, str):
            # A lone string would otherwise be joined character by character.
            broll = [broll]

        rows.append({
            "Topic": topic,
            "Title": title,
            "SceneNo": sc.get("scene_no", ""),
            # `or 0` turns a JSON null into 0 before formatting.
            "Start": seconds_to_ts(sc.get("start_sec") or 0),
            "End": seconds_to_ts(sc.get("end_sec") or 0),
            "Voiceover": _text(sc.get("voiceover")),
            "OnScreenText": _text(sc.get("on_screen_text")),
            "VisualDirection": _text(sc.get("visual_direction")),
            "ShotType": _text(sc.get("shot_type")),
            "Prompt": _text(sc.get("prompt")),
            "BrollIdeas": ", ".join(str(item) for item in broll if item is not None),
            "SFX_Music": _text(sc.get("sfx_music")),
        })
    return pd.DataFrame(rows, columns=columns)
124
+
125
def df_to_csv_bytes(df: pd.DataFrame) -> bytes:
    """Serialize ``df`` to CSV (no index column) and return UTF-8 bytes."""
    csv_text = df.to_csv(index=False)
    return bytes(csv_text, encoding="utf-8")
128
+
129
def transcript_bytes(title: str, transcript: str) -> bytes:
    """Return the UTF-8 bytes of a plain-text file holding title and transcript.

    Layout: a ``TITLE`` heading with the title under it, a blank line, then
    a ``FULL TRANSCRIPT`` heading with the transcript under it.
    """
    title_section = f"TITLE\n{title}\n"
    transcript_section = f"FULL TRANSCRIPT\n{transcript}\n"
    document = title_section + "\n" + transcript_section
    return document.encode("utf-8")
133
+
134
+ # --- Streamlit UI ---
135
+ st.set_page_config(page_title="Contentmaniacs Producer", page_icon="🎬", layout="wide")
136
+ st.title("🎬 Contentmaniacs — Shorts Producer")
137
+ st.caption("Generate ideas → transcript → scene prompts with one click.")
138
+
139
+ # Load Gemini API key from Hugging Face Secrets or local .env file
140
+ try:
141
+ # This is for deployed apps on Hugging Face
142
+ api_key = st.secrets["GEMINI_API_KEY"]
143
+ except (KeyError, FileNotFoundError):
144
+ # This is for local development
145
+ api_key = os.getenv("GEMINI_API_KEY", "")
146
+
147
+ if not api_key:
148
+ st.error("⚠️ Gemini API key is missing! Please set it in your .env file locally, or in the Hugging Face Space secrets.")
149
+ st.stop()
150
+
151
+ # Inputs
152
+ c1, c2, c3, c4 = st.columns([2, 1, 1, 1])
153
+ with c1:
154
+ topic = st.text_input("Topic", placeholder="Cosmic paradoxes, Deep ocean mysteries, AI vs Humans…")
155
+ with c2:
156
+ idea_count = st.number_input("Ideas", min_value=1, max_value=5, value=1, step=1)
157
+ with c3:
158
+ total_seconds = st.number_input("Video length (sec)", min_value=30, max_value=90, value=60, step=5)
159
+ with c4:
160
+ scene_count = st.number_input("Scenes", min_value=3, max_value=10, value=6, step=1)
161
+
162
+ model_name = st.selectbox("Model", [DEFAULT_MODEL, "gemini-1.5-pro"], index=0)
163
+ go = st.button("✨ Generate")
164
+
165
+ if go:
166
+ if not api_key:
167
+ st.error("Please paste your Gemini API key.")
168
+ st.stop()
169
+ if not topic.strip():
170
+ st.error("Please enter a topic.")
171
+ st.stop()
172
+
173
+ try:
174
+ configure_gemini(api_key)
175
+ model = genai.GenerativeModel(model_name)
176
+ prompt = make_prompt(topic.strip(), int(idea_count), int(total_seconds), int(scene_count))
177
+
178
+ with st.spinner("Producing ideas, transcript and scenes…"):
179
+ resp = model.generate_content(prompt)
180
+ data = extract_json(resp.text)
181
+
182
+ ideas = data.get("ideas", [])
183
+ if not ideas:
184
+ st.warning("No ideas returned. Try again with a simpler topic or check the model's response format.")
185
+ st.stop()
186
+
187
+ # Display each idea in its own tab
188
+ tab_names = [f"Idea {i+1}" for i in range(len(ideas))]
189
+ tabs = st.tabs(tab_names)
190
+ ts = dt.datetime.now().strftime("%Y%m%d_%H%M%S")
191
+
192
+ for i, (tab, idea) in enumerate(zip(tabs, ideas), start=1):
193
+ with tab:
194
+ overview_row = idea_json_to_overview_rows(topic.strip(), idea)
195
+ scenes_df = idea_json_to_scenes_df(topic.strip(), idea)
196
+ transcript = idea.get("full_transcript", "").strip()
197
+ title = overview_row["Title"]
198
+
199
+ st.subheader("Overview")
200
+ st.dataframe(pd.DataFrame([overview_row]), use_container_width=True)
201
+
202
+ st.subheader("Scenes / Shot List")
203
+ st.dataframe(scenes_df, use_container_width=True)
204
+
205
+ # Download buttons
206
+ colA, colB, colC = st.columns(3)
207
+ file_prefix = f"idea{i}_{ts}"
208
+ with colA:
209
+ st.download_button(
210
+ "⬇️ Download Scenes CSV",
211
+ data=df_to_csv_bytes(scenes_df),
212
+ file_name=f"scenes_{file_prefix}.csv",
213
+ mime="text/csv"
214
+ )
215
+ with colB:
216
+ st.download_button(
217
+ "⬇️ Download Transcript TXT",
218
+ data=transcript_bytes(title, transcript),
219
+ file_name=f"transcript_{file_prefix}.txt",
220
+ mime="text/plain"
221
+ )
222
+ with colC:
223
+ st.download_button(
224
+ "⬇️ Download Overview CSV",
225
+ data=df_to_csv_bytes(pd.DataFrame([overview_row])),
226
+ file_name=f"overview_{file_prefix}.csv",
227
+ mime="text/csv"
228
+ )
229
+
230
+ with st.expander("👀 Quick copy: Transcript"):
231
+ st.code(transcript or "No transcript returned.", language="markdown")
232
+
233
+ with st.expander("🎯 Thumbnail Prompt"):
234
+ st.markdown(overview_row["ThumbnailPrompt"] or "_No prompt returned._")
235
+
236
+ except json.JSONDecodeError:
237
+ st.error("The model response wasn’t valid JSON. Click 'Generate' again or try a simpler topic.")
238
+ st.code(resp.text) # Show the faulty text
239
+ except Exception as e:
240
  st.error(f"An unexpected error occurred: {e}")