| import gradio as gr |
| import torch |
| import csv |
| import os |
| from transformers import AutoTokenizer, AutoModelForCausalLM |
|
|
# Hugging Face checkpoint used for generation, plus the CSV the batch job
# reads and the CSV it writes (both relative to the working directory).
MODEL_NAME = "openbmb/MiniCPM-2B-sft-bf16"
CSV_INPUT = "prompts.csv"
CSV_OUTPUT = "prompts_with_scripts.csv"

print("Loading model...")

# NOTE: trust_remote_code=True lets code shipped with the checkpoint run
# locally, so MODEL_NAME should only ever point at a trusted repository.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)

# Weights are loaded as float32 and mapped to the CPU.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map="cpu",
    torch_dtype=torch.float32,
    trust_remote_code=True,
)

# The model is only used for generation, so switch it to evaluation mode.
model.eval()
|
|
| |
| |
| |
def generate_voiceover_script(prompt_text):
    """Generate a roughly 250-word voiceover script from a technical prompt.

    A fixed instruction preamble is prepended to ``prompt_text``; the result
    is tokenized (truncated to 1024 tokens) and fed to the module-level
    ``model``, which samples up to 400 new tokens.

    Args:
        prompt_text: The technical prompt to turn into a script.

    Returns:
        The generated continuation only (prompt tokens removed), decoded with
        special tokens skipped and surrounding whitespace stripped.
    """
    # The text below is sent to the model verbatim, so it is kept flush-left.
    system_prompt = """You are a professional YouTube script writer. Create a natural, engaging voiceover script from the technical prompt given.

Requirements:
- Length: Approximately 250 words
- Style: Natural, conversational, exciting for YouTube Shorts
- Language: Simple English, easy to speak
- Structure: Hook → Explanation → Details → Conclusion
- No markdown, no quotes, just plain text

Technical prompt: """

    full_prompt = system_prompt + prompt_text

    inputs = tokenizer(
        full_prompt,
        return_tensors="pt",
        truncation=True,
        max_length=1024,
    )

    # Generation only; gradient tracking is not needed.
    with torch.no_grad():
        output = model.generate(
            **inputs,
            max_new_tokens=400,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            repetition_penalty=1.1,
        )

    # The output sequence starts with the prompt tokens; decode only what
    # comes after them.
    prompt_length = inputs.input_ids.shape[1]
    script = tokenizer.decode(
        output[0][prompt_length:],
        skip_special_tokens=True,
    )

    return script.strip()
|
|
| |
| |
| |
def process_all_prompts():
    """Generate a script for every prompt in ``CSV_INPUT`` and save the results.

    Each row's prompt is read from the ``prompt`` column; if the row has no
    such key, ``prompt_text`` and then ``text`` are tried. Rows whose prompt
    value is empty are skipped. One script is generated per prompt with
    ``generate_voiceover_script`` and all results are written to
    ``CSV_OUTPUT`` with the columns ``prompt``, ``voiceover_script`` and
    ``word_count``. Progress is printed to stdout.

    Returns:
        A human-readable status message: an error message if the input file
        is missing or contains no prompts, otherwise a success summary.
    """
    if not os.path.exists(CSV_INPUT):
        return "❌ prompts.csv not found!"

    prompts = []
    # utf-8-sig drops a leading BOM (common in spreadsheet exports) that would
    # otherwise become part of the first column name; newline='' is what the
    # csv module expects so that quoted multi-line fields parse correctly.
    with open(CSV_INPUT, 'r', newline='', encoding='utf-8-sig') as f:
        for row in csv.DictReader(f):
            prompt = row.get('prompt', row.get('prompt_text', row.get('text', '')))
            if prompt:
                prompts.append(prompt)

    if not prompts:
        return "❌ No prompts found in CSV!"

    results = []
    total = len(prompts)

    for idx, prompt in enumerate(prompts, 1):
        print(f"Processing {idx}/{total}: {prompt[:50]}...")

        script = generate_voiceover_script(prompt)
        word_count = len(script.split())

        results.append({
            'prompt': prompt,
            'voiceover_script': script,
            'word_count': word_count,
        })

        print(f" ✅ Generated {word_count} words")

    with open(CSV_OUTPUT, 'w', newline='', encoding='utf-8') as f:
        writer = csv.DictWriter(f, fieldnames=['prompt', 'voiceover_script', 'word_count'])
        writer.writeheader()
        writer.writerows(results)

    return f"✅ Processed {total} prompts! Saved to {CSV_OUTPUT}"
|
|
| |
| |
| |
def process_single_prompt(prompt_text):
    """Generate a script for one prompt and format it for display.

    Args:
        prompt_text: The prompt typed by the user; may be ``None`` or empty.

    Returns:
        ``"Please enter a prompt"`` when the input is missing, empty, or only
        whitespace; otherwise the generated script preceded by a header line
        showing its word count.
    """
    # Reject blank input up front so the model is never run on an empty prompt.
    if not prompt_text or not prompt_text.strip():
        return "Please enter a prompt"

    script = generate_voiceover_script(prompt_text)
    word_count = len(script.split())

    return f"📝 **Script ({word_count} words):**\n\n{script}"
|
|
| |
| |
| |
# Markdown shown at the top of the page. Kept flush-left so it renders the
# same regardless of how the Markdown component treats leading indentation.
_HEADER_MARKDOWN = """
# 🎙️ Voiceover Script Generator

**Generate natural 250-word voiceover scripts from technical prompts using MiniCPM**

- Upload `prompts.csv` with a `prompt` column
- Click "Process All Prompts" to generate scripts
- Output saved to `prompts_with_scripts.csv`
"""

# Markdown shown in the "Instructions" tab.
_INSTRUCTIONS_MARKDOWN = """
## How to Use

### Step 1: Upload CSV
Create a file named `prompts.csv` with this format:

```csv
id,prompt
1,planetary gear increasing torque in automotive plant
2,worm gear reducing speed in steel mill
3,hydraulic cylinder lifting heavy loads in mining operation
```

### Step 2: Run Batch Processing
Click "Process All Prompts" button

### Step 3: Download Results
Output will be saved as `prompts_with_scripts.csv`

## Output Format

| prompt | voiceover_script | word_count |
|--------|------------------|------------|
| planetary gear... | Watch how planetary gear... | 248 |

## Requirements

- First run: Model downloads (~4GB)
- Processing time: ~30-60 seconds per prompt
- 16GB RAM recommended
"""


def get_output_file():
    """Return the path of the results CSV, or ``None`` if it does not exist."""
    if os.path.exists(CSV_OUTPUT):
        return CSV_OUTPUT
    return None


def check_and_show():
    """Return an update that reveals the download component once results exist."""
    output_path = get_output_file()
    if output_path is None:
        return gr.update(visible=False)
    return gr.update(visible=True, value=output_path)


with gr.Blocks(title="Voiceover Script Generator", theme=gr.themes.Soft()) as demo:
    gr.Markdown(_HEADER_MARKDOWN)

    with gr.Tabs():

        with gr.TabItem("📁 Batch Processing"):
            gr.Markdown("### Process all prompts from CSV")

            process_btn = gr.Button("🚀 Process All Prompts", variant="primary")
            status_output = gr.Textbox(label="Status", lines=5)

            # Keep the event handle so the download refresh can be chained
            # after the batch run instead of firing alongside it.
            batch_event = process_btn.click(
                fn=process_all_prompts,
                inputs=[],
                outputs=[status_output],
            )

        with gr.TabItem("🤖 Single Prompt"):
            gr.Markdown("### Test a single prompt")

            prompt_input = gr.Textbox(
                label="Technical Prompt",
                placeholder="Example: planetary gear increasing torque in automotive plant",
                lines=3,
            )

            generate_btn = gr.Button("Generate Script", variant="primary")
            script_output = gr.Textbox(label="Generated Script", lines=15)

            generate_btn.click(
                fn=process_single_prompt,
                inputs=[prompt_input],
                outputs=[script_output],
            )

        with gr.TabItem("📖 Instructions"):
            gr.Markdown(_INSTRUCTIONS_MARKDOWN)

    gr.Markdown("---")
    gr.Markdown("### 📥 Download Results")

    download_btn = gr.File(label="Download prompts_with_scripts.csv", visible=False)

    # Run only after process_all_prompts has finished, so the component is
    # pointed at the CSV written by this run rather than a stale or missing one.
    batch_event.then(fn=check_and_show, outputs=[download_btn])
|
|
| |
| |
| |
if __name__ == "__main__":
    # Single source of truth for the port so the printed URL and the server
    # configuration cannot drift apart.
    server_port = 7860
    separator = "=" * 50

    print(separator)
    print("🎙️ Voiceover Script Generator")
    print(separator)
    print("🚀 Starting Gradio interface...")
    print(f"📱 Access at: http://localhost:{server_port}")
    print(separator)

    # NOTE(review): server_name="0.0.0.0" listens on every network interface
    # and share=True additionally requests a public Gradio share link, so
    # anyone who can reach either address can trigger generation. Confirm
    # this exposure is intended before deploying.
    demo.launch(
        server_name="0.0.0.0",
        server_port=server_port,
        share=True,
    )