import gradio as gr
import torch
import csv
import os
from transformers import AutoTokenizer, AutoModelForCausalLM

# Hugging Face model identifier passed to both from_pretrained() calls below.
MODEL_NAME = "openbmb/MiniCPM-2B-sft-bf16"
# Batch-mode input file; a relative path, so it resolves against the current
# working directory.
CSV_INPUT = "prompts.csv"
# Batch-mode output file; also relative to the current working directory.
CSV_OUTPUT = "prompts_with_scripts.csv"

# The tokenizer and model are loaded at import time and kept as module
# globals; generate_voiceover_script() reads them directly.
print("Loading model...")

# NOTE(review): trust_remote_code=True permits code shipped with the model
# repository to run locally -- confirm the repository is trusted.
tokenizer = AutoTokenizer.from_pretrained(
    MODEL_NAME,
    trust_remote_code=True
)

# Weights are requested as float32 and placed on the CPU.
# NOTE(review): the checkpoint name suggests bf16 weights; presumably float32
# was chosen for CPU compatibility -- confirm.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    trust_remote_code=True,
    torch_dtype=torch.float32,
    device_map="cpu"
)

# The model is only used for generation, so switch it to evaluation mode.
model.eval()

# ============================================================
# FUNCTION TO GENERATE VOICEOVER SCRIPT
# ============================================================
def generate_voiceover_script(prompt_text):
    """Generate a roughly 250-word voiceover script from a technical prompt.

    The fixed instruction preamble and ``prompt_text`` are concatenated,
    tokenized (truncated to at most 1024 tokens) and passed to the
    module-level ``model`` for sampled generation. Only the newly generated
    tokens are decoded, so the instructions are not echoed back.

    Args:
        prompt_text: Short technical description to turn into a script.

    Returns:
        The generated script with surrounding whitespace removed, or an
        empty string when ``prompt_text`` is ``None`` or blank (the model is
        not called in that case).
    """
    # Guard first: None would raise TypeError on concatenation below, and a
    # blank prompt would spend a full generation pass on no real input.
    if not prompt_text or not prompt_text.strip():
        return ""

    system_prompt = """You are a professional YouTube script writer. Create a natural, engaging voiceover script from the technical prompt given.

Requirements:
- Length: Approximately 250 words
- Style: Natural, conversational, exciting for YouTube Shorts
- Language: Simple English, easy to speak
- Structure: Hook → Explanation → Details → Conclusion
- No markdown, no quotes, just plain text

Technical prompt: """

    # NOTE(review): the prompt is sent as plain text. If this checkpoint
    # expects a chat template (see its model card), wrapping the prompt in
    # it may improve output quality -- confirm before changing.
    full_prompt = system_prompt + prompt_text.strip()

    # Move the encoded inputs to wherever the model lives so the function
    # keeps working if the model is ever loaded on a non-CPU device.
    inputs = tokenizer(
        full_prompt,
        return_tensors="pt",
        truncation=True,
        max_length=1024
    ).to(model.device)
    prompt_length = inputs["input_ids"].shape[1]

    with torch.no_grad():
        output = model.generate(
            **inputs,
            max_new_tokens=400,  # For ~250 words
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            repetition_penalty=1.1
        )

    # generate() returns prompt + continuation; skip the prompt tokens so
    # only the continuation is decoded.
    script = tokenizer.decode(
        output[0][prompt_length:],
        skip_special_tokens=True
    )

    return script.strip()

# ============================================================
# PROCESS ALL PROMPTS FROM CSV
# ============================================================
def _read_prompts(csv_path):
    """Return the non-blank prompts found in ``csv_path``, in file order."""
    prompt_columns = ('prompt', 'prompt_text', 'text')
    prompts = []
    # utf-8-sig drops the byte-order mark some spreadsheet tools prepend;
    # with plain utf-8 the first header would read '\ufeffprompt' and never
    # match. newline='' is what the csv module expects for its input files.
    with open(csv_path, 'r', newline='', encoding='utf-8-sig') as f:
        for row in csv.DictReader(f):
            # Use the first candidate column that holds real text, so an
            # empty 'prompt' cell still falls back to the alternatives.
            for column in prompt_columns:
                value = (row.get(column) or '').strip()
                if value:
                    prompts.append(value)
                    break
    return prompts


def _write_results(csv_path, results):
    """Write the result rows to ``csv_path`` with a header line."""
    with open(csv_path, 'w', newline='', encoding='utf-8') as f:
        writer = csv.DictWriter(f, fieldnames=['prompt', 'voiceover_script', 'word_count'])
        writer.writeheader()
        writer.writerows(results)


def process_all_prompts(input_path=None, output_path=None, generate=None):
    """Read prompts from a CSV, generate a script for each, save a new CSV.

    Args:
        input_path: CSV file to read; defaults to ``CSV_INPUT``. Prompts are
            taken from the first non-empty column among ``prompt``,
            ``prompt_text`` and ``text``.
        output_path: CSV file to write; defaults to ``CSV_OUTPUT``.
        generate: Callable mapping a prompt string to a script string;
            defaults to ``generate_voiceover_script``.

    Returns:
        A human-readable status message. The input file being missing or
        containing no prompts is reported through this message rather than
        raised.

    Raises:
        Whatever ``generate`` raises. Rows completed before the failure are
        still written to ``output_path`` before the exception propagates.
    """
    # Defaults are resolved at call time so a no-argument call (as the UI
    # makes) behaves exactly as before.
    input_path = CSV_INPUT if input_path is None else input_path
    output_path = CSV_OUTPUT if output_path is None else output_path
    generate = generate_voiceover_script if generate is None else generate

    if not os.path.exists(input_path):
        return f"❌ {input_path} not found!"

    prompts = _read_prompts(input_path)
    if not prompts:
        return "❌ No prompts found in CSV!"

    results = []
    total = len(prompts)

    try:
        for idx, prompt in enumerate(prompts, 1):
            print(f"Processing {idx}/{total}: {prompt[:50]}...")

            script = generate(prompt)
            word_count = len(script.split())

            results.append({
                'prompt': prompt,
                'voiceover_script': script,
                'word_count': word_count
            })

            print(f"   ✅ Generated {word_count} words")
    finally:
        # Generation is slow, so keep whatever was finished even when a
        # later prompt fails instead of discarding the whole batch.
        if results:
            _write_results(output_path, results)

    return f"✅ Processed {total} prompts! Saved to {output_path}"

# ============================================================
# SINGLE PROMPT PROCESSING (for testing)
# ============================================================
def process_single_prompt(prompt_text):
    """Generate a script for one prompt and format it for display.

    Args:
        prompt_text: The technical prompt typed by the user.

    Returns:
        A heading with the word count followed by the generated script, or
        a short hint when the prompt is missing, empty or only whitespace.
    """
    # A whitespace-only prompt is truthy, so check the stripped text too;
    # otherwise it would trigger a slow generation with no real input.
    if not prompt_text or not prompt_text.strip():
        return "Please enter a prompt"

    script = generate_voiceover_script(prompt_text)
    word_count = len(script.split())

    return f"📝 **Script ({word_count} words):**\n\n{script}"

# ============================================================
# GRADIO INTERFACE
# ============================================================
# Builds the whole UI: a batch tab, a single-prompt tab, an instructions tab,
# and a download component that is revealed once an output CSV exists.
with gr.Blocks(title="Voiceover Script Generator", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
    # 🎙️ Voiceover Script Generator
    
    **Generate natural 250-word voiceover scripts from technical prompts using MiniCPM**
    
    - Upload `prompts.csv` with a `prompt` column
    - Click "Process All Prompts" to generate scripts
    - Output saved to `prompts_with_scripts.csv`
    """)

    with gr.Tabs():
        # Tab 1: Batch Processing
        with gr.TabItem("📁 Batch Processing"):
            gr.Markdown("### Process all prompts from CSV")

            process_btn = gr.Button("🚀 Process All Prompts", variant="primary")
            status_output = gr.Textbox(label="Status", lines=5)

            # Keep the event handle: the download refresh is chained onto it
            # further down so it runs only after the batch has finished.
            batch_event = process_btn.click(
                fn=process_all_prompts,
                inputs=[],
                outputs=[status_output]
            )

        # Tab 2: Single Prompt Testing
        with gr.TabItem("🎤 Single Prompt"):
            gr.Markdown("### Test a single prompt")

            prompt_input = gr.Textbox(
                label="Technical Prompt",
                placeholder="Example: planetary gear increasing torque in automotive plant",
                lines=3
            )

            generate_btn = gr.Button("Generate Script", variant="primary")
            script_output = gr.Textbox(label="Generated Script", lines=15)

            generate_btn.click(
                fn=process_single_prompt,
                inputs=[prompt_input],
                outputs=[script_output]
            )

        # Tab 3: Instructions
        with gr.TabItem("📋 Instructions"):
            gr.Markdown("""
            ## How to Use
            
            ### Step 1: Upload CSV
            Create a file named `prompts.csv` with this format:
            
            ```csv
            id,prompt
            1,planetary gear increasing torque in automotive plant
            2,worm gear reducing speed in steel mill
            3,hydraulic cylinder lifting heavy loads in mining operation
            ```
            
            ### Step 2: Run Batch Processing
            Click "Process All Prompts" button
            
            ### Step 3: Download Results
            Output will be saved as `prompts_with_scripts.csv`
            
            ## Output Format
            
            | prompt | voiceover_script | word_count |
            |--------|------------------|------------|
            | planetary gear... | Watch how planetary gear... | 248 |
            
            ## Requirements
            
            - First run: Model downloads (~4GB)
            - Processing time: ~30-60 seconds per prompt
            - 16GB RAM recommended
            """)

    # File download section
    gr.Markdown("---")
    gr.Markdown("### 📥 Download Results")

    def get_output_file():
        """Return the output CSV path if the file exists, else None."""
        if os.path.exists(CSV_OUTPUT):
            return CSV_OUTPUT
        return None

    # Hidden until an output file is available to download.
    download_btn = gr.File(label="Download prompts_with_scripts.csv", visible=False)

    def check_and_show():
        """Reveal the download component when the output CSV exists."""
        output_path = get_output_file()
        if output_path is not None:
            return gr.update(visible=True, value=output_path)
        return gr.update(visible=False)

    # Chain onto the batch event instead of registering a second click
    # listener: a separate listener runs alongside the batch job and would
    # look for the output file before it has been written.
    batch_event.then(fn=check_and_show, outputs=[download_btn])

# ============================================================
# LAUNCH
# ============================================================
if __name__ == "__main__":
    # Print the startup banner, then serve the UI on all interfaces at port
    # 7860 with a Gradio share link enabled.
    banner_rule = "=" * 50
    banner_lines = (
        banner_rule,
        "🎙️ Voiceover Script Generator",
        banner_rule,
        "🚀 Starting Gradio interface...",
        "📱 Access at: http://localhost:7860",
        banner_rule,
    )
    for banner_line in banner_lines:
        print(banner_line)

    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": True,
    }
    demo.launch(**launch_options)