import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from gtts import gTTS
import tempfile
import os

# Configuration (inlined, since there is no separate config.py)
MODEL_CONFIG = {
    "models": {
        "granite-3b": "ibm-granite/granite-3b-code-base",
        "granite-8b": "ibm-granite/granite-8b-code-base"
    },
    "generation_params": {
        "max_new_tokens": 512,
        "temperature": 0.7,
        "do_sample": True,
        "pad_token_id": None  # filled in at generation time from the tokenizer
    }
}

TTS_CONFIG = {
    "engine": "gtts",
    "voice_speed": 150,   # unused by gTTS; see the offline sketch below
    "voice_volume": 0.9   # unused by gTTS
}

TONE_PROMPTS = {
    "Neutral": "Rewrite the following text in a clear, neutral tone suitable for audiobook narration:",
    "Suspenseful": "Rewrite the following text with suspenseful, engaging language that builds tension:",
    "Inspiring": "Rewrite the following text in an inspiring, motivational tone that uplifts the reader:"
}

# Global state for the loaded model
model = None
tokenizer = None
model_loaded = False


def load_granite_model(model_name="granite-3b"):
    """Load an IBM Granite model locally."""
    global model, tokenizer, model_loaded
    model_id = MODEL_CONFIG["models"][model_name]
    try:
        # Load tokenizer
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token
        # Load model (half precision on GPU, full precision on CPU)
        model = AutoModelForCausalLM.from_pretrained(
            model_id,
            torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
            device_map="auto" if torch.cuda.is_available() else None,
            trust_remote_code=True
        )
        model_loaded = True
        return "✅ Model loaded successfully!"
    except Exception as e:
        model_loaded = False
        return f"❌ Error loading model: {str(e)}"

def rewrite_text_with_granite(text, tone):
    """Rewrite text in the requested tone using the local Granite model."""
    global model, tokenizer, model_loaded
    if not model_loaded or model is None or tokenizer is None:
        return text
    try:
        # Build the prompt from the tone template
        prompt = f"{TONE_PROMPTS[tone]}\n\nOriginal text: {text}\n\nRewritten text:"
        # Tokenize (truncated so prompt plus generation fits the context window)
        inputs = tokenizer(
            prompt,
            return_tensors="pt",
            truncation=True,
            max_length=1024
        )
        # Move inputs to the model's device (required when device_map="auto"
        # has placed the model on GPU)
        inputs = {k: v.to(model.device) for k, v in inputs.items()}
        # Use the tokenizer's pad token id for generation
        generation_params = MODEL_CONFIG["generation_params"].copy()
        generation_params["pad_token_id"] = tokenizer.pad_token_id
        # Generate
        with torch.no_grad():
            outputs = model.generate(**inputs, **generation_params)
        # Decode
        generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
        # Keep only the rewritten part
        if "Rewritten text:" in generated_text:
            rewritten = generated_text.split("Rewritten text:")[-1].strip()
        else:
            rewritten = generated_text[len(prompt):].strip()
        return rewritten if rewritten else text
    except Exception as e:
        return f"Error rewriting text: {str(e)}"

def generate_audio_gtts(text, language='en'):
    """Generate audio with Google Text-to-Speech; returns a file path or None."""
    try:
        tts = gTTS(text=text, lang=language, slow=False)
        # Save to a temporary file and return its path
        with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as tmp_file:
            tts.save(tmp_file.name)
            return tmp_file.name
    except Exception:
        return None
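
# Hedged sketch: TTS_CONFIG's voice_speed/voice_volume are unused by gTTS, but
# they match pyttsx3's "rate" and "volume" properties, so a fully offline
# fallback could look like this (assumes `pip install pyttsx3`; not wired
# into the UI):
def generate_audio_offline(text):
    """Synthesize speech locally with pyttsx3 and return a WAV file path."""
    import pyttsx3  # imported lazily so the app still runs without it
    engine = pyttsx3.init()
    engine.setProperty("rate", TTS_CONFIG["voice_speed"])
    engine.setProperty("volume", TTS_CONFIG["voice_volume"])
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
        engine.save_to_file(text, tmp_file.name)
        engine.runAndWait()
        return tmp_file.name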

def process_audiobook(input_text, uploaded_file, tone, model_choice):
    """Main processing function. (`model_choice` is shown in the UI, but the
    model used is whichever one was loaded via the Load Model button.)"""
    global model_loaded
    # Check that a model is loaded
    if not model_loaded:
        return (
            "❌ Please load the AI model first! Click the 'Load Model' button.",
            None,
            None,
            None  # the Audio output expects a file path, not a message string
        )
    # Determine the input text
    text_to_process = ""
    if uploaded_file is not None:
        try:
            # gr.File(type="binary") passes the file content as raw bytes,
            # not a file object, so decode it directly
            if isinstance(uploaded_file, bytes):
                text_to_process = uploaded_file.decode('utf-8')
            elif hasattr(uploaded_file, "read"):
                content = uploaded_file.read()
                text_to_process = content.decode('utf-8') if isinstance(content, bytes) else str(content)
            else:
                text_to_process = str(uploaded_file)
        except Exception as e:
            return f"Error reading file: {str(e)}", None, None, None
    elif input_text:
        text_to_process = input_text
    else:
        return "Please provide text input or upload a file.", None, None, None
    # Truncate overly long inputs
    if len(text_to_process) > 2000:
        text_to_process = text_to_process[:2000]
        status_msg = "⚠️ Text truncated to 2000 characters for optimal processing."
    else:
        status_msg = f"✅ Processing {len(text_to_process)} characters."
    # Rewrite text with the model
    try:
        rewritten_text = rewrite_text_with_granite(text_to_process, tone)
    except Exception as e:
        return f"Error in text rewriting: {str(e)}", None, None, None
    # Generate audio
    try:
        audio_file_path = generate_audio_gtts(rewritten_text)
        if audio_file_path is None:
            return (f"{status_msg} ❌ Failed to generate audio.",
                    text_to_process, rewritten_text, None)
    except Exception as e:
        return (f"{status_msg} Error generating audio: {str(e)}",
                text_to_process, rewritten_text, None)
    return (
        status_msg,
        text_to_process,
        rewritten_text,
        audio_file_path
    )
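
# Hedged sketch: NamedTemporaryFile(delete=False) leaks one .mp3 per request.
# A simple mitigation (assuming a single-process deployment; not wired into
# process_audiobook) is to track and purge the previous output:
_last_audio_path = None

def cleanup_previous_audio(new_path):
    """Delete the previously generated audio file, if any, then remember new_path."""
    global _last_audio_path
    if _last_audio_path and os.path.exists(_last_audio_path):
        try:
            os.remove(_last_audio_path)
        except OSError:
            pass  # best effort; the OS temp dir is cleaned eventually anyway
    _last_audio_path = new_path
    return new_path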

def get_model_status():
    """Report the current model status (not currently wired into the UI)."""
    global model_loaded
    if model_loaded:
        device = "GPU" if torch.cuda.is_available() else "CPU"
        return f"✅ Model loaded on {device}"
    return "❌ Model not loaded"

# Build the Gradio interface
def create_interface():
    with gr.Blocks(
        title="EchoVerse - Local AI Audiobook Creator",
        theme=gr.themes.Soft(),
        css="""
        .gradio-container {
            font-family: 'Arial', sans-serif;
        }
        .main-header {
            text-align: center;
            color: #2E86AB;
            margin-bottom: 20px;
        }
        .status-box {
            padding: 10px;
            border-radius: 5px;
            margin: 10px 0;
        }
        """
    ) as demo:
        # Header
        gr.HTML("""
        <div class="main-header">
            <h1>🎧 EchoVerse Local</h1>
            <h3>Transform Text into Expressive Audiobooks with Local AI</h3>
            <p><i>Powered by IBM Granite 3B - No internet required for AI processing!</i></p>
        </div>
        """)
        # Model setup section
        with gr.Group():
            gr.HTML("<h2>🤖 AI Model Setup</h2>")
            with gr.Row():
                model_choice = gr.Dropdown(
                    choices=list(MODEL_CONFIG["models"].keys()),
                    value="granite-3b",
                    label="Choose Granite Model",
                    info="The 3B model is recommended for most computers; the 8B model needs more RAM."
                )
                load_btn = gr.Button("Load Model", variant="primary")
            model_status = gr.Textbox(
                label="Model Status",
                value="❌ Model not loaded",
                interactive=False
            )
        # Input section
        with gr.Group():
            gr.HTML("<h2>📄 Input Your Content</h2>")
            uploaded_file = gr.File(
                label="Upload a text file",
                file_types=[".txt"],
                type="binary"
            )
            input_text = gr.Textbox(
                label="Or paste your text here:",
                lines=8,
                placeholder="Enter the text you want to convert to an audiobook...",
                max_lines=15
            )
        # Configuration section
        with gr.Group():
            gr.HTML("<h2>⚙️ Audio Configuration</h2>")
            with gr.Row():
                tone = gr.Dropdown(
                    choices=["Neutral", "Suspenseful", "Inspiring"],
                    value="Neutral",
                    label="Select Tone",
                    info="Choose how you want the text to be rewritten"
                )
        # Generate button
        generate_btn = gr.Button("🎵 Generate Audiobook", variant="primary", size="lg")
        # Results section
        with gr.Group():
            gr.HTML("<h2>📊 Results</h2>")
            status_output = gr.Textbox(
                label="Status",
                interactive=False
            )
            with gr.Row():
                original_text = gr.Textbox(
                    label="Original Text",
                    lines=10,
                    interactive=False
                )
                rewritten_text = gr.Textbox(
                    label="Rewritten Text",
                    lines=10,
                    interactive=False
                )
        # Audio output
        gr.HTML("<h2>🎧 Your Audiobook</h2>")
        audio_output = gr.Audio(
            label="Generated Audiobook",
            type="filepath"
        )
        # System info
        with gr.Group():
            gr.HTML("<h2>💻 System Info</h2>")
            gr.HTML(f"""
            <div>
                <p><strong>GPU Available:</strong> {'✅ Yes' if torch.cuda.is_available() else '❌ No (CPU only)'}</p>
                <p><strong>TTS Engine:</strong> {TTS_CONFIG['engine']}</p>
            </div>
            <h3>💡 Tips</h3>
            <ul>
                <li>The first model load takes time</li>
                <li>3B model: ~6GB RAM needed</li>
                <li>8B model: ~16GB RAM needed</li>
                <li>A GPU greatly speeds up processing</li>
                <li>gTTS requires an internet connection</li>
            </ul>
            """)
        # Event handlers
        load_btn.click(
            fn=load_granite_model,
            inputs=[model_choice],
            outputs=[model_status]
        )
        generate_btn.click(
            fn=process_audiobook,
            inputs=[input_text, uploaded_file, tone, model_choice],
            outputs=[status_output, original_text, rewritten_text, audio_output]
        )
    return demo
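
# Optional hardening (an assumption, not in the original app): model loading
# and generation can exceed Gradio's default request timeout, so enabling the
# request queue before launching is worth considering:
#
#     demo = create_interface()
#     demo.queue()   # serialize long-running requests
#     demo.launch(server_name="0.0.0.0", server_port=7860)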

# Launch the app
if __name__ == "__main__":
    demo = create_interface()
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False
    )