Spaces:

saimanoj1605
/

hackathon

Runtime error

App Files Files Community

hackathon / app.py

saimanoj1605

create app.py

d39bedc verified 4 months ago

raw

history blame

11 kB

	import gradio as gr
	import torch
	from transformers import AutoTokenizer, AutoModelForCausalLM
	from gtts import gTTS
	import io
	import tempfile
	import os
	import json

	# Configuration, inlined here because no separate config.py ships with the app.

	# Hugging Face model ids selectable from the UI dropdown, plus the keyword
	# arguments forwarded to ``model.generate``.
	MODEL_CONFIG = {
	    "models": {
	        "granite-3b": "ibm-granite/granite-3b-code-base",
	        "granite-8b": "ibm-granite/granite-8b-code-base",
	    },
	    "generation_params": {
	        "max_new_tokens": 512,
	        "temperature": 0.7,
	        "do_sample": True,
	        # Placeholder: rewrite_text_with_granite overwrites this (on a copy)
	        # with the tokenizer's pad token id before generating.
	        "pad_token_id": None,
	    },
	}

	# Text-to-speech settings. Only "engine" is read by the code in this file
	# (it is shown in the System Info panel).
	TTS_CONFIG = {
	    "engine": "gtts",
	    "voice_speed": 150,
	    "voice_volume": 0.9,
	}

	# Instruction placed at the start of the rewrite prompt, keyed by the tone
	# names offered in the UI.
	TONE_PROMPTS = {
	    "Neutral": "Rewrite the following text in a clear, neutral tone suitable for audiobook narration:",
	    "Suspenseful": "Rewrite the following text with suspenseful, engaging language that builds tension:",
	    "Inspiring": "Rewrite the following text in an inspiring, motivational tone that uplifts the reader:",
	}

	# Shared model state. load_granite_model assigns these; the rewrite and
	# processing functions read them to decide whether a model is available.
	model_loaded = False
	tokenizer = None
	model = None

	def load_granite_model(model_name="granite-3b"):
	    """Load an IBM Granite model and its tokenizer into the module globals.

	    Args:
	        model_name: Key into ``MODEL_CONFIG["models"]``.

	    Returns:
	        A status string for display: the success message, or an error
	        message when the key is unknown or loading fails. Load errors are
	        reported through the return value rather than raised.
	    """
	    global model, tokenizer, model_loaded

	    # An unknown (or cleared) dropdown value is reported like any other load
	    # failure instead of escaping as a KeyError.
	    model_id = MODEL_CONFIG["models"].get(model_name)
	    if model_id is None:
	        return f"❌ Error loading model: unknown model '{model_name}'"

	    # Release whatever was loaded before starting. This avoids holding two
	    # sets of weights while switching models, and guarantees that a failed
	    # load leaves a clean "not loaded" state rather than a new tokenizer
	    # paired with the previous model.
	    model = None
	    tokenizer = None
	    model_loaded = False

	    use_cuda = torch.cuda.is_available()
	    try:
	        new_tokenizer = AutoTokenizer.from_pretrained(model_id)
	        # Generation needs a pad token; fall back to EOS when none is defined.
	        if new_tokenizer.pad_token is None:
	            new_tokenizer.pad_token = new_tokenizer.eos_token

	        new_model = AutoModelForCausalLM.from_pretrained(
	            model_id,
	            # Half precision on GPU, full precision on CPU.
	            torch_dtype=torch.float16 if use_cuda else torch.float32,
	            device_map="auto" if use_cuda else None,
	            # NOTE(review): this allows code shipped in the model repo to be
	            # executed. Kept as in the original; confirm it is required.
	            trust_remote_code=True,
	        )
	    except Exception as e:
	        return f"❌ Error loading model: {str(e)}"

	    # Publish the pair only once both pieces loaded successfully.
	    model = new_model
	    tokenizer = new_tokenizer
	    model_loaded = True
	    return "✅ Model loaded successfully!"

	def rewrite_text_with_granite(text, tone):
	    """Rewrite ``text`` in the requested tone using the loaded Granite model.

	    Args:
	        text: Source text to rewrite.
	        tone: Key into ``TONE_PROMPTS``.

	    Returns:
	        The rewritten text. ``text`` is returned unchanged when no model is
	        loaded, when the input is blank, or when the model produces nothing.
	        On failure a string starting with ``"Error rewriting text:"`` is
	        returned; this function does not raise.
	    """
	    if not model_loaded or model is None or tokenizer is None:
	        return text

	    # Nothing to rewrite; avoid prompting the model with an empty passage.
	    if not text or not text.strip():
	        return text

	    try:
	        prompt = f"{TONE_PROMPTS[tone]}\n\nOriginal text: {text}\n\nRewritten text:"

	        inputs = tokenizer(
	            prompt,
	            return_tensors="pt",
	            truncation=True,
	            max_length=1024,
	        )
	        # The tokenizer returns CPU tensors; generation requires them on the
	        # same device as the model (GPU when loaded with device_map="auto").
	        inputs = inputs.to(model.device)

	        # Copy so the shared config keeps its placeholder pad_token_id.
	        generation_params = MODEL_CONFIG["generation_params"].copy()
	        generation_params["pad_token_id"] = tokenizer.pad_token_id

	        with torch.no_grad():
	            outputs = model.generate(
	                inputs.input_ids,
	                attention_mask=inputs.attention_mask,
	                **generation_params,
	            )

	        # For a causal LM the returned sequence is the prompt followed by the
	        # new tokens. Decoding only the continuation avoids string-matching
	        # the prompt, which breaks when the user's text contains the marker
	        # or when decoding does not round-trip the prompt exactly.
	        prompt_length = inputs.input_ids.shape[1]
	        rewritten = tokenizer.decode(
	            outputs[0][prompt_length:],
	            skip_special_tokens=True,
	        ).strip()

	        return rewritten if rewritten else text

	    except Exception as e:
	        return f"Error rewriting text: {str(e)}"

	def generate_audio_gtts(text, language='en'):
	    """Synthesize ``text`` to an MP3 file using Google Text-to-Speech.

	    Args:
	        text: Text to speak.
	        language: Language code passed to gTTS as ``lang``.

	    Returns:
	        Path of the generated ``.mp3`` temporary file, or ``None`` when the
	        text is blank or synthesis fails. The caller owns the returned file
	        (it is created with ``delete=False``).
	    """
	    import logging

	    if not text or not text.strip():
	        return None

	    audio_path = None
	    try:
	        tts = gTTS(text=text, lang=language, slow=False)

	        # Reserve a unique path, then close the handle before gTTS opens the
	        # file by name: reopening a still-open NamedTemporaryFile is not
	        # possible on every platform (notably Windows).
	        with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as tmp_file:
	            audio_path = tmp_file.name
	        tts.save(audio_path)
	        return audio_path

	    except Exception:
	        logging.getLogger(__name__).exception("gTTS audio generation failed")
	        # delete=False means nothing else will clean up a partial file.
	        if audio_path is not None:
	            try:
	                os.remove(audio_path)
	            except OSError:
	                pass  # best-effort cleanup; the failure is already logged
	        return None

	def process_audiobook(input_text, uploaded_file, tone, model_choice):
	    """Turn pasted or uploaded text into rewritten text plus a narration file.

	    Args:
	        input_text: Text pasted into the UI; used only when no file is given.
	        uploaded_file: Upload from the file widget, or ``None``. Raw bytes,
	            a filesystem path, or a file-like object are accepted.
	        tone: Key into ``TONE_PROMPTS`` selecting the rewrite style.
	        model_choice: Not used here; accepted because the click handler
	            passes it.

	    Returns:
	        A 4-tuple ``(status, original_text, rewritten_text, audio_path)``.
	        ``audio_path`` is a file path on success and ``None`` otherwise; all
	        messages go in ``status`` because the last slot is bound to an audio
	        component that expects a file path.
	    """
	    if not model_loaded:
	        return (
	            "❌ Please load the AI model first! Please click 'Load Model' button first.",
	            None,
	            None,
	            None,
	        )

	    # Determine input text: an uploaded file takes precedence over pasted text.
	    if uploaded_file is not None:
	        try:
	            text_to_process = _read_uploaded_text(uploaded_file)
	        except (OSError, ValueError) as e:
	            # ValueError covers UnicodeDecodeError for non-UTF-8 uploads.
	            return f"Error reading file: {str(e)}", None, None, None
	    elif input_text:
	        text_to_process = input_text
	    else:
	        return "Please provide text input or upload a file.", None, None, None

	    if not text_to_process.strip():
	        return "Please provide text input or upload a file.", None, None, None

	    # Truncate if too long
	    if len(text_to_process) > 2000:
	        text_to_process = text_to_process[:2000]
	        status_msg = "⚠️ Text truncated to 2000 characters for optimal processing."
	    else:
	        status_msg = f"✅ Processing {len(text_to_process)} characters."

	    # Rewrite text with AI
	    try:
	        rewritten_text = rewrite_text_with_granite(text_to_process, tone)
	    except Exception as e:
	        return f"Error in text rewriting: {str(e)}", None, None, None

	    # rewrite_text_with_granite reports failures by returning a message with
	    # this prefix instead of raising; surface it as the status rather than
	    # sending the error message to text-to-speech.
	    if rewritten_text.startswith("Error rewriting text:"):
	        return rewritten_text, text_to_process, None, None

	    # Generate audio
	    try:
	        audio_file_path = generate_audio_gtts(rewritten_text)
	    except Exception as e:
	        return (
	            f"{status_msg} Error generating audio: {str(e)}",
	            text_to_process,
	            rewritten_text,
	            None,
	        )
	    if audio_file_path is None:
	        return (
	            f"{status_msg} ❌ Failed to generate audio.",
	            text_to_process,
	            rewritten_text,
	            None,
	        )

	    return status_msg, text_to_process, rewritten_text, audio_file_path


	def _read_uploaded_text(uploaded_file):
	    """Return the text of an upload given as bytes, a path, or a file object."""
	    if isinstance(uploaded_file, (bytes, bytearray)):
	        # This is what the file widget delivers when configured with
	        # type="binary", as it is in this app.
	        content = bytes(uploaded_file)
	    elif isinstance(uploaded_file, (str, os.PathLike)):
	        # presumably a path to the uploaded temp file (type="filepath")
	        with open(uploaded_file, "rb") as fh:
	            content = fh.read()
	    else:
	        content = uploaded_file.read()

	    if isinstance(content, bytes):
	        # utf-8-sig also accepts plain UTF-8 and drops a leading BOM.
	        return content.decode('utf-8-sig')
	    return str(content)

	def get_model_status():
	    """Return a one-line status string describing whether a model is loaded."""
	    if not model_loaded:
	        return "❌ Model not loaded"

	    # The label is derived from CUDA availability, not from the model object.
	    device = "GPU" if torch.cuda.is_available() else "CPU"
	    return f"✅ Model loaded on {device}"

	# Build the Gradio UI
	def create_interface():
	    """Assemble the EchoVerse Gradio Blocks app and return it without launching.

	    The layout has five sections (model setup, input, tone configuration,
	    results, system info) and wires the two buttons to
	    ``load_granite_model`` and ``process_audiobook``.
	    """
	    # NOTE(review): widget nesting below was reconstructed from a source whose
	    # indentation had been flattened; confirm against the deployed layout.

	    # Page-level stylesheet, kept out of the Blocks(...) call for readability.
	    page_css = """
	.gradio-container {
	font-family: 'Arial', sans-serif;
	}
	.main-header {
	text-align: center;
	color: #2E86AB;
	margin-bottom: 20px;
	}
	.status-box {
	padding: 10px;
	border-radius: 5px;
	margin: 10px 0;
	}
	"""
	    available_models = list(MODEL_CONFIG["models"].keys())

	    with gr.Blocks(
	        title="EchoVerse - Local AI Audiobook Creator",
	        theme=gr.themes.Soft(),
	        css=page_css,
	    ) as demo:

	        # ---- Header ----
	        gr.HTML("""
	<div class="main-header">
	<h1>�� EchoVerse Local</h1>
	<h3>Transform Text into Expressive Audiobooks with Local AI</h3>
	<p><i>Powered by IBM Granite 3B - No internet required for AI processing!</i></p>
	</div>
	""")

	        # ---- Model setup: pick a model, load it, show the load status ----
	        with gr.Group():
	            gr.HTML("<h2>�� AI Model Setup</h2>")

	            with gr.Row():
	                model_dropdown = gr.Dropdown(
	                    choices=available_models,
	                    value="granite-3b",
	                    label="Choose Granite Model",
	                    info="3B model is recommended for most computers. 8B requires more RAM.",
	                )

	                load_button = gr.Button("Load Model", variant="primary")

	            model_status_box = gr.Textbox(
	                label="Model Status",
	                value="❌ Model not loaded",
	                interactive=False,
	            )

	        # ---- Input: upload a .txt file or paste text ----
	        with gr.Group():
	            gr.HTML("<h2>�� Input Your Content</h2>")

	            file_input = gr.File(
	                label="Upload a text file",
	                file_types=[".txt"],
	                type="binary",
	            )

	            text_input = gr.Textbox(
	                label="Or paste your text here:",
	                lines=8,
	                placeholder="Enter the text you want to convert to an audiobook...",
	                max_lines=15,
	            )

	        # ---- Configuration: rewrite tone ----
	        with gr.Group():
	            gr.HTML("<h2>⚙️ Audio Configuration</h2>")

	            with gr.Row():
	                tone_dropdown = gr.Dropdown(
	                    choices=["Neutral", "Suspenseful", "Inspiring"],
	                    value="Neutral",
	                    label="Select Tone",
	                    info="Choose how you want the text to be rewritten",
	                )

	        # ---- Generate button ----
	        generate_button = gr.Button("�� Generate Audiobook", variant="primary", size="lg")

	        # ---- Results: status line, before/after text, audio player ----
	        with gr.Group():
	            gr.HTML("<h2>�� Results</h2>")

	            status_box = gr.Textbox(
	                label="Status",
	                interactive=False,
	            )

	            with gr.Row():
	                original_box = gr.Textbox(
	                    label="Original Text",
	                    lines=10,
	                    interactive=False,
	                )

	                rewritten_box = gr.Textbox(
	                    label="Rewritten Text",
	                    lines=10,
	                    interactive=False,
	                )

	            # Audio output
	            gr.HTML("<h2>�� Your Audiobook</h2>")
	            audio_player = gr.Audio(
	                label="Generated Audiobook",
	                type="filepath",
	            )

	        # ---- System info: evaluated once, when the interface is built ----
	        with gr.Group():
	            gr.HTML("<h2>�� System Info</h2>")

	            gr.HTML(f"""
	<div>
	<p><strong>GPU Available:</strong> {'✅ Yes' if torch.cuda.is_available() else '❌ No (CPU only)'}</p>
	<p><strong>TTS Engine:</strong> {TTS_CONFIG['engine']}</p>
	</div>

	<h3>�� Tips</h3>
	<ul>
	<li>First model load takes time</li>
	<li>3B model: ~6GB RAM needed</li>
	<li>8B model: ~16GB RAM needed</li>
	<li>GPU greatly speeds up processing</li>
	<li>gTTS requires internet connection</li>
	</ul>
	""")

	        # ---- Event wiring ----
	        # Load button: selected model name in, status string out.
	        load_button.click(
	            fn=load_granite_model,
	            inputs=[model_dropdown],
	            outputs=[model_status_box],
	        )

	        # Generate button: the four return values map onto the four outputs.
	        generate_button.click(
	            fn=process_audiobook,
	            inputs=[text_input, file_input, tone_dropdown, model_dropdown],
	            outputs=[status_box, original_box, rewritten_box, audio_player],
	        )

	    return demo

	# Script entry point: build the UI and serve it.
	if __name__ == "__main__":
	    demo = create_interface()
	    # Listen on all interfaces at port 7860; no public share link.
	    demo.launch(server_name="0.0.0.0", server_port=7860, share=False)