# Hugging Face Spaces app — speech analysis (Whisper transcription, T5, sentiment).
| import os | |
| import gradio as gr | |
| import torch | |
| import numpy as np | |
| import tempfile | |
| import librosa | |
| import re | |
| from gtts import gTTS | |
| from transformers import pipeline, T5ForConditionalGeneration, T5Tokenizer | |
| import whisper | |
| import sentencepiece # Ensure SentencePiece is imported | |
# Select GPU when available; every model below is placed on this device.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

# Load models with error handling: a failed load leaves the global as None
# so request handlers can report a clear message instead of crashing at import.
try:
    whisper_model = whisper.load_model("small")
except Exception as e:
    print(f"Failed to load Whisper model: {e}")
    whisper_model = None

# T5 (grammar correction). Wrapped in try/except for consistency with the
# other model loads — previously a download failure here killed the app.
try:
    t5_tokenizer = T5Tokenizer.from_pretrained("t5-base")
    t5_model = T5ForConditionalGeneration.from_pretrained("t5-base").to(device)
except Exception as e:
    print(f"Failed to load T5 model: {e}")
    t5_tokenizer = None
    t5_model = None

try:
    sentiment_analyzer = pipeline(
        "text-classification",
        model="distilbert-base-uncased-finetuned-sst-2-english",
        # transformers pipeline device convention: 0 = first GPU, -1 = CPU.
        device=0 if device == "cuda" else -1,
    )
except Exception as e:
    print(f"Failed to load sentiment analyzer: {e}")
    sentiment_analyzer = None
def speech_to_text(audio_path):
    """Transcribe an audio file using the globally loaded Whisper model.

    Returns the stripped transcript text on success, or a human-readable
    error string when the model is unavailable or transcription fails.
    """
    # Guard clause: model may be None if loading failed at startup.
    if whisper_model is None:
        return "Whisper model is not loaded."
    try:
        transcription = whisper_model.transcribe(audio_path)
    except Exception as exc:
        return f"Speech recognition error: {exc}"
    return transcription["text"].strip()
def process_audio(audio_path):
    """Run the speech pipeline on a recorded audio file.

    Returns a 6-tuple: (original text, corrected text, three placeholder
    strings, and a placeholder audio value). On any failure the first slot
    carries an error message and the rest are empty.
    """
    # Guard clause: reject a missing path or one that does not exist on disk.
    if not audio_path or not os.path.exists(audio_path):
        return "Error: No valid audio file provided.", "", "", "", "", None
    try:
        transcript = speech_to_text(audio_path)
        # Grammar correction is not implemented yet; echo the transcript.
        corrected = transcript
        return transcript, corrected, "", "", "", None
    except Exception as exc:
        return f"Processing error: {exc}", "", "", "", "", None
def create_interface():
    """Build and return the Gradio Blocks UI for the speech analyzer.

    Bug fix: process_audio returns a 6-tuple, but the click event wired it
    to a single output component, which Gradio rejects when the event fires
    (return-value count must match the outputs list). A small adapter keeps
    only the recognized-text field for the one visible textbox.
    """
    def _analyze(audio_path):
        # Adapter: surface only the recognized text from the 6-tuple.
        return process_audio(audio_path)[0]

    with gr.Blocks() as app:
        audio_input = gr.Audio(
            sources=["microphone"], type="filepath", label="Record your speech"
        )
        output_text = gr.Textbox(label="Recognized Text")
        submit_btn = gr.Button("Analyze Speech")
        submit_btn.click(_analyze, inputs=[audio_input], outputs=[output_text])
    return app
if __name__ == "__main__":
    # Listen on all interfaces; honor the PORT env var (default 7860).
    port = int(os.environ.get("PORT", 7860))
    demo = create_interface()
    demo.launch(server_port=port, server_name="0.0.0.0")