cyberspyde committed on
Commit
c00bf70
·
1 Parent(s): f213e6f
Files changed (4) hide show
  1. .gitignore +46 -0
  2. README.md +82 -3
  3. app.py +134 -4
  4. requirements.txt +6 -0
.gitignore ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.so
6
+ .Python
7
+ build/
8
+ develop-eggs/
9
+ dist/
10
+ downloads/
11
+ eggs/
12
+ .eggs/
13
+ lib/
14
+ lib64/
15
+ parts/
16
+ sdist/
17
+ var/
18
+ wheels/
19
+ *.egg-info/
20
+ .installed.cfg
21
+ *.egg
22
+
23
+ # Virtual environments
24
+ venv/
25
+ env/
26
+ ENV/
27
+ .venv
28
+
29
+ # IDE
30
+ .vscode/
31
+ .idea/
32
+ *.swp
33
+ *.swo
34
+ *~
35
+
36
+ # OS
37
+ .DS_Store
38
+ Thumbs.db
39
+
40
+ # Gradio
41
+ gradio_cached_examples/
42
+ flagged/
43
+
44
+ # Model cache
45
+ .cache/
46
+ models/
README.md CHANGED
@@ -1,6 +1,6 @@
1
  ---
2
- title: Whisper
3
- emoji: 😻
4
  colorFrom: gray
5
  colorTo: indigo
6
  sdk: gradio
@@ -10,4 +10,83 @@ pinned: false
10
  license: apache-2.0
11
  ---
12
 
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: Whisper Uzbek STT
3
+ emoji: 🎙️
4
  colorFrom: gray
5
  colorTo: indigo
6
  sdk: gradio
 
10
  license: apache-2.0
11
  ---
12
 
13
+ # 🎙️ Whisper Uzbek Speech-to-Text
14
+
15
+ This Hugging Face Space provides automatic speech recognition (ASR) for the Uzbek language using the Whisper model.
16
+
17
+ ## 🚀 Features
18
+
19
+ - **Uzbek Language Support**: Optimized for Uzbek speech recognition
20
+ - **Easy to Use**: Simple interface for recording or uploading audio
21
+ - **Real-time Progress**: Visual feedback during transcription
22
+ - **CPU-Optimized**: Runs efficiently on CPU infrastructure
23
+ - **Comprehensive Logging**: Full logging system for monitoring and debugging
24
+
25
+ ## 🛠️ Technical Details
26
+
27
+ - **Model**: `jmshd/whisper-uz`
28
+ - **Framework**: Gradio 6.1.0
29
+ - **Backend**: PyTorch + Transformers
30
+ - **Processing**: CPU-only (HF Spaces)
31
+
32
+ ## 📝 Usage
33
+
34
+ 1. **Record Audio**: Click the microphone icon to record directly in your browser
35
+ 2. **Upload Audio**: Or upload an existing audio file
36
+ 3. **Transcribe**: Click the "Transcribe" button to convert speech to text
37
+ 4. **View Results**: The transcribed text will appear in the output box
38
+
39
+ ## 🔧 Local Development
40
+
41
+ To run this application locally:
42
+
43
+ ```bash
44
+ # Clone the repository
45
+ git clone <your-repo-url>
46
+ cd whisper
47
+
48
+ # Install dependencies
49
+ pip install -r requirements.txt
50
+
51
+ # Run the application
52
+ python app.py
53
+ ```
54
+
55
+ The application will be available at `http://localhost:7860`
56
+
57
+ ## 📦 Requirements
58
+
59
+ - Python 3.8+
60
+ - gradio==6.1.0
61
+ - transformers>=4.30.0
62
+ - torch>=2.0.0
63
+ - torchaudio>=2.0.0
64
+ - accelerate>=0.20.0
65
+ - huggingface_hub>=0.16.0
66
+
67
+ ## 📊 Logging
68
+
69
+ The application includes comprehensive logging:
70
+ - Environment information (PyTorch version, CUDA availability)
71
+ - Model loading status
72
+ - Audio processing details
73
+ - Transcription results and errors
74
+
75
+ Logs can be viewed in the Hugging Face Spaces logs tab.
76
+
77
+ ## 🤝 Contributing
78
+
79
+ Contributions are welcome! Feel free to:
80
+ - Report bugs
81
+ - Suggest features
82
+ - Submit pull requests
83
+
84
+ ## 📄 License
85
+
86
+ This project is licensed under the Apache 2.0 License.
87
+
88
+ ## 🔗 Resources
89
+
90
+ - [Hugging Face Spaces Documentation](https://huggingface.co/docs/hub/spaces-config-reference)
91
+ - [Gradio Documentation](https://gradio.app/docs)
92
+ - [Whisper Model Card](https://huggingface.co/jmshd/whisper-uz)
app.py CHANGED
@@ -1,7 +1,137 @@
1
  import gradio as gr
 
 
 
 
 
 
2
 
3
- def greet(name):
4
- return "Hello " + name + "!!"
 
 
 
 
5
 
6
- demo = gr.Interface(fn=greet, inputs="text", outputs="text")
7
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import gradio as gr
import torch
from transformers import WhisperProcessor, WhisperForConditionalGeneration
import logging
import os
from datetime import datetime
from huggingface_hub import HfApi

# Setup logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# Hugging Face Hub id of the fine-tuned Uzbek Whisper checkpoint.
MODEL_NAME = "jmshd/whisper-uz"

# Log environment info. Lazy %-style arguments keep formatting out of the
# logging call itself (the originals were f-strings, one with no fields).
logger.info("Starting Whisper Uzbek STT application")
logger.info("PyTorch version: %s", torch.__version__)
logger.info("CUDA available: %s", torch.cuda.is_available())
logger.info("Model: %s", MODEL_NAME)

# Load model and processor once at import time so every request reuses them.
try:
    logger.info("Loading processor...")
    processor = WhisperProcessor.from_pretrained(MODEL_NAME)
    logger.info("Loading model...")
    model = WhisperForConditionalGeneration.from_pretrained(MODEL_NAME)
    model.eval()  # inference-only service: disable dropout/training behavior
    logger.info("Model and processor loaded successfully")
except Exception:
    # Fail fast: the app is useless without the model. logger.exception
    # records the full traceback in the Spaces logs, not just the message.
    logger.exception("Error loading model")
    raise
34
+
35
def transcribe(audio, progress=gr.Progress()):
    """
    Transcribe audio to text using the Whisper model.

    Args:
        audio: Audio input from Gradio as a (sample_rate, audio_data) tuple;
            audio_data is a numpy array that may be integer PCM (e.g. int16)
            and/or multi-channel, at an arbitrary device sample rate.
        progress: Gradio progress tracker for UI feedback.

    Returns:
        str: Transcribed text, or a user-facing warning/error message.
    """
    # Local import keeps this block self-contained; numpy is already an
    # indirect dependency of gradio and torch.
    import numpy as np

    try:
        if audio is None:
            logger.warning("No audio input provided")
            return "⚠️ No audio provided. Please upload or record audio."

        progress(0.1, desc="Processing audio...")
        sample_rate, audio_data = audio

        logger.info("Processing audio - Sample rate: %s, Shape: %s",
                    sample_rate, audio_data.shape)

        if audio_data.size == 0:
            logger.warning("Empty audio input")
            return "⚠️ The recording is empty. Please try again."

        # Whisper's feature extractor expects mono float32 at 16 kHz, but
        # Gradio delivers raw device audio (often int16, sometimes stereo,
        # at an arbitrary rate). Normalize before feature extraction —
        # feeding raw int16 / wrong-rate audio produces garbage or errors.
        if np.issubdtype(audio_data.dtype, np.integer):
            # Scale integer PCM into [-1.0, 1.0].
            audio_data = audio_data.astype(np.float32) / np.iinfo(audio_data.dtype).max
        else:
            audio_data = audio_data.astype(np.float32)
        if audio_data.ndim > 1:
            # Downmix multi-channel audio to mono.
            audio_data = audio_data.mean(axis=1)

        target_sr = processor.feature_extractor.sampling_rate  # 16000 for Whisper
        if sample_rate != target_sr:
            # Linear-interpolation resample; adequate for speech and avoids
            # pulling in an extra resampling dependency.
            src_len = audio_data.shape[0]
            dst_len = int(round(src_len * target_sr / sample_rate))
            src_t = np.linspace(0.0, 1.0, num=src_len, endpoint=False)
            dst_t = np.linspace(0.0, 1.0, num=dst_len, endpoint=False)
            audio_data = np.interp(dst_t, src_t, audio_data).astype(np.float32)
            sample_rate = target_sr

        progress(0.3, desc="Preparing input features...")
        inputs = processor(
            audio_data,
            sampling_rate=sample_rate,
            return_tensors="pt"
        )

        progress(0.5, desc="Generating transcription...")
        with torch.no_grad():  # inference only: skip autograd bookkeeping
            predicted_ids = model.generate(inputs.input_features)

        progress(0.8, desc="Decoding text...")
        text = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]

        progress(1.0, desc="Complete!")
        logger.info("Transcription successful - Length: %d characters", len(text))

        return text

    except Exception as e:
        error_msg = f"❌ Error during transcription: {str(e)}"
        # logger.exception also captures the traceback for the Spaces logs.
        logger.exception(error_msg)
        return error_msg
79
+
80
+ # Enhanced Gradio interface
81
# UI definition. `iface` is the Blocks app launched by the __main__ guard.
with gr.Blocks(theme=gr.themes.Soft()) as iface:
    gr.Markdown(
        """
        # 🎙️ Whisper Uzbek Speech-to-Text

        Transcribe Uzbek audio to text using the Whisper model. This application runs on CPU and supports Uzbek language.

        **Model:** `jmshd/whisper-uz`
        """
    )

    with gr.Row():
        # Left column: audio capture plus the action buttons.
        with gr.Column():
            audio_in = gr.Audio(
                label="Upload or Record Audio",
                type="numpy",
                sources=["microphone", "upload"],
            )
            run_btn = gr.Button("🎯 Transcribe", variant="primary")
            reset_btn = gr.ClearButton([audio_in])

        # Right column: transcription output.
        with gr.Column():
            transcript_box = gr.Textbox(
                label="Transcription",
                placeholder="Your transcribed text will appear here...",
                lines=10,
            )

    gr.Markdown(
        """
        ### 📝 Usage Instructions:
        1. Click the microphone icon to record audio or upload an audio file
        2. Click the "Transcribe" button to convert speech to text
        3. The transcribed text will appear in the output box

        ### ℹ️ Information:
        - Supported language: Uzbek
        - Processing: CPU-only (may be slower than GPU)
        - Model size: Small
        """
    )

    # Wire the button to the transcription function.
    run_btn.click(fn=transcribe, inputs=audio_in, outputs=transcript_box)
128
+
129
+ # Launch configuration for Hugging Face Spaces
130
# Entry point when executed directly (Hugging Face Spaces runs app.py).
if __name__ == "__main__":
    logger.info("Launching Gradio interface...")
    launch_kwargs = {
        "share": False,            # Spaces provides the public URL itself
        "show_error": True,        # surface tracebacks in the UI
        "server_name": "0.0.0.0",  # bind all interfaces inside the container
        "server_port": 7860,       # standard HF Spaces port
    }
    iface.launch(**launch_kwargs)
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ gradio==6.1.0
2
+ transformers>=4.30.0
3
+ torch>=2.0.0
4
+ torchaudio>=2.0.0
5
+ accelerate>=0.20.0
6
+ huggingface_hub>=0.16.0