# AI Speech Detector — Streamlit app: transcribes uploaded audio with Whisper
# and classifies the transcript as human- or AI-written.
import numpy as np
import streamlit as st
from faster_whisper import WhisperModel
from pydub import AudioSegment
from transformers import pipeline
@st.cache_resource
def initialize_model():
    """Load and cache the Whisper ASR model and the AI-text detector.

    Streamlit reruns the entire script on every widget interaction; without
    ``st.cache_resource`` both heavy models would be reloaded from disk on
    each rerun. The decorator caches them for the process lifetime and is
    transparent to callers.

    Returns:
        tuple: (faster_whisper.WhisperModel, transformers text-classification
        pipeline).
    """
    # int8 quantization keeps the "medium" model's CPU memory footprint low.
    model = WhisperModel("medium", device="cpu", compute_type="int8")
    ai_detector = pipeline("text-classification", model="roberta-base-openai-detector")
    return model, ai_detector
def preprocess_audio(uploaded_file):
    """Preprocess an uploaded audio file for Whisper transcription.

    Decodes the file, resamples to 16 kHz mono (Whisper's expected input),
    normalizes gain, and converts samples to float32 in [-1.0, 1.0].

    Args:
        uploaded_file: A file-like object (e.g. Streamlit UploadedFile).

    Returns:
        numpy.ndarray: 1-D float32 sample array scaled to [-1.0, 1.0].
    """
    audio = AudioSegment.from_file(uploaded_file)
    audio = audio.set_frame_rate(16000).set_channels(1).normalize()
    # Scale by the true full-scale value for the decoded sample width instead
    # of hard-coding 32768 (which silently assumes 16-bit samples); pydub may
    # decode sources at other widths, e.g. 8- or 32-bit.
    full_scale = float(1 << (8 * audio.sample_width - 1))
    samples = np.array(audio.get_array_of_samples(), dtype=np.float32) / full_scale
    return samples
def transcribe_audio(samples, model):
    """Run Whisper speech-to-text over prepared audio samples.

    Args:
        samples: Float32 audio samples accepted by ``model.transcribe``.
        model: A faster-whisper ``WhisperModel`` (or compatible object).

    Returns:
        list[str]: The text of each transcribed segment, in order.
    """
    # English-only decoding with voice-activity filtering; a small beam keeps
    # CPU latency reasonable.
    segments, _info = model.transcribe(
        samples, language="en", vad_filter=True, beam_size=3
    )
    texts = []
    for segment in segments:
        texts.append(segment.text)
    return texts
def combine_sentences(transcriptions, group_size=3):
    """Merge consecutive sentences into space-joined chunks of up to
    ``group_size`` sentences each (the final chunk may be shorter).

    Args:
        transcriptions: Ordered list of sentence strings.
        group_size: Maximum sentences per chunk.

    Returns:
        list[str]: The joined chunks, preserving sentence order.
    """
    return [
        " ".join(transcriptions[start:start + group_size])
        for start in range(0, len(transcriptions), group_size)
    ]
def ai_detection(text, ai_detector):
    """Classify a text chunk as human- or AI-written.

    Args:
        text: The text chunk to classify.
        ai_detector: A text-classification pipeline returning a list whose
            first item has ``label`` and ``score`` keys.

    Returns:
        dict: ``classification`` ("Human"/"AI"/"Insufficient Data"),
        ``probability`` (float score), and ``confidence``
        ("High"/"Medium"/"Low").
    """
    # Very short snippets give the classifier too little signal to be useful.
    if len(text.split()) < 5:
        return {"classification": "Insufficient Data", "probability": 0.0, "confidence": "Low"}
    prediction = ai_detector(text)[0]
    score = prediction["score"]
    # The detector emits "Real" for human-written text; everything else is AI.
    verdict = "Human" if prediction["label"] == "Real" else "AI"
    if score > 0.7:
        confidence = "High"
    elif score > 0.5:
        confidence = "Medium"
    else:
        confidence = "Low"
    return {
        "classification": verdict,
        "probability": score,
        "confidence": confidence,
    }
def run_app():
    """Render the Streamlit UI: upload audio, transcribe it, and show
    per-chunk AI-detection results."""
    st.title("AI Speech Detector")
    st.subheader("Upload an audio file for transcription and AI analysis.")
    st.markdown("""
This app uses the Whisper model for speech-to-text transcription and AI detection to classify the text.
Supported audio formats: **.wav**, **.mp3**.
""")

    # Load the heavy models before handling any upload.
    whisper_model, detector = initialize_model()

    uploaded_file = st.file_uploader("Upload an audio file", type=["wav", "mp3"])
    # Guard clause: nothing to do until the user uploads a file.
    if not uploaded_file:
        return

    st.info("Processing audio... Please wait.")
    try:
        # Decode + transcribe the upload.
        audio_samples = preprocess_audio(uploaded_file)
        sentences = transcribe_audio(audio_samples, whisper_model)

        # Group sentences into chunks and show the full transcript.
        chunks = combine_sentences(sentences, group_size=3)
        st.text_area("Transcription", value="\n".join(chunks), height=300)

        # Run the detector per chunk and render one result card each.
        st.subheader("AI Detection Results")
        for chunk in chunks:
            verdict = ai_detection(chunk, detector)
            st.write(f"**Text:** {chunk}")
            st.write(f"- **Classification:** {verdict['classification']}")
            st.write(f"- **Probability:** {verdict['probability']:.2f}")
            st.write(f"- **Confidence:** {verdict['confidence']}")
            st.markdown("---")
    except Exception as e:
        st.error(f"Error processing audio: {str(e)}")
# Script entry point: launch the Streamlit app when executed directly.
if __name__ == "__main__":
    run_app()