Spaces:
Configuration error
Configuration error
import os
import tempfile

import gradio as gr
from jiwer import wer
from openai import OpenAI
from rouge_score import rouge_scorer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
def get_client():
    """Build an OpenAI API client from the ``OPENAI_API_KEY`` env var.

    Raises:
        gr.Error: if the key is not configured in the Space Secrets.
    """
    key = os.environ.get("OPENAI_API_KEY")
    if key:
        return OpenAI(api_key=key)
    raise gr.Error("Missing OPENAI_API_KEY. Please set it in the Space Secrets.")
# Podcast-style summary prompt: system message that turns a raw lecture
# transcript into a speech-ready recap script. The original text contained
# the mojibake "3β5" (a garbled en dash); fixed to a plain "3-5".
SUMMARY_PROMPT = """
You are a skilled voice script writer. Convert the following lecture transcript into a speech-friendly, podcast-style script suitable for a 3-5 minute audio revision.
- Target audience is already familiar with the video and wants a clear, efficient recap.
- Preserve all key knowledge nodes and insights; do not omit or add content.
- Remove fillers, repetition, and references to slides or visuals.
- Use natural spoken language suitable for listening.
- Maintain a neutral, engaging tone.
- Format as a smooth podcast monologue.
Important Guidelines:
- The summary should be ~20% of the transcript length.
- Do not impersonate or claim to be a real professor or individual.
- Avoid mentioning specific universities, brands, or affiliations unless explicitly present.
- Do not fabricate facts, examples, or names not in the original transcript.
- Ensure all information remains faithful to the transcript.
"""
def run_pipeline(transcript_file):
    """Run the transcript -> summary -> TTS -> ASR -> evaluation pipeline.

    Parameters
    ----------
    transcript_file:
        Gradio file object for the uploaded .txt transcript; its ``.name``
        attribute is the path to the uploaded file on disk.

    Returns
    -------
    tuple
        ``(script_text, audio_file_path, asr_text, eval_dict)`` — the
        podcast-style script, the path of the synthesized MP3, the Whisper
        transcript of that audio, and a dict of evaluation metrics.

    Raises
    ------
    gr.Error
        If no file was uploaded or the transcript is empty.
    """
    if transcript_file is None:
        raise gr.Error("Please upload a .txt transcript file.")

    with open(transcript_file.name, "r", encoding="utf-8") as f:
        transcript = f.read()
    if not transcript.strip():
        raise gr.Error("Please upload a .txt transcript file.")

    client = get_client()
    script_text = _summarize(client, transcript)
    audio_file_path = _synthesize_audio(client, script_text)
    asr_text = _transcribe(client, audio_file_path)
    eval_dict = _evaluate(transcript, script_text, asr_text)
    return script_text, audio_file_path, asr_text, eval_dict


def _summarize(client, transcript):
    """Ask the chat model for a podcast-style summary of *transcript*."""
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "system", "content": SUMMARY_PROMPT},
            {"role": "user", "content": transcript},
        ],
    )
    return response.choices[0].message.content


def _synthesize_audio(client, script_text):
    """Synthesize *script_text* to speech and return the MP3 file path.

    A unique temporary file is used instead of a fixed ``summary_audio.mp3``
    so concurrent requests to the Space cannot overwrite each other's audio.
    """
    tts_response = client.audio.speech.create(
        model="gpt-4o-mini-tts",
        voice="alloy",
        input=script_text,
    )
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp:
        tmp.write(tts_response.read())
        return tmp.name


def _transcribe(client, audio_file_path):
    """Transcribe the audio with Whisper and return the stripped text."""
    with open(audio_file_path, "rb") as f:
        asr_response = client.audio.transcriptions.create(
            model="whisper-1",
            file=f,
        )
    return asr_response.text.strip()


def _evaluate(transcript, script_text, asr_text):
    """Score the round trip and return a metrics dict.

    WER compares the generated script against its ASR readback; ROUGE-L and
    TF-IDF cosine compare the ASR readback against the original transcript.
    """
    wer_score = wer(script_text.lower(), asr_text.lower())
    scorer = rouge_scorer.RougeScorer(["rougeL"], use_stemmer=True)
    rouge_l_score = scorer.score(transcript, asr_text)["rougeL"].fmeasure
    vec = TfidfVectorizer().fit_transform([transcript, asr_text])
    cos_sim = cosine_similarity(vec[0:1], vec[1:2])[0][0]

    # Fixed demo thresholds; WER is an error rate (lower is better).
    pass_wer = wer_score <= 0.15
    pass_rouge = rouge_l_score >= 0.20
    pass_cosine = cos_sim >= 0.35
    overall_pass = pass_wer and pass_rouge and pass_cosine
    return {
        "WER": round(wer_score, 4),
        "WER_pass": pass_wer,
        "ROUGE-L_F1": round(rouge_l_score, 4),
        "ROUGE_pass": pass_rouge,
        "TFIDF_Cosine": round(cos_sim, 4),
        "Cosine_pass": pass_cosine,
        "Overall": "PASS" if overall_pass else "FAIL",
    }
# Build Gradio UI. The heading's "β" glyphs were mojibake for "→" arrows
# (encoding mix-up); restored here.
with gr.Blocks() as demo:
    gr.Markdown("# Transcript → Podcast Summary → TTS → ASR → Evaluation")
    infile = gr.File(label="Upload Transcript (.txt)", file_types=[".txt"])
    run_btn = gr.Button("Run Pipeline")
    summary_out = gr.Textbox(label="Podcast-style Summary", lines=14)
    audio_out = gr.Audio(label="Summary Audio", type="filepath")
    asr_out = gr.Textbox(label="ASR Transcript", lines=10)
    metrics_out = gr.JSON(label="Evaluation Metrics")
    # Wire the button to the full pipeline; outputs map 1:1 to the widgets.
    run_btn.click(
        run_pipeline,
        inputs=[infile],
        outputs=[summary_out, audio_out, asr_out, metrics_out],
    )

if __name__ == "__main__":
    demo.launch()