Spaces:

ssahal
/

RevisionAudioCreation

Configuration error

App Files Files Community

ssahal committed on Aug 19, 2025

Commit

c030906

verified ·

1 Parent(s): a775df8

Update app.py

Browse files

Files changed (1) hide show

app.py +113 -1

app.py CHANGED Viewed

	@@ -1 +1,113 @@
1	- ~~print('hello from app.py')~~

+import os
+import gradio as gr
+from openai import OpenAI
+from jiwer import wer
+from rouge_score import rouge_scorer
+from sklearn.feature_extraction.text import TfidfVectorizer
+from sklearn.metrics.pairwise import cosine_similarity
+# Connect to OpenAI API
+def get_client():
+    api_key = os.getenv("OPENAI_API_KEY")
+    if not api_key:
+        raise gr.Error("Missing OPENAI_API_KEY. Please set it in the Space Secrets.")
+    return OpenAI(api_key=api_key)
+# Podcast-style summary prompt.
+# Sent as the "system" message of the summarization request in run_pipeline;
+# the transcript itself is sent separately as the "user" message.
+SUMMARY_PROMPT = """
+You are a skilled voice script writer. Convert the following lecture transcript into a speech-friendly, podcast-style script suitable for a 3–5 minute audio revision.
+- Target audience is already familiar with the video and wants a clear, efficient recap.
+- Preserve all key knowledge nodes and insights; do not omit or add content.
+- Remove fillers, repetition, and references to slides or visuals.
+- Use natural spoken language suitable for listening.
+- Maintain a neutral, engaging tone.
+- Format as a smooth podcast monologue.
+Important Guidelines:
+- The summary should be ~20% of the transcript length.
+- Do not impersonate or claim to be a real professor or individual.
+- Avoid mentioning specific universities, brands, or affiliations unless explicitly present.
+- Do not fabricate facts, examples, or names not in the original transcript.
+- Ensure all information remains faithful to the transcript.
+"""
+def run_pipeline(transcript_file):
+    if transcript_file is None:
+        raise gr.Error("Please upload a .txt transcript file.")
+    # Read transcript
+    with open(transcript_file.name, "r", encoding="utf-8") as f:
+        transcript = f.read()
+    client = get_client()
+    # Summarization
+    response = client.chat.completions.create(
+        model="gpt-4o",
+        messages=[
+            {"role": "system", "content": SUMMARY_PROMPT},
+            {"role": "user", "content": transcript}
+        ]
+    )
+    script_text = response.choices[0].message.content
+    # TTS
+    audio_file_path = "summary_audio.mp3"
+    tts_response = client.audio.speech.create(
+        model="gpt-4o-mini-tts",
+        voice="alloy",
+        input=script_text
+    )
+    with open(audio_file_path, "wb") as f:
+        f.write(tts_response.read())
+    # ASR
+    with open(audio_file_path, "rb") as f:
+        asr_response = client.audio.transcriptions.create(
+            model="whisper-1",
+            file=f
+        )
+    asr_text = asr_response.text.strip()
+    # Evaluation
+    wer_score = wer(script_text.lower(), asr_text.lower())
+    scorer = rouge_scorer.RougeScorer(['rougeL'], use_stemmer=True)
+    rouge_l_score = scorer.score(transcript, asr_text)['rougeL'].fmeasure
+    vec = TfidfVectorizer().fit_transform([transcript, asr_text])
+    cos_sim = cosine_similarity(vec[0:1], vec[1:2])[0][0]
+    # Thresholds
+    pass_wer = wer_score <= 0.15
+    pass_rouge = rouge_l_score >= 0.20
+    pass_cosine = cos_sim >= 0.35
+    overall_pass = pass_wer and pass_rouge and pass_cosine
+    eval_dict = {
+        "WER": round(wer_score, 4),
+        "WER_pass": pass_wer,
+        "ROUGE-L_F1": round(rouge_l_score, 4),
+        "ROUGE_pass": pass_rouge,
+        "TFIDF_Cosine": round(cos_sim, 4),
+        "Cosine_pass": pass_cosine,
+        "Overall": "PASS" if overall_pass else "FAIL"
+    }
+    return script_text, audio_file_path, asr_text, eval_dict
+# Build Gradio UI
+with gr.Blocks() as demo:
+    gr.Markdown("# Transcript → Podcast Summary → TTS → ASR → Evaluation")
+    infile = gr.File(label="Upload Transcript (.txt)", file_types=[".txt"])
+    run_btn = gr.Button("Run Pipeline")
+    summary_out = gr.Textbox(label="Podcast-style Summary", lines=14)
+    audio_out = gr.Audio(label="Summary Audio", type="filepath")
+    asr_out = gr.Textbox(label="ASR Transcript", lines=10)
+    metrics_out = gr.JSON(label="Evaluation Metrics")
+    run_btn.click(run_pipeline, inputs=[infile],
+                  outputs=[summary_out, audio_out, asr_out, metrics_out])
+if __name__ == "__main__":
+    demo.launch()