ssahal committed on
Commit
c030906
·
verified ·
1 Parent(s): a775df8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +113 -1
app.py CHANGED
@@ -1 +1,113 @@
1
- print('hello from app.py')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gradio as gr
3
+ from openai import OpenAI
4
+ from jiwer import wer
5
+ from rouge_score import rouge_scorer
6
+ from sklearn.feature_extraction.text import TfidfVectorizer
7
+ from sklearn.metrics.pairwise import cosine_similarity
8
+
9
+ # Connect to OpenAI API
10
def get_client():
    """Build an OpenAI client from the ``OPENAI_API_KEY`` environment variable.

    Returns:
        An ``OpenAI`` client constructed with the key read from the environment.

    Raises:
        gr.Error: If ``OPENAI_API_KEY`` is unset or empty.
    """
    key = os.getenv("OPENAI_API_KEY")
    if key:
        return OpenAI(api_key=key)
    # Unset and empty-string values are both treated as "missing".
    raise gr.Error("Missing OPENAI_API_KEY. Please set it in the Space Secrets.")
15
+
16
# Instruction text sent as the "system" message of the summarisation request.
# It asks the model to turn a lecture transcript into a short, faithful,
# podcast-style monologue. The text is part of the program's behaviour:
# editing it changes what the model is asked to produce.
SUMMARY_PROMPT = """
You are a skilled voice script writer. Convert the following lecture transcript into a speech-friendly, podcast-style script suitable for a 3–5 minute audio revision.
- Target audience is already familiar with the video and wants a clear, efficient recap.
- Preserve all key knowledge nodes and insights; do not omit or add content.
- Remove fillers, repetition, and references to slides or visuals.
- Use natural spoken language suitable for listening.
- Maintain a neutral, engaging tone.
- Format as a smooth podcast monologue.
Important Guidelines:
- The summary should be ~20% of the transcript length.
- Do not impersonate or claim to be a real professor or individual.
- Avoid mentioning specific universities, brands, or affiliations unless explicitly present.
- Do not fabricate facts, examples, or names not in the original transcript.
- Ensure all information remains faithful to the transcript.
"""
32
+
33
def run_pipeline(transcript_file):
    """Summarise a transcript, voice it, transcribe the audio back, and score it.

    Steps: read the uploaded transcript, ask the chat model for a
    podcast-style script, synthesise that script to MP3, run speech
    recognition on the MP3, then compute WER (script vs. ASR text) plus
    ROUGE-L and TF-IDF cosine similarity (transcript vs. ASR text).

    Args:
        transcript_file: Value delivered by the ``gr.File`` input. Either a
            filesystem path string or an object exposing the path as
            ``.name``; ``None`` when nothing was uploaded.

    Returns:
        A 4-tuple ``(script_text, audio_file_path, asr_text, eval_dict)``
        where ``audio_file_path`` is the path of the generated MP3 and
        ``eval_dict`` holds the rounded metrics, per-metric pass flags and
        an ``"Overall"`` value of ``"PASS"`` or ``"FAIL"``.

    Raises:
        gr.Error: If no file was uploaded, the file is not valid UTF-8, the
            transcript is empty, the model returned no script, the speech
            recogniser returned no text, or the API key is missing
            (raised by ``get_client``).
    """
    # Local import keeps this block self-contained; tempfile is stdlib.
    import tempfile

    if transcript_file is None:
        raise gr.Error("Please upload a .txt transcript file.")

    # Depending on the Gradio version / File `type`, the upload arrives either
    # as a plain path string or as a file wrapper carrying the path in `.name`.
    transcript_path = getattr(transcript_file, "name", transcript_file)

    # Read transcript
    try:
        with open(transcript_path, "r", encoding="utf-8") as f:
            transcript = f.read()
    except UnicodeDecodeError as err:
        raise gr.Error("The transcript must be a UTF-8 encoded text file.") from err

    # An empty transcript would only fail later, inside the metric libraries,
    # with an unhelpful error; reject it up front.
    if not transcript.strip():
        raise gr.Error("The uploaded transcript is empty.")

    client = get_client()

    # Summarization
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "system", "content": SUMMARY_PROMPT},
            {"role": "user", "content": transcript},
        ],
    )
    script_text = response.choices[0].message.content
    # The message content may be missing or blank; TTS and WER both need text.
    if not script_text or not script_text.strip():
        raise gr.Error("The model returned an empty summary. Please try again.")

    # TTS
    tts_response = client.audio.speech.create(
        model="gpt-4o-mini-tts",
        voice="alloy",
        input=script_text,
    )
    # Write to a unique temp file so simultaneous requests do not overwrite
    # each other's audio. delete=False: the file must outlive this function
    # so the Audio output component can read it.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as audio_file:
        audio_file.write(tts_response.read())
        audio_file_path = audio_file.name

    # ASR
    with open(audio_file_path, "rb") as f:
        asr_response = client.audio.transcriptions.create(
            model="whisper-1",
            file=f,
        )
    asr_text = asr_response.text.strip()
    # TF-IDF cannot build a vocabulary from an empty document.
    if not asr_text:
        raise gr.Error("Speech recognition returned no text for the generated audio.")

    # Evaluation
    # WER measures the TTS -> ASR round trip: the script is the reference.
    wer_score = wer(script_text.lower(), asr_text.lower())
    # ROUGE-L and cosine compare the recognised summary with the source.
    scorer = rouge_scorer.RougeScorer(['rougeL'], use_stemmer=True)
    rouge_l_score = scorer.score(transcript, asr_text)['rougeL'].fmeasure
    vec = TfidfVectorizer().fit_transform([transcript, asr_text])
    # cosine_similarity returns a numpy scalar; convert so the value and the
    # comparison below are builtin float/bool in the JSON payload.
    cos_sim = float(cosine_similarity(vec[0:1], vec[1:2])[0][0])

    # Thresholds
    pass_wer = bool(wer_score <= 0.15)
    pass_rouge = bool(rouge_l_score >= 0.20)
    pass_cosine = cos_sim >= 0.35
    overall_pass = pass_wer and pass_rouge and pass_cosine

    eval_dict = {
        "WER": round(float(wer_score), 4),
        "WER_pass": pass_wer,
        "ROUGE-L_F1": round(float(rouge_l_score), 4),
        "ROUGE_pass": pass_rouge,
        "TFIDF_Cosine": round(cos_sim, 4),
        "Cosine_pass": pass_cosine,
        "Overall": "PASS" if overall_pass else "FAIL",
    }

    return script_text, audio_file_path, asr_text, eval_dict
95
+
96
+
97
+ # Build Gradio UI
98
with gr.Blocks() as demo:
    gr.Markdown("# Transcript → Podcast Summary → TTS → ASR → Evaluation")

    # Inputs: the transcript upload and the trigger button.
    infile = gr.File(label="Upload Transcript (.txt)", file_types=[".txt"])
    run_btn = gr.Button("Run Pipeline")

    # Outputs, in the same order as the tuple returned by run_pipeline.
    summary_out = gr.Textbox(label="Podcast-style Summary", lines=14)
    audio_out = gr.Audio(label="Summary Audio", type="filepath")
    asr_out = gr.Textbox(label="ASR Transcript", lines=10)
    metrics_out = gr.JSON(label="Evaluation Metrics")

    # Clicking the button runs the whole pipeline on the uploaded file.
    run_btn.click(
        fn=run_pipeline,
        inputs=[infile],
        outputs=[summary_out, audio_out, asr_out, metrics_out],
    )

# Start the web app only when executed as a script, not on import.
if __name__ == "__main__":
    demo.launch()