ssahal committed on
Commit
264982a
·
verified ·
1 Parent(s): 7cd6d53

Upload 2 files

Browse files
Files changed (2) hide show
  1. Dockerfile (1) +10 -0
  2. app (2).py +116 -0
Dockerfile (1) ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
# Container image for the Gradio transcript -> podcast-summary app.
# NOTE(review): this commit adds the script as "app (2).py"; confirm that a
# file named app.py exists in the build context, since CMD below runs it.
FROM python:3.10-slim
WORKDIR /app
# Copy and install requirements before the rest of the sources.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
COPY . .
# Gradio reads its bind address and port from these variables; the port
# matches the EXPOSE line below.
ENV GRADIO_SERVER_NAME=0.0.0.0
ENV GRADIO_SERVER_PORT=7860
EXPOSE 7860
CMD ["python", "app.py"]
app (2).py ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gradio as gr
3
+ from openai import OpenAI
4
+ from jiwer import wer
5
+ from rouge_score import rouge_scorer
6
+ from sklearn.feature_extraction.text import TfidfVectorizer
7
+ from sklearn.metrics.pairwise import cosine_similarity
8
+
9
+ # Connect to OpenAI API
10
def get_client():
    """Build an OpenAI client from the ``OPENAI_API_KEY`` environment variable.

    Returns:
        An ``OpenAI`` client constructed with the key read from the environment.

    Raises:
        gr.Error: If ``OPENAI_API_KEY`` is unset or empty.
    """
    key = os.getenv("OPENAI_API_KEY")
    if key:
        return OpenAI(api_key=key)
    raise gr.Error("Missing OPENAI_API_KEY. Please set it in the Space Secrets.")
15
+
16
+ # Exact podcast-style summarization prompt
17
# System prompt sent with every summarization request. The text is used
# verbatim (including its leading and trailing newline), so edit with care.
SUMMARY_PROMPT = """
You are a skilled voice script writer. Convert the following lecture transcript into a speech-friendly, podcast-style script suitable for a 3–5 minute audio revision.
- Target audience is already familiar with the video and wants a clear, efficient recap.
- Preserve all key knowledge nodes and insights; do not omit or add content.
- Remove fillers, repetition, and references to slides or visuals.
- Use natural spoken language suitable for listening.
- Maintain a neutral, engaging tone.
- Format as a smooth podcast monologue.
Important Guidelines for Output:
- The summary should be ~ 20% of the length of the transcripts.
- Do not impersonate or claim to be a real professor, instructor, or individual from any real-world institution.
- Avoid mentioning any specific universities, brands, or affiliations unless they are explicitly provided in the source content and approved for use.
- Do not fabricate facts, examples, or names that are not in the original transcript.
- Maintain a professional, neutral, and accessible tone without introducing bias or personal opinions.
- Exclude any personal or sensitive information unless it is anonymized or part of the approved source material.
- Ensure all information remains faithful to the transcript and is accurate to the best of your ability.
"""
34
+
35
def run_pipeline(transcript_file):
    """Summarize a transcript, voice it, transcribe it back, and score the result.

    Steps: read the uploaded transcript, ask ``gpt-4o`` for a podcast-style
    summary, synthesize that summary to an MP3 with ``gpt-4o-mini-tts``,
    transcribe the MP3 with ``whisper-1``, then compute WER (summary vs. ASR
    text) plus ROUGE-L F1 and TF-IDF cosine similarity (source transcript vs.
    ASR text) and compare each against a fixed threshold.

    Args:
        transcript_file: Value supplied by the ``gr.File`` input. Either a
            path string or an object exposing the path as ``.name``; ``None``
            when nothing was uploaded.

    Returns:
        A 4-tuple ``(script_text, audio_file_path, asr_text, eval_dict)``:
        the generated summary, the path of the MP3 file, the ASR transcript
        of that MP3, and a dict of metric values with pass/fail flags.

    Raises:
        gr.Error: If no file was uploaded, the file is not valid UTF-8, the
            transcript is empty, the model returns an empty summary, or
            ``OPENAI_API_KEY`` is missing (raised by ``get_client``).
    """
    import tempfile  # local import: only this function needs it

    if transcript_file is None:
        raise gr.Error("Please upload a .txt transcript file.")

    # Accept both a file-like wrapper (path in ``.name``) and a plain path.
    transcript_path = getattr(transcript_file, "name", transcript_file)
    try:
        with open(transcript_path, "r", encoding="utf-8") as f:
            transcript = f.read()
    except UnicodeDecodeError as err:
        raise gr.Error("The transcript must be a UTF-8 encoded .txt file.") from err

    # Fail before spending API calls; an empty document would also break
    # the TF-IDF step further down.
    if not transcript.strip():
        raise gr.Error("The uploaded transcript is empty.")

    client = get_client()

    # Summarization
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "system", "content": SUMMARY_PROMPT},
            {"role": "user", "content": transcript},
        ],
    )
    script_text = response.choices[0].message.content
    # The message content can be None or blank; TTS and WER both need text.
    if not script_text or not script_text.strip():
        raise gr.Error("The model returned an empty summary.")

    # TTS
    tts_response = client.audio.speech.create(
        model="gpt-4o-mini-tts",
        voice="alloy",
        input=script_text,
    )
    # A unique temp file per request: a fixed filename in the working
    # directory would be overwritten by concurrent users. delete=False keeps
    # the file on disk so Gradio can serve it after this function returns.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as audio_file:
        audio_file.write(tts_response.read())
        audio_file_path = audio_file.name

    # ASR
    with open(audio_file_path, "rb") as f:
        asr_response = client.audio.transcriptions.create(
            model="whisper-1",
            file=f,
        )
    asr_text = asr_response.text.strip()

    # Evaluation. Values are cast to built-in float so the JSON payload
    # contains no NumPy scalar types.
    wer_score = float(wer(script_text.lower(), asr_text.lower()))
    scorer = rouge_scorer.RougeScorer(['rougeL'], use_stemmer=True)
    rouge_l_score = float(scorer.score(transcript, asr_text)['rougeL'].fmeasure)
    vec = TfidfVectorizer().fit_transform([transcript, asr_text])
    cos_sim = float(cosine_similarity(vec[0:1], vec[1:2])[0][0])

    # Threshold checks
    pass_wer = wer_score <= 0.15
    pass_rouge = rouge_l_score >= 0.20
    pass_cosine = cos_sim >= 0.35
    overall_pass = pass_wer and pass_rouge and pass_cosine

    eval_dict = {
        "WER": round(wer_score, 4),
        "WER_pass": bool(pass_wer),
        "ROUGE-L_F1": round(rouge_l_score, 4),
        "ROUGE_pass": bool(pass_rouge),
        "TFIDF_Cosine": round(cos_sim, 4),
        "Cosine_pass": bool(pass_cosine),
        "Overall": "PASS" if overall_pass else "FAIL",
    }

    return script_text, audio_file_path, asr_text, eval_dict
97
+
98
+
99
+ # Build Gradio UI
100
# Gradio front end: one file input, a trigger button, and four output
# widgets that receive the 4-tuple returned by run_pipeline, in order.
with gr.Blocks() as demo:
    gr.Markdown("# Transcript Upload → Podcast-style Summary → TTS → ASR → Evaluation")

    # Input side
    infile = gr.File(label="Upload Transcript (.txt)", file_types=[".txt"])
    run_btn = gr.Button("Run Pipeline")

    # Output side
    summary_out = gr.Textbox(label="Podcast-style Summary", lines=14)
    audio_out = gr.Audio(label="Summary Audio", type="filepath")
    asr_out = gr.Textbox(label="ASR Transcript", lines=10)
    metrics_out = gr.JSON(label="Evaluation Metrics")

    run_btn.click(
        fn=run_pipeline,
        inputs=[infile],
        outputs=[summary_out, audio_out, asr_out, metrics_out],
    )

# Start the server only when executed as a script, not on import.
if __name__ == "__main__":
    demo.launch()
116
+