Yijia-Plaud committed on
Commit
c44b0cd
·
verified ·
1 Parent(s): 61e25aa

init commit

Browse files
Files changed (5) hide show
  1. app.py +171 -0
  2. example_meeting.srt +19 -0
  3. llm_utils.py +165 -0
  4. prompts.py +49 -0
  5. requirements.txt +5 -0
app.py ADDED
@@ -0,0 +1,171 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import os
3
+ from llm_utils import parse_srt_file, analyze_meeting_transcript, generate_summary
4
+
5
+ # Gradio State variables to hold intermediate values
6
+ # transcript_state will store the raw text from the SRT file
7
+ # auto_template_state will store the template suggested by the LLM after analysis
8
+
9
def _resolve_srt_path(srt_file_obj):
    """Return the filesystem path for an uploaded-file value.

    Accepts either a plain path (``str`` / ``os.PathLike``) or an object that
    exposes the path through a ``name`` attribute, which is what the original
    code relied on.
    """
    if isinstance(srt_file_obj, (str, os.PathLike)):
        return os.fspath(srt_file_obj)
    return srt_file_obj.name


def handle_file_upload(srt_file_obj):
    """
    Handles the SRT file upload.

    Parses the file, then asks the LLM for the industry, the meeting purpose
    and a suggested summary template.

    Args:
        srt_file_obj: The uploaded file value, or None when no file is present.
            May be a path string or an object with a ``name`` attribute.

    Returns:
        A 7-tuple matching the UI outputs, in order: industry, meeting
        purpose, suggested template, transcript preview, transcript state,
        auto-template state, and the message shown in the summary box.
        On failure the analysis fields are empty strings and the last
        element carries the error message.
    """
    if srt_file_obj is None:
        # Clear all fields if no file is present (e.g., file removed)
        return "", "", "", "", "", "", "请上传SRT文件后再试 (Please upload an SRT file)."

    transcript_text = parse_srt_file(_resolve_srt_path(srt_file_obj))

    if transcript_text is None:
        return "", "", "", "SRT文件解析失败 (Error parsing SRT file).", "", "", "SRT文件解析失败,请检查文件格式 (SRT parsing failed, please check file format)."

    try:
        industry, meeting_purpose, auto_suggested_template = analyze_meeting_transcript(transcript_text)
    except ValueError as ve:  # API key issue
        error_msg = f"API配置错误 (API Configuration Error): {str(ve)}"
        # Keep the transcript visible and stored, but leave the template state empty
        return "", "", "", transcript_text, transcript_text, "", error_msg
    except Exception as e:
        error_msg = f"会议分析出错 (Error during meeting analysis): {e}"
        print(error_msg)
        return "", "", "", transcript_text, transcript_text, "", error_msg

    # Outputs: industry, meeting_purpose, suggested_template, transcript_preview,
    # transcript_state, auto_template_state, summary_placeholder
    return (
        industry,
        meeting_purpose,
        auto_suggested_template,
        transcript_text,
        transcript_text,
        auto_suggested_template,
        '上传SRT文件成功,点击"开始总结"生成总结 (SRT file uploaded successfully. Click \'Start Summarization\' to generate summary.)'
    )
44
+
45
+
46
def handle_summarization(current_transcript_text, current_auto_template, reference_summary_text):
    """
    Produce the meeting summary when the 'Start Summarization' button is clicked.

    A non-blank reference summary takes priority as the formatting guide;
    otherwise the auto-suggested template is used. Returns the summary text,
    or a user-facing message when there is nothing to summarize, no template
    is available, or the generation call fails.
    """
    if not current_transcript_text:
        return "无转写内容可供总结,请先上传SRT文件 (No transcript to summarize. Please upload an SRT file first)."

    try:
        use_reference = bool(reference_summary_text and reference_summary_text.strip())
        if use_reference:
            return generate_summary(current_transcript_text, reference_summary_text=reference_summary_text)
        if current_auto_template:
            return generate_summary(current_transcript_text, auto_template_structure=current_auto_template)
        # Reached when analysis failed but the user still clicks summarize
        return "无法获取总结模板,请确保SRT文件已成功分析 (Could not get summary template. Ensure SRT file was analyzed successfully)."
    except ValueError as ve:  # API key issue
        return f"API配置错误 (API Configuration Error): {str(ve)}"
    except Exception as e:
        failure_text = f"生成总结出错 (Error during summary generation): {e}"
        print(failure_text)
        return failure_text
71
+
72
+ # Gradio Interface Definition
73
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("## 会议总结程序 V1.1")  # Version updated
    gr.Markdown(
        "上传会议的SRT格式录音转写文件,系统将:\n"
        "1. **自动分析** 会议内容,给出所属行业和推荐的总结模板。\n"
        "2. (可选)您可以输入一个参考总结文本。\n"
        "3. 点击 **开始总结** 按钮后,系统将根据推荐模板或您提供的参考总结格式,生成最终的会议纪要。"
    )

    # Hidden state variables: the parsed transcript and the LLM-suggested
    # template are carried from the upload step to the summarization step.
    transcript_state = gr.State(value="")
    auto_template_state = gr.State(value="")

    with gr.Row():
        with gr.Column(scale=1):
            srt_file_input = gr.File(label="上传SRT文件", file_types=[".srt"])
            reference_summary_input = gr.Textbox(
                label="参考总结 (可选)",
                placeholder="如果希望按特定格式总结,请在此处粘贴参考总结文本...",
                lines=8
            )
            submit_button = gr.Button("开始总结", variant="primary")

        with gr.Column(scale=2):
            gr.Markdown("### 分析结果 (SRT上传后自动生成)")
            output_industry = gr.Textbox(label="推测行业", interactive=False)
            output_meeting_purpose = gr.Textbox(label="会议目的", interactive=False)
            # gr.Markdown is used here so the suggested template renders as Markdown
            gr.Markdown("### 推荐总结模板 (SRT上传后自动生成)")
            output_suggested_template = gr.Markdown(label="推荐总结模板")

            gr.Markdown("### 会议总结 (点击按钮后生成)")
            # gr.Textbox (not gr.Markdown) is used here for a block look
            output_summary = gr.Textbox(label="会议总结", interactive=False, lines=10)

            gr.Markdown("### 原始SRT转写内容 (上传SRT后自动更新)")
            output_transcript_preview = gr.Textbox(label="SRT文本预览", lines=8, interactive=False)

    # The upload handler and the examples handler both return 7 values that
    # map onto these components, in this order.
    analysis_outputs = [
        output_industry,
        output_meeting_purpose,
        output_suggested_template,
        output_transcript_preview,
        transcript_state,      # Update hidden state
        auto_template_state,   # Update hidden state
        output_summary         # Clear or update summary placeholder
    ]

    # Event for file upload: triggers analysis
    srt_file_input.upload(
        fn=handle_file_upload,
        inputs=[srt_file_input],
        outputs=analysis_outputs
    )

    # Event for button click: triggers summarization
    submit_button.click(
        fn=handle_summarization,
        inputs=[
            transcript_state,      # Use transcript from state
            auto_template_state,   # Use auto_template from state
            reference_summary_input
        ],
        outputs=[output_summary]
    )

    # Wrapper for the examples fn so its output matches the 7 analysis outputs
    def example_fn_wrapper(srt_file_obj, _reference_summary_text):
        # _reference_summary_text is ignored for the first step of example loading
        analysis_results = handle_file_upload(srt_file_obj)
        if isinstance(analysis_results, tuple) and len(analysis_results) == 7:
            # Replace the last element (initial summary message) with the example-specific message
            return analysis_results[:-1] + ("Example loaded. Click '开始总结' to generate summary based on this file.",)
        # Fallback if analysis_results is not as expected; still return
        # 7 elements to avoid Gradio errors
        return "Error", "Error", "Error loading example", "Please try manually.", "", "", "Error processing example."

    example_srt_path = os.path.join(os.path.dirname(__file__), "example_meeting.srt")

    gr.Examples(
        examples=[
            [example_srt_path, ""],
            [
                example_srt_path,
                # Real newlines: the original "\\n" escapes put literal
                # backslash-n text into the reference summary box.
                "会议纪要\n\n日期:YYYY-MM-DD\n主题:示例\n\n讨论点:\n- 点一\n- 点二\n\n行动项:\n- 某人:某事 (截止日期)"
            ]
        ],
        inputs=[srt_file_input, reference_summary_input],
        outputs=analysis_outputs,
        fn=example_fn_wrapper,
        cache_examples=False,
        # Without caching, fn only runs on example click when this is set;
        # otherwise the transcript state stays empty after picking an example.
        run_on_click=True
    )
163
+
164
if __name__ == "__main__":
    # Warn on the console when no API key is configured; the app is launched
    # either way.
    if not os.getenv("OPENROUTER_API_KEY"):
        missing_key_notice = (
            "警告: OPENROUTER_API_KEY 环境变量未设置。",
            "请在您的环境中设置它,或在项目根目录创建一个 .env 文件,内容如下:",
            'OPENROUTER_API_KEY="sk-or-v1-your-key-here"',
            'OPENROUTER_MODEL="your-chosen-model" (例如 mistralai/mistral-7b-instruct)',
            "程序可能无法正常与大模型交互。",
        )
        for notice_line in missing_key_notice:
            print(notice_line)
    demo.launch()
example_meeting.srt ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 1
2
+ 00:00:01,000 --> 00:00:05,000
3
+ Speaker 1: Hello everyone, and welcome to our weekly sync-up.
4
+
5
+ 2
6
+ 00:00:05,500 --> 00:00:08,000
7
+ Speaker 2: Hi! Good to be here.
8
+
9
+ 3
10
+ 00:00:08,500 --> 00:00:12,000
11
+ Speaker 1: Today, we'll discuss the Q3 project progress and upcoming deadlines.
12
+
13
+ 4
14
+ 00:00:12,500 --> 00:00:15,000
15
+ Speaker 3: Sounds good. I have an update on the design phase.
16
+
17
+ 5
18
+ 00:00:15,500 --> 00:00:20,000
19
+ Speaker 1: Great. Let's also make sure to allocate action items by the end of this meeting.
llm_utils.py ADDED
@@ -0,0 +1,165 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import requests
3
+ import pysrt
4
+ import json
5
+ from dotenv import load_dotenv
6
+ from prompts import ANALYZE_MEETING_PROMPT, SUMMARIZE_WITH_AUTO_TEMPLATE_PROMPT, SUMMARIZE_WITH_REFERENCE_TEMPLATE_PROMPT
7
+
8
# Load variables from a .env file (via python-dotenv) before reading settings.
load_dotenv()

# API key used for the Authorization header of every OpenRouter request.
OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY")
# The model can be switched to any other model available on OpenRouter by
# setting OPENROUTER_MODEL.
# See https://openrouter.ai/docs#models for a list of models
DEFAULT_MODEL = os.environ.get("OPENROUTER_MODEL", "openai/gpt-3.5-turbo")
14
+
15
+
16
def parse_srt_file(srt_file_path):
    """Read an SRT file and return its subtitle text joined by newlines.

    Returns None (after printing the error) if the file cannot be opened or
    parsed.
    """
    try:
        subtitle_entries = pysrt.open(srt_file_path)
        return "\n".join(entry.text for entry in subtitle_entries)
    except Exception as e:
        print(f"Error parsing SRT file: {e}")
        return None
25
+
26
def call_openrouter_api(prompt, model=DEFAULT_MODEL):
    """Send a single user message to the OpenRouter chat completions API.

    Args:
        prompt: Text sent as the only message, with role "user".
        model: OpenRouter model identifier; defaults to DEFAULT_MODEL.

    Returns:
        The stripped text of the first choice. On any request or parsing
        failure a string starting with "Error:" is returned instead of
        raising, so callers receive failures as ordinary text.

    Raises:
        ValueError: If OPENROUTER_API_KEY is not set.
    """
    if not OPENROUTER_API_KEY:
        raise ValueError("OPENROUTER_API_KEY not found. Please set it in your .env file or environment variables.")

    headers = {
        "Authorization": f"Bearer {OPENROUTER_API_KEY}",
        "Content-Type": "application/json"
    }
    data = {
        "model": model,
        "messages": [{"role": "user", "content": prompt}]
    }
    try:
        response = requests.post("https://openrouter.ai/api/v1/chat/completions", headers=headers, json=data, timeout=180)
        response.raise_for_status()  # Raise an exception for bad status codes
        result = response.json()

        choices = result.get("choices")
        message = choices[0].get("message") if choices else None
        content = message.get("content") if message else None
        # A missing or null content field is treated as a malformed response
        # rather than falling through to the generic handler via .strip().
        if isinstance(content, str):
            return content.strip()

        print(f"Unexpected API response format: {result}")
        return "Error: Could not parse LLM response."
    except requests.exceptions.RequestException as e:
        print(f"Error calling OpenRouter API: {e}")
        error_response = getattr(e, "response", None)
        if error_response is not None:
            try:
                print(f"Response content: {error_response.json()}")
            except ValueError:
                # ValueError covers every JSON decode error requests can
                # raise, whichever JSON backend it uses; an exception raised
                # here would otherwise escape this handler.
                print(f"Response content: {error_response.text}")
        return f"Error: API request failed. {str(e)}"
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
        return "Error: An unexpected error occurred during API call."
59
+
60
def _parse_analysis_response(response_text):
    """Split the model's analysis reply into (industry, meeting_purpose, suggested_template).

    Expected format:
        Industry: [Identified Industry]
        Meeting Purpose: [Identified Meeting Purpose]
        Suggested Template:
        [template structure...]

    Each label is located by searching for it, so text before "Industry:"
    or markdown emphasis around the labels does not shift the extracted
    values.
    """
    industry_label = "Industry:"
    purpose_label = "Meeting Purpose:"
    template_label = "Suggested Template:"

    industry_pos = response_text.find(industry_label)
    purpose_pos = response_text.find(purpose_label)
    template_pos = response_text.find(template_label)

    def labelled_value(label, label_pos, next_label_pos):
        # Value runs from just after the label up to the next label when that
        # follows it, otherwise to the end of the line (or of the text).
        if label_pos == -1:
            return "Unknown"
        start = label_pos + len(label)
        if next_label_pos > label_pos:
            end = next_label_pos
        else:
            newline_pos = response_text.find("\n", start)
            end = newline_pos if newline_pos != -1 else len(response_text)
        # Also drop '*' left over from bold labels such as "**Industry:**"
        return response_text[start:end].strip(" \t\r\n*") or "Unknown"

    industry = labelled_value(industry_label, industry_pos, purpose_pos)
    meeting_purpose = labelled_value(purpose_label, purpose_pos, template_pos)

    if template_pos != -1:
        suggested_template = response_text[template_pos + len(template_label):].strip()
    elif industry == "Unknown" and meeting_purpose == "Unknown":
        # Nothing recognizable at all: hand back the raw reply in the template
        # slot so the user can see what the model said.
        print(f"Could not parse major sections from LLM analysis response. Raw response: {response_text}")
        return "Unknown", "Unknown", response_text
    else:
        suggested_template = "Could not determine a template."

    return industry, meeting_purpose, suggested_template


def analyze_meeting_transcript(transcript_text):
    """Analyzes the transcript to get industry, meeting purpose, and suggested template.

    Args:
        transcript_text: Plain transcript text inserted into ANALYZE_MEETING_PROMPT.

    Returns:
        A tuple (industry, meeting_purpose, suggested_template). Fields that
        cannot be found are "Unknown" (or "Could not determine a template.").
        If no section is recognized, the raw reply is returned as the
        template; this includes the "Error: ..." strings that
        call_openrouter_api returns on request failures.

    Raises:
        ValueError: Propagated from call_openrouter_api when no API key is set.
    """
    prompt = ANALYZE_MEETING_PROMPT.format(transcript_text=transcript_text)
    response_text = call_openrouter_api(prompt)

    try:
        return _parse_analysis_response(response_text)
    except Exception as e:
        # Best-effort: never let a parsing problem break the upload flow
        print(f"Error parsing LLM analysis response: {e}. Raw response: {response_text}")
        return "Unknown", "Unknown", f"Error parsing response. Raw: {response_text}"
104
+
105
def generate_summary(transcript_text, reference_summary_text=None, auto_template_structure=None):
    """Build the summarization prompt and return the model's reply.

    A reference summary, when given, takes precedence over the auto-suggested
    template structure. With neither, a plain summarization request is sent.
    """
    if reference_summary_text:
        prompt_template = SUMMARIZE_WITH_REFERENCE_TEMPLATE_PROMPT
        guide_fields = {"reference_summary_text": reference_summary_text}
    elif auto_template_structure:
        prompt_template = SUMMARIZE_WITH_AUTO_TEMPLATE_PROMPT
        guide_fields = {"template_structure": auto_template_structure}
    else:
        # Fallback when no template is provided; callers are expected to make
        # sure one of the two guides is normally available.
        return call_openrouter_api(f"Please summarize the following meeting transcript:\n\n{transcript_text}")

    return call_openrouter_api(prompt_template.format(transcript_text=transcript_text, **guide_fields))
123
+
124
if __name__ == '__main__':
    # Manual smoke test for this module.
    # Needs a .env file (or environment) providing OPENROUTER_API_KEY and,
    # optionally, OPENROUTER_MODEL, e.g.
    #   OPENROUTER_API_KEY="sk-or-v1-your-key-here"
    #   OPENROUTER_MODEL="mistralai/mistral-7b-instruct"

    print("Testing LLM Utilities...")
    if OPENROUTER_API_KEY:
        example_srt_path = "example_meeting.srt"
        transcript = parse_srt_file(example_srt_path)
        if not transcript:
            print(f"Could not parse {example_srt_path} for testing.")
        else:
            print(f"\n--- Parsed Transcript (first 100 chars): ---\n{transcript[:100]}...")

            print("\n--- Analyzing Meeting Transcript ---")
            industry, purpose, template = analyze_meeting_transcript(transcript)
            print(f"Industry: {industry}")
            print(f"Meeting Purpose: {purpose}")
            print(f"Suggested Template:\n{template}")

            print("\n--- Generating Summary (with auto template) ---")
            summary1 = generate_summary(transcript, auto_template_structure=template)
            print(f"Summary 1:\n{summary1}")

            print("\n--- Generating Summary (with example reference template) ---")
            example_ref_summary = """
**Meeting Notes**
Date: YYYY-MM-DD
Topic: Project Alpha Update

**Key Points:**
- Item 1 discussed.
- Item 2 resolved.

**Action Items:**
- User A: Task X (Due: YYYY-MM-DD)
"""
            summary2 = generate_summary(transcript, reference_summary_text=example_ref_summary)
            print(f"Summary 2:\n{summary2}")
    else:
        print("OPENROUTER_API_KEY not found. Skipping direct test of llm_utils.py. Please set it in .env")
prompts.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Prompt for the analysis step. Takes one placeholder, {transcript_text}.
# The "Industry:", "Meeting Purpose:" and "Suggested Template:" labels in the
# requested reply format are the markers the response parser searches for,
# so they must stay in sync with that parser.
ANALYZE_MEETING_PROMPT = """
You are an expert meeting analyst. Analyze the following meeting transcript (from an SRT file) and provide:
1. The primary industry this meeting likely belongs to (e.g., Technology, Finance, Healthcare, Education, Marketing, Legal, etc.).
2. The main purpose or objective of this meeting (e.g., Project Kick-off, Weekly Sync, Decision Making, Brainstorming, Client Update, etc.).
3. A concise suggestion for a meeting summary template structure suitable for this industry and meeting type. Focus on key sections like:
* Meeting Title/Topic
* Date & Time
* Attendees (if discernible, otherwise a placeholder)
* Key Discussion Points / Agenda Items Covered
* Decisions Made / Resolutions
* Action Items (Assignee, Task, Deadline)
* Next Steps / Follow-up

Transcript:
{transcript_text}

Provide your response in the following format:
Industry: [Identified Industry]
Meeting Purpose: [Identified Meeting Purpose]
Suggested Template:
[Your suggested template structure here, using markdown-like formatting for sections and bullet points where appropriate]
"""

# Prompt for summarizing with the auto-suggested template.
# Placeholders: {transcript_text}, {template_structure}.
SUMMARIZE_WITH_AUTO_TEMPLATE_PROMPT = """
You are a professional meeting summarizer. Based on the provided meeting transcript and the suggested template structure, generate a concise and informative meeting summary.

Meeting Transcript (from SRT file):
{transcript_text}

Suggested Summary Template Structure:
{template_structure}

Generate the meeting summary following the suggested template structure.
"""

# Prompt for summarizing in the format of a user-supplied reference summary.
# Placeholders: {transcript_text}, {reference_summary_text}.
SUMMARIZE_WITH_REFERENCE_TEMPLATE_PROMPT = """
You are a professional meeting summarizer. You will be given a meeting transcript and a reference summary.
Your task is to summarize the provided meeting transcript.
You MUST follow the *format and structure* of the reference summary as a template.
Do NOT copy any content from the reference summary. Only use its structure (headings, sections, bullet points, tone, length, etc.) as a guide for your new summary.

Meeting Transcript (from SRT file):
{transcript_text}

Reference Summary (use this for formatting guidance ONLY):
{reference_summary_text}

Generate a new meeting summary for the provided transcript, strictly adhering to the format and structure of the reference summary.
"""
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ gradio
2
+ openai
3
+ python-dotenv
4
+ pysrt
5
+ requests