Spaces:

Yijia-Plaud
/

summary_test

Sleeping

App Files Files Community

Yijia-Plaud committed on May 17, 2025

Commit

c44b0cd

verified ·

1 Parent(s): 61e25aa

init commit

Browse files

Files changed (5) hide show

app.py +171 -0
example_meeting.srt +19 -0
llm_utils.py +165 -0
prompts.py +49 -0
requirements.txt +5 -0

app.py ADDED Viewed

	@@ -0,0 +1,171 @@

+import gradio as gr
+import os
+from llm_utils import parse_srt_file, analyze_meeting_transcript, generate_summary
+# Gradio State variables to hold intermediate values
+# transcript_state will store the raw text from the SRT file
+# auto_template_state will store the template suggested by the LLM after analysis
+def handle_file_upload(srt_file_obj):
+    """
+    Handles the SRT file upload.
+    Parses the file, analyzes it for industry, purpose, and template suggestion.
+    Returns values to update the UI and state variables.
+    """
+    if srt_file_obj is None:
+        # Clear all fields if no file is present (e.g., file removed)
+        return "", "", "", "", "", "", "请上传SRT文件后再试 (Please upload an SRT file)."
+    srt_file_path = srt_file_obj.name
+    transcript_text = parse_srt_file(srt_file_path)
+    if transcript_text is None:
+        return "", "", "", "SRT文件解析失败 (Error parsing SRT file).", "", "", "SRT文件解析失败，请检查文件格式 (SRT parsing failed, please check file format)."
+    try:
+        industry, meeting_purpose, auto_suggested_template = analyze_meeting_transcript(transcript_text)
+        # Outputs: industry, meeting_purpose, suggested_template, transcript_preview, transcript_state, auto_template_state, summary_placeholder
+        return (
+            industry,
+            meeting_purpose,
+            auto_suggested_template,
+            transcript_text,
+            transcript_text,
+            auto_suggested_template,
+            '上传SRT文件成功，点击"开始总结"生成总结 (SRT file uploaded successfully. Click \'Start Summarization\' to generate summary.)'
+        )
+    except ValueError as ve:  # API key issue
+        error_msg = f"API配置错误 (API Configuration Error): {str(ve)}"
+        return "", "", "", transcript_text, transcript_text, "", error_msg # Industry, Purpose, Template, Preview, TranscriptState, AutoTemplateState, Summary
+    except Exception as e:
+        error_msg = f"会议分析出错 (Error during meeting analysis): {e}"
+        print(error_msg)
+        return "", "", "", transcript_text, transcript_text, "", error_msg # Consistent error return for 7 outputs
+def handle_summarization(current_transcript_text, current_auto_template, reference_summary_text):
+    """
+    Handles the summarization process after the 'Start Summarization' button is clicked.
+    Uses the transcript and either the auto-suggested template or a reference summary.
+    """
+    if not current_transcript_text:
+        return "无转写内容可供总结，请先上传SRT文件 (No transcript to summarize. Please upload an SRT file first)."
+    summary_output = ""
+    try:
+        if reference_summary_text and reference_summary_text.strip():
+            summary_output = generate_summary(current_transcript_text, reference_summary_text=reference_summary_text)
+        elif current_auto_template:
+            summary_output = generate_summary(current_transcript_text, auto_template_structure=current_auto_template)
+        else:
+            # This case might happen if analysis failed but user still clicks summarize
+            return "无法获取总结模板，请确保SRT文件已成功分析 (Could not get summary template. Ensure SRT file was analyzed successfully)."
+    except ValueError as ve: # API key issue
+         return f"API配置错误 (API Configuration Error): {str(ve)}"
+    except Exception as e:
+        error_message = f"生成总结出错 (Error during summary generation): {e}"
+        print(error_message)
+        return error_message
+    return summary_output
+# Gradio Interface Definition
+with gr.Blocks(theme=gr.themes.Soft()) as demo:
+    gr.Markdown("## 会议总结程序 V1.1") # Version updated
+    gr.Markdown(
+        "上传会议的SRT格式录音转写文件，系统将：\n" +
+        "1. **自动分析** 会议内容，给出所属行业和推荐的总结模板。\n" +
+        "2. （可选）您可以输入一个参考总结文本。\n" +
+        "3. 点击 **开始总结** 按钮后，系统将根据推荐模板或您提供的参考总结格式，生成最终的会议纪要。"
+    )
+    # Hidden state variables
+    transcript_state = gr.State(value="")
+    auto_template_state = gr.State(value="")
+    with gr.Row():
+        with gr.Column(scale=1):
+            srt_file_input = gr.File(label="上传SRT文件", file_types=[".srt"])
+            reference_summary_input = gr.Textbox(
+                label="参考总结 (可选)",
+                placeholder="如果希望按特定格式总结，请在此处粘贴参考总结文本...",
+                lines=8 # Reduced lines slightly
+            )
+            submit_button = gr.Button("开始总结", variant="primary")
+        with gr.Column(scale=2):
+            gr.Markdown("### 分析结果 (SRT上传后自动生成)")
+            output_industry = gr.Textbox(label="推测行业", interactive=False)
+            output_meeting_purpose = gr.Textbox(label="会议目的", interactive=False) # New UI element
+            # Reverted to gr.Markdown for proper Markdown rendering
+            gr.Markdown("### 推荐总结模板 (SRT上传后自动生成)")
+            output_suggested_template = gr.Markdown(label="推荐总结模板")
+            gr.Markdown("### 会议总结 (点击按钮后生成)")
+            # Changed from gr.Markdown to gr.Textbox for a block look
+            output_summary = gr.Textbox(label="会议总结", interactive=False, lines=10)
+    gr.Markdown("### 原始SRT转写内容 (上传SRT后自动更新)")
+    output_transcript_preview = gr.Textbox(label="SRT文本预览", lines=8, interactive=False) # Reduced lines slightly
+    # Event for file upload: triggers analysis
+    srt_file_input.upload(
+        fn=handle_file_upload,
+        inputs=[srt_file_input],
+        outputs=[
+            output_industry,
+            output_meeting_purpose, # Added new output
+            output_suggested_template,
+            output_transcript_preview,
+            transcript_state, # Update hidden state
+            auto_template_state, # Update hidden state
+            output_summary # Clear or update summary placeholder
+        ]
+    )
+    # Event for button click: triggers summarization
+    submit_button.click(
+        fn=handle_summarization,
+        inputs=[
+            transcript_state, # Use transcript from state
+            auto_template_state, # Use auto_template from state
+            reference_summary_input
+        ],
+        outputs=[output_summary]
+    )
+    # Define a wrapper for the examples fn to match expected output structure
+    def example_fn_wrapper(srt_file_obj, _reference_summary_text):
+        # _reference_summary_text is ignored for the first step of example loading
+        analysis_results = handle_file_upload(srt_file_obj)
+        # Ensure we return 7 values now
+        if isinstance(analysis_results, tuple) and len(analysis_results) == 7:
+            # Replace the last element (initial_summary_msg) with the example-specific message
+            return analysis_results[:-1] + ("Example loaded. Click '开始总结' to generate summary based on this file.",)
+        else:
+            # Fallback if analysis_results is not as expected (e.g. error occurred)
+            # Ensure we still return a tuple of 7 elements to avoid Gradio errors
+            return "Error", "Error", "Error loading example", "Please try manually.", "", "", "Error processing example."
+    gr.Examples(
+        examples=[
+            [os.path.join(os.path.dirname(__file__), "example_meeting.srt"), ""],
+            [
+                os.path.join(os.path.dirname(__file__), "example_meeting.srt"),
+                "会议纪要\\n\\n日期：YYYY-MM-DD\\n主题：示例\\n\\n讨论点：\\n- 点一\\n- 点二\\n\\n行动项：\\n- 某人：某事 (截止日期)"
+            ]
+        ],
+        inputs=[srt_file_input, reference_summary_input],
+        outputs=[output_industry, output_meeting_purpose, output_suggested_template, output_transcript_preview, transcript_state, auto_template_state, output_summary],
+        fn=example_fn_wrapper,
+        cache_examples=False
+    )
+if __name__ == "__main__":
+    if not os.getenv("OPENROUTER_API_KEY"):
+        print("警告: OPENROUTER_API_KEY 环境变量未设置。")
+        print("请在您的环境中设置它，或在项目根目录创建一个 .env 文件，内容如下:")
+        print('OPENROUTER_API_KEY="sk-or-v1-your-key-here"')
+        print('OPENROUTER_MODEL="your-chosen-model" (例如 mistralai/mistral-7b-instruct)')
+        print("程序可能无法正常与大模型交互。")
+    demo.launch()

example_meeting.srt ADDED Viewed

	@@ -0,0 +1,19 @@

+1
+00:00:01,000 --> 00:00:05,000
+Speaker 1: Hello everyone, and welcome to our weekly sync-up.
+2
+00:00:05,500 --> 00:00:08,000
+Speaker 2: Hi! Good to be here.
+3
+00:00:08,500 --> 00:00:12,000
+Speaker 1: Today, we'll discuss the Q3 project progress and upcoming deadlines.
+4
+00:00:12,500 --> 00:00:15,000
+Speaker 3: Sounds good. I have an update on the design phase.
+5
+00:00:15,500 --> 00:00:20,000
+Speaker 1: Great. Let's also make sure to allocate action items by the end of this meeting.

llm_utils.py ADDED Viewed

	@@ -0,0 +1,165 @@

+import os
+import requests
+import pysrt
+import json
+from dotenv import load_dotenv
+from prompts import ANALYZE_MEETING_PROMPT, SUMMARIZE_WITH_AUTO_TEMPLATE_PROMPT, SUMMARIZE_WITH_REFERENCE_TEMPLATE_PROMPT
+# Load variables from a local .env file (if any) before reading the settings below.
+load_dotenv()
+# API key sent as the Bearer token; call_openrouter_api raises ValueError when it is unset.
+OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")
+# You can change the model to any other model available on OpenRouter
+# See https://openrouter.ai/docs#models for a list of models
+# The OPENROUTER_MODEL environment variable overrides the default below.
+DEFAULT_MODEL = os.getenv("OPENROUTER_MODEL", "openai/gpt-3.5-turbo")
+def parse_srt_file(srt_file_path):
+    """Parses an SRT file and returns the plain text content."""
+    try:
+        subs = pysrt.open(srt_file_path)
+        transcript_text = "\n".join([sub.text for sub in subs])
+        return transcript_text
+    except Exception as e:
+        print(f"Error parsing SRT file: {e}")
+        return None
+def call_openrouter_api(prompt, model=DEFAULT_MODEL):
+    """Calls the OpenRouter API with the given prompt and model."""
+    if not OPENROUTER_API_KEY:
+        raise ValueError("OPENROUTER_API_KEY not found. Please set it in your .env file or environment variables.")
+    headers = {
+        "Authorization": f"Bearer {OPENROUTER_API_KEY}",
+        "Content-Type": "application/json"
+    }
+    data = {
+        "model": model,
+        "messages": [{"role": "user", "content": prompt}]
+    }
+    try:
+        response = requests.post("https://openrouter.ai/api/v1/chat/completions", headers=headers, json=data, timeout=180)
+        response.raise_for_status() # Raise an exception for bad status codes
+        result = response.json()
+        if result.get("choices") and result["choices"][0].get("message"):
+            return result["choices"][0]["message"]["content"].strip()
+        else:
+            print(f"Unexpected API response format: {result}")
+            return "Error: Could not parse LLM response."
+    except requests.exceptions.RequestException as e:
+        print(f"Error calling OpenRouter API: {e}")
+        if hasattr(e, 'response') and e.response is not None:
+            try:
+                print(f"Response content: {e.response.json()}")
+            except json.JSONDecodeError:
+                print(f"Response content: {e.response.text}")
+        return f"Error: API request failed. {str(e)}"
+    except Exception as e:
+        print(f"An unexpected error occurred: {e}")
+        return "Error: An unexpected error occurred during API call."
+def analyze_meeting_transcript(transcript_text):
+    """Analyzes the transcript to get industry, meeting purpose, and suggested template."""
+    prompt = ANALYZE_MEETING_PROMPT.format(transcript_text=transcript_text)
+    response_text = call_openrouter_api(prompt)
+    # Basic parsing of the response, assuming the specified format
+    industry = "Unknown"
+    meeting_purpose = "Unknown"
+    suggested_template = "Could not determine a template."
+    # Expected format:
+    # Industry: [Identified Industry]
+    # Meeting Purpose: [Identified Meeting Purpose]
+    # Suggested Template:
+    # [Your suggested template structure here...]
+    try:
+        industry_line_end = response_text.find("\nMeeting Purpose:")
+        if industry_line_end != -1:
+            industry = response_text[len("Industry:"):industry_line_end].strip()
+        else: # Fallback if "Meeting Purpose:" not found directly after industry
+            industry_line_alt_end = response_text.find("\n")
+            if industry_line_alt_end != -1 and response_text.startswith("Industry:"):
+                 industry = response_text[len("Industry:"):industry_line_alt_end].strip()
+        purpose_line_start = response_text.find("Meeting Purpose:")
+        purpose_line_end = response_text.find("\nSuggested Template:")
+        if purpose_line_start != -1 and purpose_line_end != -1:
+            meeting_purpose = response_text[purpose_line_start + len("Meeting Purpose:"):purpose_line_end].strip()
+        template_line_start = response_text.find("Suggested Template:")
+        if template_line_start != -1:
+            suggested_template = response_text[template_line_start + len("Suggested Template:"):].strip()
+        else: # If parsing fails spectacularly, return raw for debugging by user if necessary
+            if industry == "Unknown" and meeting_purpose == "Unknown": # only if everything else failed
+                print(f"Could not parse major sections from LLM analysis response. Raw response: {response_text}")
+                return "Unknown", "Unknown", response_text # return raw as template
+    except Exception as e:
+        print(f"Error parsing LLM analysis response: {e}. Raw response: {response_text}")
+        # Fallback to returning raw response in template field if parsing fails significantly
+        return "Unknown", "Unknown", f"Error parsing response. Raw: {response_text}"
+    return industry, meeting_purpose, suggested_template
+def generate_summary(transcript_text, reference_summary_text=None, auto_template_structure=None):
+    """Generates the meeting summary."""
+    if reference_summary_text:
+        prompt = SUMMARIZE_WITH_REFERENCE_TEMPLATE_PROMPT.format(
+            transcript_text=transcript_text,
+            reference_summary_text=reference_summary_text
+        )
+    elif auto_template_structure:
+        prompt = SUMMARIZE_WITH_AUTO_TEMPLATE_PROMPT.format(
+            transcript_text=transcript_text,
+            template_structure=auto_template_structure
+        )
+    else:
+        # Fallback or default summarization if no template is provided
+        # This case should ideally be handled by ensuring one of the templates is always available
+        prompt = f"Please summarize the following meeting transcript:\n\n{transcript_text}"
+    return call_openrouter_api(prompt)
+if __name__ == '__main__':
+    # Example usage (for testing this module directly)
+    # Create a dummy .env file with your OPENROUTER_API_KEY and optionally OPENROUTER_MODEL
+    # e.g., OPENROUTER_API_KEY="sk-or-v1-your-key-here"
+    #       OPENROUTER_MODEL="mistralai/mistral-7b-instruct"
+    print("Testing LLM Utilities...")
+    if not OPENROUTER_API_KEY:
+        print("OPENROUTER_API_KEY not found. Skipping direct test of llm_utils.py. Please set it in .env")
+    else:
+        example_srt_path = "example_meeting.srt"
+        transcript = parse_srt_file(example_srt_path)
+        if transcript:
+            print(f"\n--- Parsed Transcript (first 100 chars): ---\n{transcript[:100]}...")
+            print("\n--- Analyzing Meeting Transcript ---")
+            industry, purpose, template = analyze_meeting_transcript(transcript)
+            print(f"Industry: {industry}")
+            print(f"Meeting Purpose: {purpose}")
+            print(f"Suggested Template:\n{template}")
+            print("\n--- Generating Summary (with auto template) ---")
+            summary1 = generate_summary(transcript, auto_template_structure=template)
+            print(f"Summary 1:\n{summary1}")
+            print("\n--- Generating Summary (with example reference template) ---")
+            example_ref_summary = """
+            **Meeting Notes**
+            Date: YYYY-MM-DD
+            Topic: Project Alpha Update
+            **Key Points:**
+            - Item 1 discussed.
+            - Item 2 resolved.
+            **Action Items:**
+            - User A: Task X (Due: YYYY-MM-DD)
+            """
+            summary2 = generate_summary(transcript, reference_summary_text=example_ref_summary)
+            print(f"Summary 2:\n{summary2}")
+        else:
+            print(f"Could not parse {example_srt_path} for testing.")

prompts.py ADDED Viewed

	@@ -0,0 +1,49 @@

+# Prompt template for the analysis step. It has one placeholder,
+# {transcript_text}, filled in with str.format. The "Industry:",
+# "Meeting Purpose:" and "Suggested Template:" labels requested at the end
+# are the markers the response parser in llm_utils searches for, so keep
+# them in sync with that parser when editing this text.
+ANALYZE_MEETING_PROMPT = """
+You are an expert meeting analyst. Analyze the following meeting transcript (from an SRT file) and provide:
+1.  The primary industry this meeting likely belongs to (e.g., Technology, Finance, Healthcare, Education, Marketing, Legal, etc.).
+2.  The main purpose or objective of this meeting (e.g., Project Kick-off, Weekly Sync, Decision Making, Brainstorming, Client Update, etc.).
+3.  A concise suggestion for a meeting summary template structure suitable for this industry and meeting type. Focus on key sections like:
+    *   Meeting Title/Topic
+    *   Date & Time
+    *   Attendees (if discernible, otherwise a placeholder)
+    *   Key Discussion Points / Agenda Items Covered
+    *   Decisions Made / Resolutions
+    *   Action Items (Assignee, Task, Deadline)
+    *   Next Steps / Follow-up
+Transcript:
+{transcript_text}
+Provide your response in the following format:
+Industry: [Identified Industry]
+Meeting Purpose: [Identified Meeting Purpose]
+Suggested Template:
+[Your suggested template structure here, using markdown-like formatting for sections and bullet points where appropriate]
+"""
+# Prompt template for summarizing with the LLM-suggested structure.
+# Placeholders (filled in with str.format): {transcript_text} and
+# {template_structure}.
+SUMMARIZE_WITH_AUTO_TEMPLATE_PROMPT = """
+You are a professional meeting summarizer. Based on the provided meeting transcript and the suggested template structure, generate a concise and informative meeting summary.
+Meeting Transcript (from SRT file):
+{transcript_text}
+Suggested Summary Template Structure:
+{template_structure}
+Generate the meeting summary following the suggested template structure.
+"""
+# Prompt template for summarizing in the format of a user-supplied reference
+# summary. Placeholders (filled in with str.format): {transcript_text} and
+# {reference_summary_text}. The reference is meant as a formatting guide
+# only; the prompt tells the model not to copy its content.
+SUMMARIZE_WITH_REFERENCE_TEMPLATE_PROMPT = """
+You are a professional meeting summarizer. You will be given a meeting transcript and a reference summary.
+Your task is to summarize the provided meeting transcript.
+You MUST follow the *format and structure* of the reference summary as a template.
+Do NOT copy any content from the reference summary. Only use its structure (headings, sections, bullet points, tone, length, etc.) as a guide for your new summary.
+Meeting Transcript (from SRT file):
+{transcript_text}
+Reference Summary (use this for formatting guidance ONLY):
+{reference_summary_text}
+Generate a new meeting summary for the provided transcript, strictly adhering to the format and structure of the reference summary.
+"""

requirements.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+gradio
+openai
+python-dotenv
+pysrt
+requests