Spaces:

sainathBelagavi
/

transcript_summary

Sleeping

App Files Community

sainathBelagavi committed on Dec 23, 2024

Commit

a363c1c

verified ·

1 Parent(s): 79669cc

Create app.py

Browse files

Files changed (1) hide show

app.py +138 -0

app.py ADDED Viewed

	@@ -0,0 +1,138 @@

+# app.py
import functools
import json
import re
from datetime import datetime

import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
class TranscriptAnalyzer:
    """Analyze meeting transcripts with the Mistral-7B-Instruct model.

    Regular expressions pull dates and claim/case references out of the raw
    transcript. Those are handed to the model as hints inside an instruction
    prompt, and the model's structured analysis is returned as plain text.

    Creating an instance loads the tokenizer and the float16 model weights,
    so instances are expensive and should be reused.
    """

    # Numeric dates (D/M/Y, M/D/Y, Y/M/D) or "Month D, YYYY" dates.
    # The digit look-arounds stop the day-first form from matching the tail
    # of an ISO date ("24-03-15" inside "2024-03-15"); the optional ordinal
    # suffix accepts spellings such as "April 1st, 2024".
    _DATE_PATTERN = re.compile(
        r'(?<!\d)(?:\d{1,2}[-/]\d{1,2}[-/]\d{2,4}|\d{4}[-/]\d{1,2}[-/]\d{1,2})(?!\d)'
        r'|\b(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]*'
        r' \d{1,2}(?:st|nd|rd|th)?,? \d{4}\b'
    )

    # "claim 123", "case #123-AB" or a bare "#123". A single alternation
    # guarantees non-overlapping matches, so "Claim #123" is not reported a
    # second time as "#123"; the word boundary keeps "showcase 5" out.
    _CLAIM_PATTERN = re.compile(
        r'\b(?:claim|case)\s+#?\s*\d+[-\w]*|#\s*\d+[-\w]*',
        re.IGNORECASE,
    )

    def __init__(self):
        """Load the tokenizer and the model named by ``self.model_name``."""
        self.model_name = "mistralai/Mistral-7B-Instruct-v0.2"
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
        self.model = AutoModelForCausalLM.from_pretrained(
            self.model_name,
            torch_dtype=torch.float16,
            device_map="auto",
        )

    def extract_dates(self, text: str) -> list:
        """Return the date strings found in *text*, in order of appearance.

        Matching is case-sensitive for month names. Each date is returned
        exactly as written in *text*; nothing is parsed or normalised.
        """
        return [match.group() for match in self._DATE_PATTERN.finditer(text)]

    def extract_claim_numbers(self, text: str) -> list:
        """Return claim/case references found in *text*, in order of appearance.

        Matching is case-insensitive. Each reference is returned exactly as
        written, and a given stretch of text is reported at most once.
        """
        return [match.group() for match in self._CLAIM_PATTERN.finditer(text)]

    def generate_prompt(self, transcript: str) -> str:
        """Build the ``[INST] ... [/INST]`` instruction prompt for *transcript*.

        The extracted dates and claim references are listed as hints ahead
        of the transcript. The prompt deliberately contains no literal
        ``<s>`` or ``</s>``: the tokenizer is expected to prepend the
        begin-of-sequence token itself, and an end-of-sequence token right
        after ``[/INST]`` would close the turn before the model answers.
        """
        dates = self.extract_dates(transcript)
        claims = self.extract_claim_numbers(transcript)
        date_hint = ', '.join(dates) if dates else 'None'
        claim_hint = ', '.join(claims) if claims else 'None'
        return f"""[INST] Please analyze this meeting transcript with extreme precision and provide a structured analysis.
Remember to:
1. Only include information explicitly stated
2. Mark unclear information as "UNCLEAR"
3. Preserve exact numbers, dates, and claims
4. Focus on factual content
Identified dates: {date_hint}
Identified claims: {claim_hint}
Please analyze:
{transcript}
Provide your analysis in this format:
PARTICIPANTS:
- List participants and their roles
CONTEXT:
- Meeting purpose
- Duration (if mentioned)
KEY POINTS:
- Main topics
- Decisions made
- Important numbers/metrics
ACTION ITEMS:
- Tasks and assignments
- Deadlines
- Responsible parties
FOLLOW UP:
- Next meetings
- Pending items [/INST]"""

    def analyze_transcript(self, transcript: str) -> str:
        """Run the model on *transcript* and return its analysis text.

        An empty or whitespace-only transcript returns a short notice
        without invoking the model, since there is nothing to analyze.
        """
        if not transcript or not transcript.strip():
            return "No transcript provided."

        prompt = self.generate_prompt(transcript)
        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)

        with torch.no_grad():
            outputs = self.model.generate(
                **inputs,
                max_new_tokens=1000,
                temperature=0.1,
                do_sample=True,
                pad_token_id=self.tokenizer.eos_token_id,
            )

        # The generated sequence starts with the prompt tokens. Dropping
        # them by length is exact, unlike splitting the decoded text on
        # "[/INST]", which breaks if the model itself emits that marker.
        prompt_length = inputs["input_ids"].shape[-1]
        generated_tokens = outputs[0][prompt_length:]
        return self.tokenizer.decode(
            generated_tokens, skip_special_tokens=True
        ).strip()
@functools.lru_cache(maxsize=1)
def _get_analyzer():
    """Return the shared TranscriptAnalyzer, creating it on first use.

    Constructing the analyzer loads the model weights, so one instance is
    cached and reused instead of being rebuilt for every request.
    """
    return TranscriptAnalyzer()


def process_transcript(transcript: str):
    """Analyze *transcript* and return the analysis text.

    This is the callback wired into the Gradio interface.
    """
    return _get_analyzer().analyze_transcript(transcript)
# Build the Gradio UI: one multi-line input box for the transcript, one
# output box for the analysis, and two sample transcripts users can load.
transcript_input = gr.Textbox(
    lines=10,
    label="Enter Meeting Transcript",
    placeholder="Paste your meeting transcript here...",
)
analysis_output = gr.Textbox(label="Analysis Result", lines=20)
example_transcripts = [
    ["Meeting started on March 15, 2024 at 2:30 PM\nClaim #12345-ABC discussed regarding property damage\nJohn (Project Manager): Let's review the Q1 budget..."],
    ["Sarah (Team Lead): Good morning everyone. Today's meeting is about the new product launch.\nMike (Marketing): We're targeting April 1st, 2024 for the release.\nClaim #789-XYZ needs to be resolved before launch."],
]

iface = gr.Interface(
    fn=process_transcript,
    inputs=[transcript_input],
    outputs=analysis_output,
    title="Meeting Transcript Analyzer",
    description="Analyze meeting transcripts to extract key information, dates, claims, and action items.",
    examples=example_transcripts,
)

# Start serving the interface.
iface.launch()