Spaces:
Sleeping
Sleeping
| # app.py | |
| import gradio as gr | |
| import json | |
| import re | |
| import os | |
| from datetime import datetime | |
| from transformers import AutoTokenizer, AutoModelForCausalLM | |
| import torch | |
| from huggingface_hub import login | |
# Authenticate against the Hugging Face Hub before anything touches a model.
# The token is read from the HUGGINGFACE_TOKEN environment variable and kept
# in the module-level name `hf_token`, which later code reads.
hf_token = os.environ.get('HUGGINGFACE_TOKEN')
try:
    # Guard clause: a missing/empty token is reported through the same
    # print-and-re-raise path as a failed login call.
    if not hf_token:
        raise ValueError("HUGGINGFACE_TOKEN not found in environment variables")
    login(token=hf_token)
except Exception as e:
    # Surface the reason on stdout, then abort start-up by re-raising.
    print(f"Error during Hugging Face login: {str(e)}")
    raise
class TranscriptAnalyzer:
    """Analyze meeting transcripts with a causal language model.

    On construction the tokenizer and model named by ``self.model_name`` are
    loaded. ``analyze_transcript`` then pre-extracts dates and claim/case
    references with regular expressions, embeds them in an instruction
    prompt, and returns the text the model generates for that prompt.
    """

    # Compiled once for the class. A single alternation per extractor lets
    # ``finditer`` return non-overlapping matches, so one piece of text can
    # never be reported twice by two different sub-patterns.
    _DATE_RE = re.compile(
        # Year-first numeric dates, e.g. 2024-03-15 or 2024/3/5.
        r'(?<!\d)\d{4}[-/]\d{1,2}[-/]\d{1,2}(?!\d)'
        # Day/month-first numeric dates, e.g. 03/15/2024 or 3-5-24. The digit
        # look-arounds stop this branch from matching "24-03-15" inside
        # "2024-03-15".
        r'|(?<!\d)\d{1,2}[-/]\d{1,2}[-/]\d{2,4}(?!\d)'
        # Month-name dates, e.g. "March 15, 2024" or "April 1st, 2024".
        r'|\b(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]*'
        r' \d{1,2}(?:st|nd|rd|th)?,? \d{4}\b'
    )
    _CLAIM_RE = re.compile(
        # "claim 123", "Claim #123-ABC", "case # 42" (whole words only) ...
        r'\b(?:claim|case)\s+#?\s*\d+[-\w]*'
        # ... or a bare "#123" that is not already part of one of the above.
        r'|#\s*\d+[-\w]*',
        re.IGNORECASE,
    )

    def __init__(self):
        """Load the tokenizer and model.

        Reads the module-level ``hf_token`` for Hub authentication.

        Raises:
            Exception: whatever the underlying ``from_pretrained`` calls
                raise; the error is printed and then re-raised.
        """
        try:
            # Initialize the model and tokenizer with auth token
            self.model_name = "microsoft/Phi-3.5-mini-instruct"
            self.tokenizer = AutoTokenizer.from_pretrained(
                self.model_name,
                # `token` replaces the deprecated `use_auth_token` keyword.
                token=hf_token,
                trust_remote_code=True
            )
            self.model = AutoModelForCausalLM.from_pretrained(
                self.model_name,
                token=hf_token,
                torch_dtype=torch.float16,
                device_map="auto",
                trust_remote_code=True
            )
        except Exception as e:
            print(f"Error initializing model: {str(e)}")
            raise

    def extract_dates(self, text: str):
        """Return every date-like substring of *text*, in order of appearance.

        Recognized forms are year-first numeric dates (``2024-03-15``),
        day/month-first numeric dates (``03/15/2024``, ``3-5-24``) and
        English month-name dates with an optional ordinal suffix
        (``March 15, 2024``, ``April 1st, 2024``).

        Args:
            text: Text to scan.

        Returns:
            A list of the matched substrings (empty when nothing matches).
        """
        return [match.group() for match in self._DATE_RE.finditer(text)]

    def extract_claim_numbers(self, text: str):
        """Return every claim/case reference in *text*, in order of appearance.

        Matches ``claim``/``case`` (case-insensitive, whole word) followed by
        an optional ``#`` and a number, as well as a bare ``#`` followed by a
        number. A reference such as ``Claim #123`` is reported once, not once
        as ``Claim #123`` and again as ``#123``.

        Args:
            text: Text to scan.

        Returns:
            A list of the matched substrings (empty when nothing matches).
        """
        return [match.group() for match in self._CLAIM_RE.finditer(text)]

    def generate_prompt(self, transcript: str):
        """Build the model prompt for *transcript*.

        The instruction text lists the pre-extracted dates and claims, embeds
        the transcript, and describes the required output sections. It is
        wrapped as a single user turn using the tokenizer's own chat template
        so the control tokens match what the loaded model expects.

        Args:
            transcript: Raw meeting transcript.

        Returns:
            The rendered prompt string, ending with the generation prompt.
        """
        dates = self.extract_dates(transcript)
        claims = self.extract_claim_numbers(transcript)
        instructions = f"""Please analyze this meeting transcript with extreme precision and provide a structured analysis.
Remember to:
1. Only include information explicitly stated
2. Mark unclear information as "UNCLEAR"
3. Preserve exact numbers, dates, and claims
4. Focus on factual content
Identified dates: {', '.join(dates) if dates else 'None'}
Identified claims: {', '.join(claims) if claims else 'None'}
Please analyze:
{transcript}
Provide your analysis in this format:
PARTICIPANTS:
- List participants and their roles
CONTEXT:
- Meeting purpose
- Duration (if mentioned)
KEY POINTS:
- Main topics
- Decisions made
- Important numbers/metrics
ACTION ITEMS:
- Tasks and assignments
- Deadlines
- Responsible parties
FOLLOW UP:
- Next meetings
- Pending items"""
        messages = [{"role": "user", "content": instructions}]
        # Let the tokenizer render its own chat markup instead of hard-coding
        # another model family's "[INST]" wrapper and a premature "</s>".
        return self.tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True,
        )

    def analyze_transcript(self, transcript: str):
        """Run the model on *transcript* and return its analysis text.

        Args:
            transcript: Raw meeting transcript.

        Returns:
            The generated analysis with surrounding whitespace stripped, or a
            string starting with ``"Error analyzing transcript: "`` if any
            step raises.
        """
        try:
            # Generate prompt
            prompt = self.generate_prompt(transcript)
            # Tokenize input. The rendered chat template already carries the
            # special tokens it needs, so the tokenizer must not add more.
            inputs = self.tokenizer(
                prompt,
                return_tensors="pt",
                add_special_tokens=False,
            ).to(self.model.device)
            # Generate response
            with torch.no_grad():
                outputs = self.model.generate(
                    **inputs,
                    max_new_tokens=1000,
                    temperature=0.1,
                    do_sample=True,
                    pad_token_id=self.tokenizer.eos_token_id
                )
            # Decode only the tokens produced after the prompt. Slicing by
            # prompt length is independent of the prompt's wording, unlike
            # splitting the decoded text on a marker string.
            prompt_length = inputs["input_ids"].shape[-1]
            response = self.tokenizer.decode(
                outputs[0][prompt_length:],
                skip_special_tokens=True,
            )
            return response.strip()
        except Exception as e:
            return f"Error analyzing transcript: {str(e)}"
# Shared analyzer instance, created on first use. Constructing a
# TranscriptAnalyzer loads the tokenizer and model, so it must not be
# repeated for every request.
_analyzer = None


def _get_analyzer():
    """Return the shared TranscriptAnalyzer, creating it on first call."""
    global _analyzer
    if _analyzer is None:
        # If construction raises, `_analyzer` stays None and the next call
        # simply tries again.
        _analyzer = TranscriptAnalyzer()
    return _analyzer


def process_transcript(transcript: str):
    """Analyze *transcript* and return the result as display text.

    This is the callback wired into the Gradio interface, so it never
    raises: every failure is reported as a string.

    Args:
        transcript: Meeting transcript entered by the user.

    Returns:
        The analysis text, or a string starting with
        ``"Error processing transcript: "`` when the input is blank or the
        analyzer cannot be created or run.
    """
    # Reject blank input up front instead of loading/running the model on it.
    if not transcript or not transcript.strip():
        return "Error processing transcript: transcript is empty"
    try:
        return _get_analyzer().analyze_transcript(transcript)
    except Exception as e:
        return f"Error processing transcript: {str(e)}"
# Gradio UI: one multi-line textbox in, one multi-line textbox out, with
# `process_transcript` as the handler and two sample transcripts offered as
# examples.
iface = gr.Interface(
    title="Meeting Transcript Analyzer",
    description="Analyze meeting transcripts to extract key information, dates, claims, and action items.",
    fn=process_transcript,
    inputs=[
        gr.Textbox(
            label="Enter Meeting Transcript",
            placeholder="Paste your meeting transcript here...",
            lines=10,
        ),
    ],
    outputs=gr.Textbox(lines=20, label="Analysis Result"),
    examples=[
        [
            "Meeting started on March 15, 2024 at 2:30 PM\nClaim #12345-ABC discussed regarding property damage\nJohn (Project Manager): Let's review the Q1 budget..."
        ],
        [
            "Sarah (Team Lead): Good morning everyone. Today's meeting is about the new product launch.\nMike (Marketing): We're targeting April 1st, 2024 for the release.\nClaim #789-XYZ needs to be resolved before launch."
        ],
    ],
)

# Start the web server only when this file is executed as a script, not when
# it is imported.
if __name__ == "__main__":
    iface.launch()