Spaces:

sainathBelagavi
/

transcript_summary

Sleeping

App Files Files Community

transcript_summary / app.py

sainathBelagavi

Update app.py

cab4e03 verified about 1 year ago

raw

history blame contribute delete

5.38 kB

	# app.py
	import gradio as gr
	import json
	import re
	import os
	from datetime import datetime
	from transformers import AutoTokenizer, AutoModelForCausalLM
	import torch
	from huggingface_hub import login

	# Authenticate against the Hugging Face Hub before anything else runs.
	# The token is read from the HUGGINGFACE_TOKEN environment variable; a
	# missing token or a failed login is reported on stdout and then
	# re-raised so the module does not finish importing.
	hf_token = os.environ.get('HUGGINGFACE_TOKEN')

	try:
	    if not hf_token:
	        raise ValueError("HUGGINGFACE_TOKEN not found in environment variables")
	    login(token=hf_token)
	except Exception as e:
	    print(f"Error during Hugging Face login: {str(e)}")
	    raise

	class TranscriptAnalyzer:
	    """Summarise meeting transcripts with a causal language model.

	    The analyzer pre-extracts dates and claim/case references with regular
	    expressions, embeds them in an instruction prompt together with the
	    transcript, and asks the model for a structured analysis.
	    """

	    # Date formats recognised by extract_dates(), tried in this order.
	    # The digit look-arounds stop the d-m-y pattern from matching the tail
	    # of an ISO date (e.g. "24-03-15" inside "2024-03-15").
	    _DATE_PATTERNS = (
	        re.compile(r'(?<!\d)\d{1,2}[-/]\d{1,2}[-/]\d{2,4}(?!\d)'),
	        re.compile(r'(?<!\d)\d{4}[-/]\d{1,2}[-/]\d{1,2}(?!\d)'),
	        re.compile(
	            r'\b(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]* '
	            r'\d{1,2}(?:st|nd|rd|th)?,? \d{4}\b'
	        ),
	    )

	    # "claim 123", "Claim #123-ABC", "case # 42", or a bare "#123-ABC".
	    # A single pattern reports each reference once, in text order.
	    _CLAIM_PATTERN = re.compile(
	        r'(?:\b(?:claim|case)\s+#?|#)\s*\d+[-\w]*',
	        re.IGNORECASE,
	    )

	    # Built from explicit lines so the prompt text never depends on how
	    # this source file happens to be indented.
	    # NOTE(review): the "<s>[INST] ... [/INST]</s>" wrapper looks like the
	    # Llama/Mistral instruction convention; confirm it against the chat
	    # format documented for the model configured in __init__.
	    _PROMPT_TEMPLATE = "\n".join([
	        "<s>[INST] Please analyze this meeting transcript with extreme precision and provide a structured analysis.",
	        "Remember to:",
	        "1. Only include information explicitly stated",
	        '2. Mark unclear information as "UNCLEAR"',
	        "3. Preserve exact numbers, dates, and claims",
	        "4. Focus on factual content",
	        "",
	        "Identified dates: {dates}",
	        "Identified claims: {claims}",
	        "",
	        "Please analyze:",
	        "{transcript}",
	        "",
	        "Provide your analysis in this format:",
	        "PARTICIPANTS:",
	        "- List participants and their roles",
	        "",
	        "CONTEXT:",
	        "- Meeting purpose",
	        "- Duration (if mentioned)",
	        "",
	        "KEY POINTS:",
	        "- Main topics",
	        "- Decisions made",
	        "- Important numbers/metrics",
	        "",
	        "ACTION ITEMS:",
	        "- Tasks and assignments",
	        "- Deadlines",
	        "- Responsible parties",
	        "",
	        "FOLLOW UP:",
	        "- Next meetings",
	        "- Pending items [/INST]</s>",
	    ])

	    def __init__(self):
	        """Load the tokenizer and model named by ``self.model_name``.

	        Uses the module-level ``hf_token`` for Hub authentication.

	        Raises:
	            Exception: whatever the loaders raise; the error is printed
	                first and then re-raised unchanged.
	        """
	        try:
	            # Initialize the model and tokenizer with auth token
	            self.model_name = "microsoft/Phi-3.5-mini-instruct"
	            # `token` replaces the deprecated `use_auth_token` argument.
	            self.tokenizer = AutoTokenizer.from_pretrained(
	                self.model_name,
	                token=hf_token,
	                trust_remote_code=True
	            )
	            self.model = AutoModelForCausalLM.from_pretrained(
	                self.model_name,
	                token=hf_token,
	                torch_dtype=torch.float16,
	                device_map="auto",
	                trust_remote_code=True
	            )
	        except Exception as e:
	            print(f"Error initializing model: {str(e)}")
	            raise

	    def extract_dates(self, text: str) -> list:
	        """Return every date-like substring found in *text*.

	        Recognises ``d/m/y``-style numeric dates, ``yyyy-mm-dd``-style
	        dates (``-`` or ``/`` separators) and English month-name dates
	        such as "March 15, 2024" or "April 1st, 2024". Results are grouped
	        by pattern (numeric, ISO-like, month-name), not by position.
	        Empty or ``None`` input yields an empty list.
	        """
	        if not text:
	            return []
	        dates = []
	        for pattern in self._DATE_PATTERNS:
	            # Patterns contain no capturing groups, so findall() yields
	            # the full matched text.
	            dates.extend(pattern.findall(text))
	        return dates

	    def extract_claim_numbers(self, text: str) -> list:
	        """Return claim/case references found in *text*, in text order.

	        Matches "claim"/"case" (case-insensitive, whole word) followed by
	        an optional ``#`` and a number, or a bare ``#`` followed by a
	        number. Any trailing ``-``/word characters (e.g. "-ABC") are kept.
	        Empty or ``None`` input yields an empty list.
	        """
	        if not text:
	            return []
	        return self._CLAIM_PATTERN.findall(text)

	    def generate_prompt(self, transcript: str) -> str:
	        """Build the instruction prompt sent to the model for *transcript*.

	        The dates and claim references extracted from the transcript are
	        listed ahead of it ("None" when nothing was found).
	        """
	        dates = self.extract_dates(transcript)
	        claims = self.extract_claim_numbers(transcript)

	        return self._PROMPT_TEMPLATE.format(
	            dates=', '.join(dates) if dates else 'None',
	            claims=', '.join(claims) if claims else 'None',
	            transcript=transcript,
	        )

	    def analyze_transcript(self, transcript: str) -> str:
	        """Run the model on *transcript* and return its analysis text.

	        Never raises: any failure is returned as a string starting with
	        "Error analyzing transcript:".
	        """
	        try:
	            # Generate prompt
	            prompt = self.generate_prompt(transcript)

	            # Tokenize input
	            inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)

	            # Generate response
	            with torch.no_grad():
	                outputs = self.model.generate(
	                    **inputs,
	                    max_new_tokens=1000,
	                    temperature=0.1,
	                    do_sample=True,
	                    pad_token_id=self.tokenizer.eos_token_id
	                )

	            # The generated sequence starts with the prompt tokens. Slice
	            # them off instead of splitting the decoded text on "[/INST]",
	            # which breaks when the transcript or the model output
	            # contains that marker.
	            prompt_length = inputs["input_ids"].shape[-1]
	            generated_tokens = outputs[0][prompt_length:]

	            # Decode response
	            response = self.tokenizer.decode(generated_tokens, skip_special_tokens=True)
	            return response.strip()
	        except Exception as e:
	            return f"Error analyzing transcript: {str(e)}"

	# Shared analyzer instance, created on first use. Constructing a
	# TranscriptAnalyzer loads the tokenizer and model, so it must not be
	# repeated for every request.
	_analyzer = None


	def _get_analyzer():
	    """Return the shared TranscriptAnalyzer, building it on first call."""
	    global _analyzer
	    if _analyzer is None:
	        # Assigned only after successful construction, so a failed load
	        # is retried on the next request.
	        _analyzer = TranscriptAnalyzer()
	    return _analyzer


	def process_transcript(transcript: str) -> str:
	    """Analyze *transcript* and return the result as display text.

	    Used as the Gradio handler, so it never raises: an empty or
	    whitespace-only transcript yields a short hint without touching the
	    model, and any failure is returned as a string starting with
	    "Error processing transcript:".
	    """
	    if not transcript or not transcript.strip():
	        return "Please provide a meeting transcript to analyze."
	    try:
	        return _get_analyzer().analyze_transcript(transcript)
	    except Exception as e:
	        return f"Error processing transcript: {str(e)}"

	# Gradio UI: one multi-line input box for the transcript, one output box
	# for the analysis, and two sample transcripts offered as examples.
	_transcript_box = gr.Textbox(
	    lines=10,
	    label="Enter Meeting Transcript",
	    placeholder="Paste your meeting transcript here..."
	)
	_result_box = gr.Textbox(
	    label="Analysis Result",
	    lines=20
	)
	_example_transcripts = [
	    ["Meeting started on March 15, 2024 at 2:30 PM\nClaim #12345-ABC discussed regarding property damage\nJohn (Project Manager): Let's review the Q1 budget..."],
	    ["Sarah (Team Lead): Good morning everyone. Today's meeting is about the new product launch.\nMike (Marketing): We're targeting April 1st, 2024 for the release.\nClaim #789-XYZ needs to be resolved before launch."],
	]

	iface = gr.Interface(
	    fn=process_transcript,
	    inputs=[_transcript_box],
	    outputs=_result_box,
	    title="Meeting Transcript Analyzer",
	    description="Analyze meeting transcripts to extract key information, dates, claims, and action items.",
	    examples=_example_transcripts,
	)

	# Start the web server only when this file is run as a script.
	if __name__ == "__main__":
	    iface.launch()