# Hugging Face Space: meeting-transcript summarizer + intent classifier.
from huggingface_hub import hf_hub_download
import joblib
from sklearn.feature_extraction.text import TfidfVectorizer

# Repository hosting both the trained classifier and its fitted TF-IDF
# vectorizer. The vectorizer MUST be the one fitted alongside the model,
# otherwise the feature columns will not line up at predict time.
repo_id = "DevBhojani/Classification-SamsumDataset"
model_filename = "random_forest_classifier_model.joblib"
vectorizer_filename = "tfidf_vectorizer.joblib"

# Download each artifact once and deserialize it. (The original script
# downloaded and joblib-loaded the classifier twice.)
model_path = hf_hub_download(repo_id=repo_id, filename=model_filename)
vectorizer_path = hf_hub_download(repo_id=repo_id, filename=vectorizer_filename)

loaded_classifier_model = joblib.load(model_path)
loaded_tfidf_vectorizer = joblib.load(vectorizer_path)
| import gradio as gr | |
| from transformers import pipeline, AutoTokenizer | |
| import re | |
| import contractions | |
| # Assuming loaded_classifier_model and loaded_tfidf_vectorizer are already loaded from the previous cell | |
def remove_html_tags(text):
    """Strip HTML/XML tags (e.g. ``<p>``, ``</b>``, ``<br/>``) from *text*.

    The previous implementation removed every non-alphanumeric character
    (``[^a-zA-Z0-9\\s]``) — duplicating ``remove_special_and_numbers`` and
    leaving the tag *names* behind — instead of removing markup. This
    version deletes whole ``<...>`` tags and nothing else.

    Args:
        text: Any value; coerced with ``str()`` before substitution.

    Returns:
        The input with tag spans removed.
    """
    return re.sub(r'<[^<>]+>', '', str(text))
def remove_url(text):
    """Delete ``http(s)://...`` and ``www....`` URLs from *text*.

    The value is coerced with ``str()`` first, so non-string input is safe.
    """
    url_pattern = r'https?://\S+|www\.\S+'
    return re.sub(url_pattern, '', str(text))
def remove_emojis(text):
    """Remove emoji / pictograph code points from *text*.

    Covers the same Unicode ranges as the original character class;
    input is coerced with ``str()`` before substitution.
    """
    ranges = (
        "\U0001F600-\U0001F64F",  # emoticons
        "\U0001F300-\U0001F5FF",  # symbols & pictographs
        "\U0001F680-\U0001F6FF",  # transport & map symbols
        "\U0001F1E0-\U0001F1FF",  # flags
        "\U00002700-\U000027BF",  # miscellaneous symbols
        "\U0001F900-\U0001F9FF",  # supplemental symbols
        "\U00002600-\U000026FF",  # weather & other symbols
        "\U0001FA70-\U0001FAFF",  # extended symbols
    )
    emoji_pattern = re.compile("[" + "".join(ranges) + "]+", flags=re.UNICODE)
    return emoji_pattern.sub("", str(text))
def expand_contractions(text):
    """Expand English contractions via the third-party ``contractions`` package."""
    expanded = contractions.fix(text)
    return expanded
def remove_special_and_numbers(text):
    """Keep only ASCII letters and whitespace; digits and punctuation are dropped."""
    letters_only = re.compile(r'[^a-zA-Z\s]')
    return letters_only.sub('', str(text))
def clean_text(text):
    """Normalize raw dialogue text for summarization and classification.

    Pipeline: strip URLs, strip emojis, expand contractions, lowercase.
    """
    for step in (remove_url, remove_emojis, expand_contractions):
        text = step(text)
    return text.lower()
# Summarization pipeline fine-tuned on the SAMSum dialogue dataset.
summarizer = pipeline("summarization", model="luisotorres/bart-finetuned-samsum")
# summarizer2 = pipeline("summarization", model="knkarthick/MEETING_SUMMARY")
# Matching tokenizer; used by truncate_chunk to clip inputs to the model limit.
tokenizer = AutoTokenizer.from_pretrained("luisotorres/bart-finetuned-samsum")
def split_into_chunks(conversation, n=15):
    """Split *conversation* into at most *n* roughly equal line-based chunks.

    The previous floor-division chunk size could produce MORE than *n*
    chunks (e.g. 16 lines with n=15 gave 16 one-line chunks); ceiling
    division caps the chunk count at *n*. The only in-file caller uses
    n=1, whose result (one chunk) is unchanged.

    Args:
        conversation: Dialogue text; split on newlines after stripping.
        n: Target maximum number of chunks.

    Returns:
        List of newline-joined chunk strings.
    """
    lines = conversation.strip().split('\n')
    chunk_size = max(1, -(-len(lines) // n))  # ceil(len(lines) / n)
    return ['\n'.join(lines[i:i + chunk_size])
            for i in range(0, len(lines), chunk_size)]
def truncate_chunk(text, max_tokens=1024):
    """Clip *text* to at most *max_tokens* tokens of the module-level tokenizer."""
    token_ids = tokenizer.encode(text, truncation=True, max_length=max_tokens)
    clipped = tokenizer.decode(token_ids, skip_special_tokens=True)
    return clipped
def summarize_chunks(chunks, model):
    """Summarize each non-empty chunk with *model*; failing chunks are skipped.

    Args:
        chunks: Iterable of text chunks.
        model: A summarization pipeline callable returning
            ``[{'summary_text': ...}]``.

    Returns:
        List of summary strings (one per successfully summarized chunk).
    """
    results = []
    for raw in chunks:
        piece = raw.strip()
        if piece:
            try:
                trimmed = truncate_chunk(piece)
                output = model(trimmed, max_length=1024, min_length=20, do_sample=False)
                results.append(output[0]['summary_text'])
            except Exception as exc:
                # Best-effort: log and move on so one bad chunk doesn't kill the run.
                print(f"Error summarizing chunk: {exc}")
    return results
def combine_summaries(summaries):
    """Concatenate chunk summaries into one space-separated string."""
    return ' '.join(summaries)
def summarize_dialogue(conversation, model):
    """Produce a single summary for *conversation* using *model*.

    With n=1 the dialogue is passed as one chunk, so the join step is a
    no-op beyond flattening the one-element list.
    """
    pieces = summarize_chunks(split_into_chunks(conversation, n=1), model)
    return combine_summaries(pieces)
def analyze_meeting_transcript(user_input):
    """Gradio handler: return (summary, intent label) for a transcript.

    Args:
        user_input: Raw transcript text from the textbox.

    Returns:
        Tuple of (summary string, human-readable intent label); a prompt
        message and empty label when the input is blank.
    """
    if not user_input.strip():
        return "Please enter some text to summarize.", ""
    cleaned = clean_text(user_input)
    summary = summarize_dialogue(cleaned, summarizer)
    # Vectorize with the fitted TF-IDF vectorizer, then classify.
    features = loaded_tfidf_vectorizer.transform([cleaned])
    predicted = loaded_classifier_model.predict(features)[0]
    # Strip the fastText-style "__label__" prefix and underscores for display.
    label = predicted.replace("__label__", "").replace("_", " ")
    return summary, label
# Gradio UI: one transcript textbox in, (summary, intent) textboxes out.
interface = gr.Interface(
    fn=analyze_meeting_transcript,
    inputs=gr.Textbox(label="Enter dialogue here", lines=12, placeholder="Paste your meeting transcript..."),
    outputs=[
        gr.Textbox(label="Summary (Luis Torres BART)"),
        # gr.Textbox(label="Summary 2 (KN Karthick MEETING_SUMMARY)"),
        gr.Textbox(label="Intent Classification")  # Removed "Placeholder"
    ],
    title="Meeting Transcript Analyzer",
    description="Summarizes meeting dialogues and classifies the intent.",
    # NOTE(review): `allow_flagging` is deprecated in newer Gradio releases in
    # favor of `flagging_mode` — confirm against the version pinned for this Space.
    allow_flagging="never",
    # Sample inputs shown under the form (SAMSum-style chat transcripts).
    examples=[
        [
            '''
Amanda: guess what!
Chris: hey ;) ur pregnant!
Amanda: I'm so proud of myself! Remember I go to these dancing classes with Michael?
Chris: Yeah?
Amanda: So we went yesterday and the instructor needed a partner to show the steps we had so far
Chris: so there's only one guy teaching you? without a female partner?
Amanda: Well, this time he was alone, BUT THAT'S NOT THE POINT! Listen!
Chris: yeah, sorry :D tell me!
Amanda: So he needed a partner and noone really knew the steps like perfectly
Amanda: and obviously noone wanted to be mocked
Amanda: so I thought, aaaah :D
Chris: u volunteered? really? you??
Amanda: yeah!
Chris: whooa! that's so great! #therapy #worthit :D
Amanda: yeah i know :D maybe one day i'll actually stop being so shy
Chris: that's definitely the first step! :D congrats!
Amanda: tx ^_^
Chris: what dance was it?
Amanda: English waltz
Chris: isn't it, like, SO difficult?
Amanda: yeah it is! but everyone said I looked like a pro :D
Chris: Well done!!
'''
        ],
        ["I have some exciting news to share!"],
        [
            '''
Beryl: Hello guys! How are you doing? We've lost contact for a few months now. Hope you are well.
Anton: A happy hello to you Beryl! Great to hear from you. We are fine, thanks. And yourself?
Beryl: I'm very well indeed. Thank you. Any changes in your setup?
Anton: Not really. SOS. Same Old Soup ;) But we are happy for that.
Beryl: Are you still running your lovely airbnb?
Anton: Oh yes, we are. We had a few months off during summer, our summer, but now bookings start flowing in. Well... Are you planning to visit us? You two are always welcome!
Beryl: You caught me here. I'm vaguely considering going down to Onrus again, most likely in January. What does it look like with vacancies then?
Anton: Perfect! Just give me your dates and I'll keep it booked for you.
Beryl: Would you prefer me to do it via airbnb website or just like this directly with you?
Anton: I think it'll be more advantageous for both of us to do it directly. Do you know exactly when you'll be coming?
Beryl: Not so much. Can I get back to you in 2, 3 days' time?
Anton: ASAP really. As I say we've been receiving bookings daily now.
Beryl: Well, no big deal. I'll be staying in Cape Town for a longer time and am quite flexible in my dates.
Anton: Will you be coming with Tino, if I may ask?
Beryl: No. I am single again. Hurray! So pls make it single occupancy any week in January, Anton.
Anton: Great! 4th till 12th?
Beryl: Very good. I'll call you beforehand from Cape Town. Greetings to you both!
Anton: Take care!'''
        ],
    ]
)
if __name__ == "__main__":
    # debug=True surfaces errors in the UI; share=True requests a public link.
    interface.launch(debug=True, share=True)