# Smart Meeting Summarizer — Hugging Face Spaces app.
# (Build-log residue "Spaces: / Build error" removed from this paste.)
# Download the spaCy English model at container startup so the
# spacy.load("en_core_web_sm") call further down succeeds on a fresh build.
import spacy.cli
spacy.cli.download("en_core_web_sm")
| from huggingface_hub import login | |
| import os | |
| import PyPDF2 | |
| import spacy | |
| import nltk | |
| from transformers import pipeline | |
| import whisper | |
| import json | |
| from sklearn.feature_extraction.text import TfidfVectorizer | |
| import numpy as np | |
| import pandas as pd | |
| import re | |
| from textblob import TextBlob | |
| from spacy import displacy | |
| import gradio as gr | |
| from huggingface_hub import login | |
# Retrieve the Hugging Face token from secrets.
# NOTE(review): the secret is looked up under the name "Gemma" — verify this
# matches the secret name configured in the Space's settings.
token = os.getenv("Gemma")
if not token:
    raise ValueError("Token not found. Ensure it is set correctly in Secrets.")
# Authenticate with the Hugging Face Hub (needed because the
# google/gemma-2-2b-it model loaded below is accessed with this token).
login(token=token)
# Load the summarization pipeline (used by summarize_text below).
# NOTE(review): `use_auth_token` is deprecated in recent transformers releases
# in favor of `token=` — confirm against the installed transformers version.
summarizer = pipeline(
    "summarization",
    model="google/gemma-2-2b-it",
    use_auth_token=token
)
# Initialize spaCy globally; shared by preprocess_text and extract_decisions.
nlp = spacy.load("en_core_web_sm")
# Text preprocessing
def preprocess_text(text):
    """Strip stop words and punctuation from *text* via the global spaCy
    pipeline and return the surviving tokens joined by single spaces."""
    parsed = nlp(text)
    kept = [tok.text for tok in parsed if not (tok.is_stop or tok.is_punct)]
    return " ".join(kept)
# Text summarization
def summarize_text(text):
    """Summarize *text* with the global `summarizer` pipeline.

    Fixes over the original:
    - the debug print hardcoded "max_length: 9,188" instead of the computed
      value; it now reports the real max_len.
    - for short inputs, int(0.8 * input_length) could drop below min_length
      (or to 0), which breaks the generation call; max_len is now clamped to
      stay strictly above min_length.

    Raises:
        ValueError: if *text* is empty or whitespace-only.
    Returns:
        str: the generated summary text.
    """
    if not text.strip():  # Check for empty text
        raise ValueError("Input text is empty or invalid.")
    input_length = len(text.split())  # approximate input length in words
    min_len = 50
    # Scale max_length with the input, cap at 400, and keep it above min_len
    # so the pipeline never receives max_length <= min_length.
    max_len = max(min_len + 10, min(400, int(0.8 * input_length)))
    print(f"Input length: {input_length}, max_length: {max_len}")  # debugging info
    summary = summarizer(text, max_length=max_len, min_length=min_len, do_sample=False)
    return summary[0]['summary_text']
# Sentiment analysis
def sentiment_analysis(text):
    """Label *text* as "Positive", "Negative", or "Neutral" based on the sign
    of the TextBlob polarity score."""
    polarity = TextBlob(text).sentiment.polarity
    if polarity > 0:
        return "Positive"
    if polarity < 0:
        return "Negative"
    return "Neutral"
# Keyword extraction
def extract_keywords(text):
    """Return up to 10 keywords from *text*, ranked by descending TF-IDF score.

    Bug fix: `tfidf_matrix.sum(axis=0)` yields a (1, n) matrix, so the original
    `argsort()[::-1]` reversed the single-row axis (a no-op) and `[:10]` sliced
    rows, returning every term in *ascending* score order. Flattening the sums
    to a 1-D array first makes the descending ranking and top-10 slice correct.

    Returns:
        list[str]: the highest-scoring terms (fewer than 10 if the text has
        fewer distinct non-stop-word terms).
    """
    vectorizer = TfidfVectorizer(stop_words='english')
    tfidf_matrix = vectorizer.fit_transform([text])
    feature_names = np.array(vectorizer.get_feature_names_out())
    scores = np.asarray(tfidf_matrix.sum(axis=0)).ravel()  # (n,) per-term scores
    top_idx = scores.argsort()[::-1][:10]  # indices of the 10 largest scores
    return feature_names[top_idx].tolist()
# Decision/action item extraction
def extract_decisions(text):
    """Collect sentences whose syntactic root token is a verb — a lightweight
    heuristic for decision/action-item sentences."""
    return [
        sentence.text
        for sentence in nlp(text).sents
        for tok in sentence
        if tok.dep_ == "ROOT" and tok.pos_ == "VERB"
    ]
# Backend function to handle uploaded file
def handle_file_upload(uploaded_file):
    """Extract text from an uploaded PDF and run the full analysis pipeline.

    Bug fix: PyPDF2's ``page.extract_text()`` may return None for pages with
    no extractable text (e.g. scanned images); the original ``text += ...``
    raised TypeError in that case. An ``or ""`` fallback keeps extraction safe.

    Args:
        uploaded_file: a file path or file-like object accepted by
            PyPDF2.PdfReader, or a falsy value.
    Returns:
        dict with keys 'summary', 'sentiment', 'keywords', 'decisions',
        or None when no file was provided.
    """
    if not uploaded_file:
        return None
    pdf_reader = PyPDF2.PdfReader(uploaded_file)
    # Join page texts; extract_text() can be None, so substitute "".
    text = "".join(page.extract_text() or "" for page in pdf_reader.pages)
    # Summarize the stop-word-cleaned text; run the other analyses on the
    # raw text (matching the original behavior).
    cleaned_text = preprocess_text(text)
    return {
        'summary': summarize_text(cleaned_text),
        'sentiment': sentiment_analysis(text),
        'keywords': extract_keywords(text),
        'decisions': extract_decisions(text),
    }
# Gradio Interface
def process_file(file):
    """Adapt handle_file_upload() results to the four Gradio output textboxes.

    Returns a 4-tuple (summary, sentiment, keywords, decisions); when no file
    was uploaded or no results came back, returns placeholder strings.
    """
    fallback = ("No file uploaded!", "N/A", "N/A", "N/A")
    if file is None:
        return fallback
    results = handle_file_upload(file)
    if not results:
        return fallback
    keywords_text = ", ".join(map(str, results['keywords']))
    decisions_text = "\n".join(results['decisions'])
    return results['summary'], results['sentiment'], keywords_text, decisions_text
# Define Gradio interface: one PDF file in, four text panels out,
# all produced by process_file above.
interface = gr.Interface(
    fn=process_file,
    inputs=gr.File(label="Upload a PDF File"),
    outputs=[
        gr.Textbox(label="Summary"),
        gr.Textbox(label="Sentiment Analysis"),
        gr.Textbox(label="Keywords"),
        gr.Textbox(label="Decisions/Action Items")
    ],
    title="Smart Meeting Summarizer",
    description="Upload your meeting notes or PDF file to get a summary, sentiment analysis, keywords, and decisions/action items."
)
# Launch the Gradio app.
# NOTE(review): share=True requests a public share link — confirm this is
# intended when the app already runs on a public Space.
interface.launch(debug=True, share=True)