import spacy.cli
spacy.cli.download("en_core_web_sm")
from huggingface_hub import login
import os
import PyPDF2
import spacy
import nltk
from transformers import pipeline
import whisper
import json
from sklearn.feature_extraction.text import TfidfVectorizer
import numpy as np
import pandas as pd
import re
from textblob import TextBlob
from spacy import displacy
import gradio as gr
from huggingface_hub import login
# --- One-time setup executed at import time ---

# Retrieve the Hugging Face token from secrets (the Space secret is named "Gemma").
token = os.getenv("Gemma")
if not token:
    raise ValueError("Token not found. Ensure it is set correctly in Secrets.")

# Authenticate with the Hugging Face Hub so gated models (Gemma) can be pulled.
login(token=token)

# Load the summarization pipeline.
# NOTE: `use_auth_token` is deprecated in recent transformers releases;
# `token` is the supported keyword and behaves identically.
summarizer = pipeline(
    "summarization",
    model="google/gemma-2-2b-it",
    token=token,
)

# Initialize spaCy globally — shared by preprocess_text / extract_decisions.
nlp = spacy.load("en_core_web_sm")
# Text preprocessing
def preprocess_text(text):
    """Strip stop words and punctuation from *text* using spaCy.

    Returns the surviving token texts joined by single spaces.
    """
    parsed = nlp(text)
    kept = (tok.text for tok in parsed if not (tok.is_stop or tok.is_punct))
    return " ".join(kept)
# Text summarization
def summarize_text(text):
    """Summarize *text* with the module-level `summarizer` pipeline.

    Raises
    ------
    ValueError
        If *text* is empty or whitespace-only.

    Returns
    -------
    str
        The generated summary text.
    """
    if not text.strip():  # Check for empty text
        raise ValueError("Input text is empty or invalid.")
    input_length = len(text.split())  # Approximate input length in words
    # Cap max_length dynamically, but keep a small floor so very short
    # inputs do not produce max_length=0 (which breaks generation).
    max_len = min(400, max(16, int(0.8 * input_length)))
    # min_length must never exceed max_length or the pipeline errors out.
    min_len = min(50, max_len)
    # Original debug print hard-coded "9,062"; report the real max_len.
    print(f"Input length: {input_length}, max_length: {max_len}")
    summary = summarizer(text, max_length=max_len, min_length=min_len, do_sample=False)
    return summary[0]['summary_text']
# Sentiment analysis
def sentiment_analysis(text):
    """Classify overall sentiment of *text* via TextBlob polarity.

    Returns "Positive", "Negative", or "Neutral".
    """
    polarity = TextBlob(text).sentiment.polarity
    if polarity == 0:
        return "Neutral"
    return "Positive" if polarity > 0 else "Negative"
# Keyword extraction
def extract_keywords(text):
    """Return (up to) the 10 highest-scoring TF-IDF terms in *text*.

    Returns
    -------
    list[str]
        Feature names sorted by descending TF-IDF weight.
    """
    vectorizer = TfidfVectorizer(stop_words='english')
    tfidf_matrix = vectorizer.fit_transform([text])
    feature_names = np.array(vectorizer.get_feature_names_out())
    # .sum(axis=0) on the sparse matrix yields a (1, n) matrix; the original
    # code applied argsort()[::-1] and [:10] to that 2-D object, which reversed
    # and sliced ROWS (a no-op on one row) instead of selecting top terms.
    # Ravel to a 1-D score vector first.
    scores = np.asarray(tfidf_matrix.sum(axis=0)).ravel()
    sorted_idx = scores.argsort()[::-1]  # argsort is ascending; reverse it
    top_keywords = feature_names[sorted_idx[:10]]
    return top_keywords.tolist()
# Decision/action item extraction
def extract_decisions(text):
    """Collect sentences whose dependency root is a verb.

    Such sentences are treated as candidate decisions / action items.
    Returns a list of sentence strings.
    """
    parsed = nlp(text)
    return [
        sentence.text
        for sentence in parsed.sents
        for tok in sentence
        if tok.dep_ == "ROOT" and tok.pos_ == "VERB"
    ]
# Backend function to handle uploaded file
def handle_file_upload(uploaded_file):
    """Run the full analysis pipeline on an uploaded PDF.

    Parameters
    ----------
    uploaded_file
        A path or file-like object accepted by PyPDF2.PdfReader
        (Gradio supplies a temporary file).

    Returns
    -------
    dict | None
        Keys 'summary', 'sentiment', 'keywords', 'decisions',
        or None when no file was supplied.
    """
    if not uploaded_file:
        return None
    # Extract text from the PDF. extract_text() may return None for
    # image-only pages; coerce to "" so concatenation never raises.
    pdf_reader = PyPDF2.PdfReader(uploaded_file)
    text = "".join(page.extract_text() or "" for page in pdf_reader.pages)
    # Summarize the stop-word-stripped text; run the other analyses on
    # the raw text (matches the original behavior).
    cleaned_text = preprocess_text(text)
    return {
        'summary': summarize_text(cleaned_text),
        'sentiment': sentiment_analysis(text),
        'keywords': extract_keywords(text),
        'decisions': extract_decisions(text),
    }
# Gradio Interface
def process_file(file):
    """Adapt a Gradio file input to the four textbox outputs.

    Returns a 4-tuple: (summary, sentiment, keywords, decisions),
    with placeholder strings when no file was uploaded.
    """
    no_file = ("No file uploaded!", "N/A", "N/A", "N/A")
    if file is None:
        return no_file
    results = handle_file_upload(file)
    if not results:
        return no_file
    keywords_text = ", ".join(map(str, results['keywords']))
    decisions_text = "\n".join(results['decisions'])
    return results['summary'], results['sentiment'], keywords_text, decisions_text
# Define Gradio interface: one PDF upload in, four text panes out,
# wired to process_file above.
interface = gr.Interface(
    fn=process_file,
    inputs=gr.File(label="Upload a PDF File"),
    outputs=[
        gr.Textbox(label="Summary"),
        gr.Textbox(label="Sentiment Analysis"),
        gr.Textbox(label="Keywords"),
        gr.Textbox(label="Decisions/Action Items"),
    ],
    title="Smart Meeting Summarizer",
    description="Upload your meeting notes or PDF file to get a summary, sentiment analysis, keywords, and decisions/action items.",
)

# Launch the Gradio app (share=True exposes a public link; debug=True
# surfaces tracebacks in the UI).
interface.launch(debug=True, share=True)