# Smart Meeting Summarizer — Gradio app
# (Removed: non-Python scrape residue — a file-size banner, git object
#  hashes, and a copied line-number gutter — that made the module
#  unparseable. The actual program begins below.)
# --- Imports (deduplicated: `from huggingface_hub import login` appeared twice) ---
import os

import spacy
import spacy.cli
from huggingface_hub import login
from transformers import pipeline

import PyPDF2
import nltk
import whisper
import json
from sklearn.feature_extraction.text import TfidfVectorizer
import numpy as np
import pandas as pd
import re
from textblob import TextBlob
from spacy import displacy
import gradio as gr

# Initialize spaCy globally. Download the English model only when it is
# missing instead of unconditionally re-downloading on every startup.
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    spacy.cli.download("en_core_web_sm")
    nlp = spacy.load("en_core_web_sm")

# Retrieve the Hugging Face token from secrets (environment variable "Gemma")
token = os.getenv("Gemma")
if not token:
    raise ValueError("Token not found. Ensure it is set correctly in Secrets.")

# Authenticate with the Hugging Face Hub
login(token=token)

# Load the summarization pipeline.
# NOTE(review): google/gemma-2-2b-it is an instruction-tuned causal LM, not a
# seq2seq summarization model — confirm the "summarization" task actually
# works with it, or switch to a dedicated summarizer.
summarizer = pipeline(
    "summarization",
    model="google/gemma-2-2b-it",
    token=token,  # `use_auth_token` is deprecated in recent transformers
)

# Text preprocessing
def preprocess_text(text):
    """Return *text* with spaCy stop words and punctuation tokens removed."""
    kept_tokens = (
        tok.text for tok in nlp(text) if not (tok.is_stop or tok.is_punct)
    )
    return " ".join(kept_tokens)

# Text summarization
def summarize_text(text):
    """Summarize *text* with the global `summarizer` pipeline.

    Returns an empty string for empty/whitespace-only input instead of
    invoking the model, which would fail on an empty prompt.
    """
    if not text or not text.strip():
        return ""
    # NOTE(review): max_length=400 can exceed the input length for short
    # texts, which makes transformers warn/clip — confirm acceptable.
    summary = summarizer(text, max_length=400, min_length=50, do_sample=False)
    return summary[0]['summary_text']

# Sentiment analysis
def sentiment_analysis(text):
    """Classify *text* as "Positive", "Negative" or "Neutral" from TextBlob polarity."""
    polarity = TextBlob(text).sentiment.polarity
    if polarity == 0:
        return "Neutral"
    return "Positive" if polarity > 0 else "Negative"

# Keyword extraction
def extract_keywords(text):
    """Return up to the 10 highest-scoring TF-IDF terms of *text* as a list of str.

    Bug fix: `tfidf_matrix.sum(axis=0)` yields a (1, n) matrix, so
    `.argsort()[::-1]` reversed the single ROW axis (a no-op) rather than
    the scores, and matrix-indexing `feature_names` then produced a nested
    list of ALL terms in ascending order. Flattening the scores to a 1-D
    array first makes the descending top-10 selection correct.
    """
    vectorizer = TfidfVectorizer(stop_words='english')
    tfidf_matrix = vectorizer.fit_transform([text])
    feature_names = np.array(vectorizer.get_feature_names_out())
    scores = np.asarray(tfidf_matrix.sum(axis=0)).ravel()
    sorted_idx = scores.argsort()[::-1]
    return feature_names[sorted_idx[:10]].tolist()

# Decision/action item extraction
def extract_decisions(text):
    """Collect sentences whose syntactic root is a verb (action-like sentences)."""
    parsed = nlp(text)
    return [
        sent.text
        for sent in parsed.sents
        for tok in sent
        if tok.dep_ == "ROOT" and tok.pos_ == "VERB"
    ]


# Backend function to handle uploaded file
def handle_file_upload(uploaded_file):
    """Run the full analysis pipeline on an uploaded PDF.

    Returns a dict with keys 'summary', 'sentiment', 'keywords' and
    'decisions', or None when no file was supplied.
    """
    if not uploaded_file:
        return None

    # Extract text from the PDF. PyPDF2's extract_text() may return None
    # for pages with no extractable text, so coalesce to "" before joining
    # (join also avoids quadratic string concatenation).
    pdf_reader = PyPDF2.PdfReader(uploaded_file)
    text = "".join(page.extract_text() or "" for page in pdf_reader.pages)

    # Preprocess text (summarization runs on the cleaned text; the other
    # analyses intentionally use the raw extracted text).
    cleaned_text = preprocess_text(text)
    summary = summarize_text(cleaned_text)
    sentiment = sentiment_analysis(text)
    keywords = extract_keywords(text)
    decisions = extract_decisions(text)

    return {
        'summary': summary,
        'sentiment': sentiment,
        'keywords': keywords,
        'decisions': decisions
    }


# Gradio Interface
def process_file(file):
    """Adapt handle_file_upload() results to the four Gradio output boxes."""
    fallback = ("No file uploaded!", "N/A", "N/A", "N/A")
    if file is None:
        return fallback
    results = handle_file_upload(file)
    if not results:
        return fallback
    keywords_text = ", ".join(map(str, results['keywords']))
    decisions_text = "\n".join(results['decisions'])
    return results['summary'], results['sentiment'], keywords_text, decisions_text

# Define Gradio interface
# One PDF input fans out to four text boxes, matching the 4-tuple that
# process_file returns (summary, sentiment, keywords, decisions).
interface = gr.Interface(
    fn=process_file,
    inputs=gr.File(label="Upload a PDF File"),
    outputs=[
        gr.Textbox(label="Summary"),
        gr.Textbox(label="Sentiment Analysis"),
        gr.Textbox(label="Keywords"),
        gr.Textbox(label="Decisions/Action Items")
    ],
    title="Smart Meeting Summarizer",
    description="Upload your meeting notes or PDF file to get a summary, sentiment analysis, keywords, and decisions/action items."
)

# Launch the Gradio app
# share=True exposes a public gradio.live URL; debug=True blocks the
# process and streams errors/logs to the console.
interface.launch(debug=True, share=True)