# Smart Meeting Summarizer — Gradio app
# (Removed: non-Python scrape residue — a file-size banner, git object
#  hashes, and a copied line-number gutter — that made the module
#  unparseable. The actual program begins below.)
# --- Imports (deduplicated: `from huggingface_hub import login` appeared twice) ---
import os

import spacy
import spacy.cli
from huggingface_hub import login
from transformers import pipeline

import PyPDF2
import nltk
import whisper
import json
from sklearn.feature_extraction.text import TfidfVectorizer
import numpy as np
import pandas as pd
import re
from textblob import TextBlob
from spacy import displacy
import gradio as gr

# Initialize spaCy globally. Download the English model only when it is
# missing instead of unconditionally re-downloading on every startup.
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    spacy.cli.download("en_core_web_sm")
    nlp = spacy.load("en_core_web_sm")

# Retrieve the Hugging Face token from secrets (environment variable "Gemma")
token = os.getenv("Gemma")
if not token:
    raise ValueError("Token not found. Ensure it is set correctly in Secrets.")

# Authenticate with the Hugging Face Hub
login(token=token)

# Load the summarization pipeline.
# NOTE(review): google/gemma-2-2b-it is an instruction-tuned causal LM, not a
# seq2seq summarization model — confirm the "summarization" task actually
# works with it, or switch to a dedicated summarizer.
summarizer = pipeline(
    "summarization",
    model="google/gemma-2-2b-it",
    token=token,  # `use_auth_token` is deprecated in recent transformers
)

# Text preprocessing
def preprocess_text(text):
    """Return *text* with spaCy stop words and punctuation tokens removed."""
    kept_tokens = (
        tok.text for tok in nlp(text) if not (tok.is_stop or tok.is_punct)
    )
    return " ".join(kept_tokens)

# Text summarization
def summarize_text(text):
    """Summarize *text* with the global `summarizer` pipeline.

    Returns an empty string for empty/whitespace-only input instead of
    invoking the model, which would fail on an empty prompt.
    """
    if not text or not text.strip():
        return ""
    # NOTE(review): max_length=400 can exceed the input length for short
    # texts, which makes transformers warn/clip — confirm acceptable.
    summary = summarizer(text, max_length=400, min_length=50, do_sample=False)
    return summary[0]['summary_text']

# Sentiment analysis
def sentiment_analysis(text):
    """Classify *text* as "Positive", "Negative" or "Neutral" from TextBlob polarity."""
    polarity = TextBlob(text).sentiment.polarity
    if polarity == 0:
        return "Neutral"
    return "Positive" if polarity > 0 else "Negative"

# Keyword extraction
def extract_keywords(text):
    """Return up to the 10 highest-scoring TF-IDF terms of *text* as a list of str.

    Bug fix: `tfidf_matrix.sum(axis=0)` yields a (1, n) matrix, so
    `.argsort()[::-1]` reversed the single ROW axis (a no-op) rather than
    the scores, and matrix-indexing `feature_names` then produced a nested
    list of ALL terms in ascending order. Flattening the scores to a 1-D
    array first makes the descending top-10 selection correct.
    """
    vectorizer = TfidfVectorizer(stop_words='english')
    tfidf_matrix = vectorizer.fit_transform([text])
    feature_names = np.array(vectorizer.get_feature_names_out())
    scores = np.asarray(tfidf_matrix.sum(axis=0)).ravel()
    sorted_idx = scores.argsort()[::-1]
    return feature_names[sorted_idx[:10]].tolist()

# Decision/action item extraction
def extract_decisions(text):
    """Collect sentences whose syntactic root is a verb (action-like sentences)."""
    parsed = nlp(text)
    return [
        sent.text
        for sent in parsed.sents
        for tok in sent
        if tok.dep_ == "ROOT" and tok.pos_ == "VERB"
    ]


# Backend function to handle uploaded file
def handle_file_upload(uploaded_file):
    """Run the full analysis pipeline on an uploaded PDF.

    Returns a dict with keys 'summary', 'sentiment', 'keywords' and
    'decisions', or None when no file was supplied.
    """
    if not uploaded_file:
        return None

    # Extract text from the PDF. PyPDF2's extract_text() may return None
    # for pages with no extractable text, so coalesce to "" before joining
    # (join also avoids quadratic string concatenation).
    pdf_reader = PyPDF2.PdfReader(uploaded_file)
    text = "".join(page.extract_text() or "" for page in pdf_reader.pages)

    # Preprocess text (summarization runs on the cleaned text; the other
    # analyses intentionally use the raw extracted text).
    cleaned_text = preprocess_text(text)
    summary = summarize_text(cleaned_text)
    sentiment = sentiment_analysis(text)
    keywords = extract_keywords(text)
    decisions = extract_decisions(text)

    return {
        'summary': summary,
        'sentiment': sentiment,
        'keywords': keywords,
        'decisions': decisions
    }


# Gradio Interface
def process_file(file):
    """Adapt handle_file_upload() results to the four Gradio output boxes."""
    fallback = ("No file uploaded!", "N/A", "N/A", "N/A")
    if file is None:
        return fallback
    results = handle_file_upload(file)
    if not results:
        return fallback
    keywords_text = ", ".join(map(str, results['keywords']))
    decisions_text = "\n".join(results['decisions'])
    return results['summary'], results['sentiment'], keywords_text, decisions_text

# Define Gradio interface
# One PDF input fans out to four text boxes, matching the 4-tuple that
# process_file returns (summary, sentiment, keywords, decisions).
interface = gr.Interface(
    fn=process_file,
    inputs=gr.File(label="Upload a PDF File"),
    outputs=[
        gr.Textbox(label="Summary"),
        gr.Textbox(label="Sentiment Analysis"),
        gr.Textbox(label="Keywords"),
        gr.Textbox(label="Decisions/Action Items")
    ],
    title="Smart Meeting Summarizer",
    description="Upload your meeting notes or PDF file to get a summary, sentiment analysis, keywords, and decisions/action items."
)

# Launch the Gradio app
# share=True exposes a public gradio.live URL; debug=True blocks the
# process and streams errors/logs to the console.
interface.launch(debug=True, share=True)