Spaces: Build error
Create app.py
app.py ADDED
@@ -0,0 +1,128 @@
from huggingface_hub import login
import os
import PyPDF2
import spacy
import nltk
from transformers import pipeline
import whisper
import json
from sklearn.feature_extraction.text import TfidfVectorizer
import numpy as np
import pandas as pd
import re
from textblob import TextBlob
from spacy import displacy
import gradio as gr
# Note: nltk, whisper, json, pandas, re and displacy are imported but not used in this file

# Gemma 2 is a gated model; log in if an HF_TOKEN secret is configured (assumption: the Space defines one)
if os.environ.get("HF_TOKEN"):
    login(token=os.environ["HF_TOKEN"])

# Initialize spaCy model and other NLP tools.
# "python -m spacy download en_core_web_sm" is a shell command, not valid Python,
# so download the model from code if it is missing.
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    from spacy.cli import download
    download("en_core_web_sm")
    nlp = spacy.load("en_core_web_sm")

# Load the Gemma 2 model. It is decoder-only, so it must go through the text-generation
# pipeline; the summarization pipeline only accepts encoder-decoder models.
summarizer = pipeline("text-generation", model="google/gemma-2-2b-it")

# Text preprocessing
def preprocess_text(text):
    doc = nlp(text)
    tokens = [token.text for token in doc if not token.is_stop and not token.is_punct]
    cleaned_text = " ".join(tokens)
    return cleaned_text

# Text summarization: prompt Gemma 2 for a summary and return only the generated continuation
def summarize_text(text):
    prompt = f"Summarize the following meeting notes:\n\n{text}\n\nSummary:"
    summary = summarizer(prompt, max_new_tokens=400, do_sample=False, return_full_text=False)
    return summary[0]['generated_text'].strip()

# Sentiment analysis
def sentiment_analysis(text):
    blob = TextBlob(text)
    sentiment = blob.sentiment.polarity
    if sentiment > 0:
        return "Positive"
    elif sentiment < 0:
        return "Negative"
    else:
        return "Neutral"

# Keyword extraction
def extract_keywords(text):
    vectorizer = TfidfVectorizer(stop_words='english')
    tfidf_matrix = vectorizer.fit_transform([text])
    feature_names = np.array(vectorizer.get_feature_names_out())
    # Flatten the (1, n_features) score matrix to a 1-D array before sorting,
    # then keep the ten highest-scoring terms
    scores = np.asarray(tfidf_matrix.sum(axis=0)).ravel()
    sorted_idx = scores.argsort()[::-1]
    top_keywords = feature_names[sorted_idx[:10]]
    return top_keywords.tolist()

# Decision/action item extraction: keep sentences whose root is a verb
def extract_decisions(text):
    doc = nlp(text)
    decisions = []
    for sent in doc.sents:
        for token in sent:
            if token.dep_ == "ROOT" and token.pos_ == "VERB":
                decisions.append(sent.text)
    return decisions


# Backend function to handle the uploaded file
def handle_file_upload(uploaded_file):
    if uploaded_file:
        # Extract text from the PDF (gr.File passes a file path, which PdfReader accepts)
        pdf_reader = PyPDF2.PdfReader(uploaded_file)
        text = ""
        for page in pdf_reader.pages:
            text += page.extract_text() or ""  # extract_text() can return None for image-only pages

        # Preprocess text
        cleaned_text = preprocess_text(text)

        # Summarize text
        summary = summarize_text(cleaned_text)

        # Sentiment analysis
        sentiment = sentiment_analysis(text)

        # Extract keywords
        keywords = extract_keywords(text)

        # Extract decisions/action items
        decisions = extract_decisions(text)

        return {
            'summary': summary,
            'sentiment': sentiment,
            'keywords': keywords,
            'decisions': decisions
        }
    else:
        return None


# Gradio interface
def process_file(file):
    if file is not None:
        results = handle_file_upload(file)
        if results:
            return (
                results['summary'],
                results['sentiment'],
                ", ".join(map(str, results['keywords'])),
                "\n".join(results['decisions'])
            )
    return "No file uploaded!", "N/A", "N/A", "N/A"

# Define the Gradio interface
interface = gr.Interface(
    fn=process_file,
    inputs=gr.File(label="Upload a PDF File"),
    outputs=[
        gr.Textbox(label="Summary"),
        gr.Textbox(label="Sentiment Analysis"),
        gr.Textbox(label="Keywords"),
        gr.Textbox(label="Decisions/Action Items")
    ],
    title="Smart Meeting Summarizer",
    description="Upload your meeting notes as a PDF file to get a summary, sentiment analysis, keywords, and decisions/action items."
)

# Launch the Gradio app
interface.launch(debug=True)
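
Note on the build error: this commit only adds app.py, so a likely (but unconfirmed) cause is that the Space's requirements.txt does not yet list the libraries the script imports. A minimal sketch of such a file, assuming default pip package names and no pinned versions (the en_core_web_sm model itself is downloaded at runtime by the code above):

requirements.txt (sketch)
spacy
PyPDF2
nltk
transformers
torch
openai-whisper
scikit-learn
numpy
pandas
textblob
gradio
huggingface_hub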