Spaces:
Runtime error
Runtime error
| import PyPDF2 as pdf | |
| from sklearn.feature_extraction.text import CountVectorizer | |
| from sklearn.metrics.pairwise import cosine_similarity | |
| import streamlit as st | |
| import bert_score | |
| from rouge_score import rouge_scorer | |
| from transformers import T5ForConditionalGeneration, T5Tokenizer | |
| from difflib import SequenceMatcher | |
| from nltk.sentiment import SentimentIntensityAnalyzer | |
| import matplotlib.pyplot as plt | |
| import nltk | |
# Download the VADER lexicon used by nltk's SentimentIntensityAnalyzer.
nltk.download('vader_lexicon')

st.set_page_config(page_title="Streamlit Sentiment App", page_icon="static/res/favicon.png")

# Streamlit re-executes this script on every widget interaction, so an
# uncached from_pretrained() would reload the T5 weights on each rerun.
# st.cache_resource (Streamlit >= 1.18) keeps a single shared copy; on older
# versions fall back to a no-op decorator, i.e. the original behaviour.
_cache_resource = getattr(st, "cache_resource", lambda func: func)


@_cache_resource
def _load_summarizer():
    """Load the t5-base model and tokenizer used by generate_summary."""
    return (
        T5ForConditionalGeneration.from_pretrained("t5-base"),
        T5Tokenizer.from_pretrained("t5-base"),
    )


# Initialize the model and tokenizer
model, tokenizer = _load_summarizer()
def extract_text(uploaded_file):
    """Return the text of every page of a PDF as a single string.

    Args:
        uploaded_file: File-like object accepted by ``pdf.PdfReader`` (here a
            Streamlit upload), or a falsy value when nothing was uploaded.

    Returns:
        The page texts joined with newlines, or "" when no file was given.
    """
    if not uploaded_file:
        return ""
    reader = pdf.PdfReader(uploaded_file)
    # Join pages with a newline so the last word of one page is not fused
    # with the first word of the next; ``or ""`` covers pages for which the
    # reader yields no text.
    return "\n".join(page.extract_text() or "" for page in reader.pages)
def calculate_similarity(text1, text2):
    """Return the cosine similarity of the two texts' word-count vectors.

    Args:
        text1: First document as a string.
        text2: Second document as a string.

    Returns:
        A float similarity score; 0.0 when either text is blank or when
        neither text contains a token the vectorizer can count.
    """
    # A blank document has an all-zero count vector, so there is nothing to
    # compare; answering here also avoids the empty-vocabulary error below.
    if not text1.strip() or not text2.strip():
        return 0.0
    try:
        counts = CountVectorizer().fit_transform([text1, text2])
    except ValueError:
        # fit_transform raises ValueError when the vocabulary is empty,
        # e.g. both texts consist only of punctuation.
        return 0.0
    # cosine_similarity accepts the sparse matrix directly; row 0 vs row 1.
    return float(cosine_similarity(counts)[0][1])
def bert_similarity(text1, text2):
    """Return the BERTScore F1 obtained by scoring ``text1`` against ``text2``."""
    candidates = [text1]
    references = [text2]
    # score() returns (precision, recall, F1) tensors; only F1 is reported.
    _precision, _recall, f1 = bert_score.score(candidates, references, lang="en", verbose=True)
    return f1.item()
def rouge_similarity(text1, text2):
    """Return the ROUGE-L F-measure computed between the two texts."""
    rouge_l = rouge_scorer.RougeScorer(['rougeL'], use_stemmer=True).score(text1, text2)['rougeL']
    return rouge_l.fmeasure
def highlight_similarity(text1, text2):
    """List the character runs that ``text1`` and ``text2`` have in common.

    Args:
        text1: First document as a string.
        text2: Second document as a string.

    Returns:
        One entry per non-empty matching block found by ``SequenceMatcher``:
        the run as it appears in ``text1``, a newline, the run as it appears
        in ``text2``, then a blank line. "" when nothing matches.
    """
    matcher = SequenceMatcher(None, text1, text2)
    pieces = []
    for block in matcher.get_matching_blocks():
        # get_matching_blocks() always ends with a zero-length sentinel
        # block; emitting it would only append stray blank lines.
        if not block.size:
            continue
        pieces.append(text1[block.a:block.a + block.size] + '\n')
        pieces.append(text2[block.b:block.b + block.size] + '\n\n')
    return "".join(pieces)
def generate_summary(text):
    """Summarize ``text`` with the module-level T5 model and tokenizer.

    Args:
        text: The document to summarize, as a string.

    Returns:
        The generated summary as plain text.
    """
    # Encode the text; the "summarize: " prefix selects T5's summarization
    # task, and input beyond 1000 tokens is truncated.
    inputs = tokenizer.encode("summarize: " + text, return_tensors="pt", max_length=1000, truncation=True)
    # Generate the summary with 4-beam search, between 100 and 1000 tokens.
    outputs = model.generate(inputs, max_length=1000, min_length=100, length_penalty=2.0, num_beams=4, early_stopping=True)
    # Decode the best beam; skip_special_tokens keeps markers such as
    # <pad> and </s> out of the text shown to the user.
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
# Shared analyzer instance, created on first use by _get_sentiment_analyzer().
_SENTIMENT_ANALYZER = None


def _get_sentiment_analyzer():
    """Return a shared SentimentIntensityAnalyzer, creating it on first use."""
    global _SENTIMENT_ANALYZER
    if _SENTIMENT_ANALYZER is None:
        _SENTIMENT_ANALYZER = SentimentIntensityAnalyzer()
    return _SENTIMENT_ANALYZER


def predict_sentiment(text, threshold_positive, threshold_negative):
    """Classify ``text`` as "Positive", "Negative" or "Neutral".

    Args:
        text: The text to score.
        threshold_positive: Compound score at or above which the text is
            "Positive"; anything ``float()`` accepts.
        threshold_negative: Compound score at or below which the text is
            "Negative"; anything ``float()`` accepts.

    Returns:
        "Positive", "Negative" or "Neutral". The positive threshold is
        checked first.

    Raises:
        ValueError, TypeError: If a threshold cannot be converted to float.
    """
    threshold_positive = float(threshold_positive)
    threshold_negative = float(threshold_negative)
    # This function is called once per sentence, so reuse one analyzer
    # instead of constructing a new one on every call.
    compound = _get_sentiment_analyzer().polarity_scores(text).get("compound", 0)
    if compound >= threshold_positive:
        return "Positive"
    if compound <= threshold_negative:
        return "Negative"
    return "Neutral"
def _render_similarity():
    """Render the "Check Similarity" page: two uploads, a metric picker, the score."""
    uploaded_file1 = st.file_uploader("Choose a PDF file 1", type="pdf")
    uploaded_file2 = st.file_uploader("Choose a PDF file 2", type="pdf")

    st.sidebar.title("Similarity Metrics")
    st.sidebar.write("**Cosine Similarity**:")
    st.sidebar.write("Measures how similar the two documents are based on their content.")
    st.sidebar.write("**BERT Score**:")
    st.sidebar.write("Provides a similarity measure based on contextual embeddings of the documents.")
    st.sidebar.write("**ROUGE Score**:")
    st.sidebar.write("Evaluates the overlap in n-grams between the two documents.")

    similarity_metric = st.selectbox("Select Similarity Metric", ["Cosine Similarity", "BERT Score", "ROUGE Score"])

    # The button is only offered once both documents are available.
    if not (uploaded_file1 and uploaded_file2):
        return
    if not st.button("Check Similarity"):
        return

    text1 = extract_text(uploaded_file1)
    text2 = extract_text(uploaded_file2)
    if similarity_metric == "Cosine Similarity":
        similarity = calculate_similarity(text1, text2)
        st.write(f"The similarity between the two files is {similarity:.2f}.")
    elif similarity_metric == "BERT Score":
        bert_similarity_score = bert_similarity(text1, text2)
        st.write(f"The BERT similarity score between the two files is {bert_similarity_score:.2f}.")
    elif similarity_metric == "ROUGE Score":
        rouge_similarity_score = rouge_similarity(text1, text2)
        st.write(f"The ROUGE similarity score between the two files is {rouge_similarity_score:.2f}.")
    st.write("Highlighted Similarity:")
    st.write(highlight_similarity(text1, text2))


def _render_summary():
    """Render the "Generate Summary" page: one upload and its generated summary."""
    uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")
    if uploaded_file and st.button("Generate Summary"):
        text = extract_text(uploaded_file)
        summary = generate_summary(text)
        st.write("Summary:")
        st.write(summary)


def _render_sentiment():
    """Render the "Sentiment Analysis" page: per-sentence counts and a pie chart."""
    threshold_positive = st.number_input("Threshold for Positive Sentiment:", value=0.05, step=0.01)
    threshold_negative = st.number_input("Threshold for Negative Sentiment:", value=-0.05, step=0.01)
    # Restrict uploads to PDFs, like the other pages: the file is handed
    # straight to PdfReader.
    uploaded_file = st.file_uploader("Upload PDF Document", type="pdf")
    if not uploaded_file:
        return

    counts = {"Positive": 0, "Negative": 0, "Neutral": 0}
    for page in pdf.PdfReader(uploaded_file).pages:
        # ``or ""`` covers pages for which the reader yields no text.
        page_text = page.extract_text() or ""
        # Sentences are approximated by splitting each page on full stops.
        for sentence in page_text.split("."):
            sentence = sentence.strip()
            if sentence:
                counts[predict_sentiment(sentence, threshold_positive, threshold_negative)] += 1

    st.write("Positive Sentences:", counts["Positive"])
    st.write("Negative Sentences:", counts["Negative"])
    st.write("Neutral Sentences:", counts["Neutral"])

    # All-zero wedge sizes cannot be normalised into a pie, so skip the chart
    # when the PDF produced no sentences at all.
    if not any(counts.values()):
        st.warning("No sentences could be extracted from this PDF.")
        return

    fig, ax = plt.subplots()
    ax.pie(list(counts.values()), labels=list(counts), autopct="%1.1f%%", startangle=90)
    ax.axis("equal")
    ax.set_title("Sentiment Distribution")
    st.pyplot(fig)
    # pyplot keeps figures alive until they are closed; release this one so
    # figures do not pile up across reruns.
    plt.close(fig)


def main():
    """Draw the app: title, intro text, and the page chosen in the dropdown."""
    st.title("Text Analysis App")
    st.write("This app checks the similarity between two PDF files using different similarity metrics or generates a summary for a single document or does the sentiment analyis.")
    st.write("Upload PDF files, select an option from the dropdown menu, and proceed accordingly.")
    option = st.selectbox("Select Option", ["Check Similarity", "Generate Summary", "Sentiment Analysis"])
    if option == "Check Similarity":
        _render_similarity()
    elif option == "Generate Summary":
        _render_summary()
    elif option == "Sentiment Analysis":
        _render_sentiment()


if __name__ == "__main__":
    main()