File size: 2,979 Bytes
50ab210
 
 
 
 
 
7a96f21
e122fa9
 
 
 
 
 
 
50ab210
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e122fa9
 
 
50ab210
 
1ba4bc9
 
 
 
 
 
 
 
 
 
 
 
 
 
50ab210
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import streamlit as st
import pandas as pd
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import matplotlib.pyplot as plt
import time
import os
import os

# Use a custom, app-local cache directory for Hugging Face downloads.
HF_CACHE_DIR = "./hf_cache"

# NOTE: `transformers` is already imported at this point, and the Hugging Face
# libraries resolve HF_HOME when they are imported, so this assignment alone
# does not redirect the model cache. It is kept for any code that reads the
# variable later; the directory is also passed explicitly as `cache_dir` below.
os.environ["HF_HOME"] = HF_CACHE_DIR

# Ensure the cache directory exists (no error if it is already there).
os.makedirs(HF_CACHE_DIR, exist_ok=True)

model_name = "tabularisai/multilingual-sentiment-analysis"


@st.cache_resource
def _load_sentiment_model(name, cache_dir):
    """Load the tokenizer and sequence-classification model for ``name``.

    Streamlit re-executes this script on every widget interaction; caching the
    loaded objects with ``st.cache_resource`` avoids reloading the model from
    disk on each rerun.

    Args:
        name: Hugging Face model identifier.
        cache_dir: Directory where downloaded model files are stored.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    loaded_tokenizer = AutoTokenizer.from_pretrained(name, cache_dir=cache_dir)
    loaded_model = AutoModelForSequenceClassification.from_pretrained(name, cache_dir=cache_dir)
    return loaded_tokenizer, loaded_model


tokenizer, model = _load_sentiment_model(model_name, HF_CACHE_DIR)

# Function for sentiment prediction with progress bar
def predict_sentiment(texts):
    """Classify each text into one of five sentiment labels.

    Each text is tokenized (truncated to 512 tokens) and passed through the
    module-level ``model`` one at a time, while a Streamlit progress bar is
    advanced after every item.

    Args:
        texts: Sized sequence of strings to classify.

    Returns:
        A list of label strings ("Very Negative" ... "Very Positive"), one per
        input text, in input order.
    """
    sentiment_map = {0: "Very Negative", 1: "Negative", 2: "Neutral", 3: "Positive", 4: "Very Positive"}
    sentiments = []
    progress_bar = st.progress(0)
    total_texts = len(texts)

    if total_texts == 0:
        # Nothing to classify: show the bar as finished instead of stuck at 0%.
        progress_bar.progress(1.0)
        return sentiments

    # Inference only, so gradient tracking is disabled for the whole loop.
    with torch.no_grad():
        for done, text in enumerate(texts, start=1):
            inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)
            logits = model(**inputs).logits
            # Softmax is monotonic, so argmax over the raw logits selects the
            # same class as argmax over the probabilities.
            label_id = torch.argmax(logits, dim=-1).item()
            sentiments.append(sentiment_map[label_id])

            # Update progress bar (no artificial delay: a per-item sleep only
            # slows large files down).
            progress_bar.progress(done / total_texts)

    return sentiments

# Streamlit UI
st.title("Sentiment Analysis App")
st.write("Upload a CSV or Excel file containing text data for sentiment analysis.")

# File upload: a single CSV or Excel (.xlsx) file.
uploaded_file = st.file_uploader("Upload a CSV or Excel file", type=["csv", "xlsx"], accept_multiple_files=False)


if uploaded_file is not None:
    try:
        # Choose the reader from the file extension. The comparison is
        # case-insensitive so that e.g. "DATA.CSV" is not sent to the Excel
        # reader.
        if uploaded_file.name.lower().endswith(".csv"):
            df = pd.read_csv(uploaded_file)
        else:
            df = pd.read_excel(uploaded_file)
    except Exception as e:
        # Boundary handler: surface any parsing problem to the user. The rest
        # of the UI lives in the `else` branch, so a failed read can no longer
        # lead to a NameError on `df` / `text_column` when the button is hit.
        st.error(f"Error reading file: {e}")
    else:
        st.write("Dataset Preview:")
        st.dataframe(df.head())

        # Select text column
        text_column = st.selectbox("Select the text column for analysis", df.columns)

        if st.button("Analyze Sentiment"):
            # Get text data; every cell is converted to its string form.
            texts = df[text_column].astype(str).tolist()

            # Predict sentiments with progress bar
            sentiments = predict_sentiment(texts)
            df["Sentiment"] = sentiments

            # Display results
            st.write("Sentiment Analysis Results:")
            st.dataframe(df[[text_column, "Sentiment"]])

            # Pie chart of sentiment distribution
            st.write("Sentiment Distribution:")
            sentiment_counts = df["Sentiment"].value_counts()
            fig, ax = plt.subplots()
            ax.pie(sentiment_counts, labels=sentiment_counts.index, autopct="%1.1f%%", startangle=90)
            ax.axis("equal")
            st.pyplot(fig)
            # pyplot keeps every figure registered until it is closed; release
            # it so figures do not accumulate across script reruns.
            plt.close(fig)