tiya1012 commited on
Commit
49d943b
·
verified ·
1 Parent(s): c22edae

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +114 -64
app.py CHANGED
@@ -1,64 +1,114 @@
1
- import gradio as gr
2
- from huggingface_hub import InferenceClient
3
-
4
- """
5
- For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
6
- """
7
- client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
8
-
9
-
10
- def respond(
11
- message,
12
- history: list[tuple[str, str]],
13
- system_message,
14
- max_tokens,
15
- temperature,
16
- top_p,
17
- ):
18
- messages = [{"role": "system", "content": system_message}]
19
-
20
- for val in history:
21
- if val[0]:
22
- messages.append({"role": "user", "content": val[0]})
23
- if val[1]:
24
- messages.append({"role": "assistant", "content": val[1]})
25
-
26
- messages.append({"role": "user", "content": message})
27
-
28
- response = ""
29
-
30
- for message in client.chat_completion(
31
- messages,
32
- max_tokens=max_tokens,
33
- stream=True,
34
- temperature=temperature,
35
- top_p=top_p,
36
- ):
37
- token = message.choices[0].delta.content
38
-
39
- response += token
40
- yield response
41
-
42
-
43
- """
44
- For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
45
- """
46
- demo = gr.ChatInterface(
47
- respond,
48
- additional_inputs=[
49
- gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
50
- gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
51
- gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
52
- gr.Slider(
53
- minimum=0.1,
54
- maximum=1.0,
55
- value=0.95,
56
- step=0.05,
57
- label="Top-p (nucleus sampling)",
58
- ),
59
- ],
60
- )
61
-
62
-
63
- if __name__ == "__main__":
64
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ from sklearn.feature_extraction.text import TfidfVectorizer
4
+ from sklearn.ensemble import RandomForestClassifier
5
+ import matplotlib.pyplot as plt
6
+ from wordcloud import WordCloud
7
+ from matplotlib.font_manager import FontProperties
8
+ import os
9
+
10
# Function to generate word cloud for cleaned_text
def plot_wordcloud(text_data, stopwords, width=500, height=500, background_color="White", collocations=True, min_font_size=5, save_path=None):
    """Render a word cloud for *text_data* onto a new matplotlib figure.

    The figure is left open (and current) so the caller may save it with
    ``plt.savefig(...)``; pass ``save_path`` to have it saved here directly.

    Args:
        text_data: Single string of whitespace-joined text to visualize.
        stopwords: Iterable of words for WordCloud to exclude.
        width: Pixel width of the rendered cloud image.
        height: Pixel height of the rendered cloud image.
        background_color: Canvas color behind the words.
        collocations: Whether WordCloud may include two-word phrases.
        min_font_size: Smallest font size used for the rarest words.
        save_path: Optional file path; when given, the figure is saved there.

    Returns:
        The matplotlib Figure containing the rendered word cloud.

    Raises:
        ValueError: Propagated from WordCloud when *text_data* has no words.
    """
    wordcloud = WordCloud(
        width=width,
        height=height,
        background_color=background_color,
        stopwords=stopwords,
        collocations=collocations,
        min_font_size=min_font_size
    ).generate(text_data)

    fig = plt.figure(figsize=(10, 10))
    plt.imshow(wordcloud, interpolation="bilinear")
    plt.axis("off")
    plt.title("Word Cloud")
    # Deliberately no plt.show(): this runs server-side under a headless
    # (non-interactive) backend where show() is useless, and the caller
    # saves the *current* figure after this returns — it must stay open.
    if save_path is not None:
        fig.savefig(save_path)
    return fig
27
+
28
# Function to process uploaded file and predict sentiment
def analyze_sentiment(file):
    """Train a quick sentiment model on an uploaded CSV and summarize it.

    Expects a CSV with ``cleaned_text`` and ``sentiment_label`` columns,
    fits a TF-IDF + RandomForest pipeline on the whole file, and produces
    a text summary plus two saved plot images.

    Args:
        file: Gradio file object; ``file.name`` is the path to the CSV.

    Returns:
        A 3-tuple ``(summary_text, histogram_path, wordcloud_path)``.
        On any failure the first element is an error message and the two
        image paths are ``None`` (Gradio is wired for three outputs, so
        error paths must also yield three values).
    """
    try:
        # Load CSV
        df = pd.read_csv(file.name)

        # Ensure the required columns exist
        if 'cleaned_text' not in df.columns or 'sentiment_label' not in df.columns:
            return ("Error: The uploaded CSV must contain 'cleaned_text' and 'sentiment_label' columns.", None, None)

        # Guard against an empty file — fitting on zero rows would raise a
        # confusing sklearn error deep in the stack.
        if df.empty:
            return ("Error: The uploaded CSV contains no rows.", None, None)

        # Extract text and labels (force str so NaN cells don't break TF-IDF)
        X = df['cleaned_text'].astype(str)
        y = df['sentiment_label']

        # Vectorize text using TF-IDF
        vectorizer = TfidfVectorizer()
        X_vectorized = vectorizer.fit_transform(X)

        # Train Random Forest Classifier on the entire dataset
        model = RandomForestClassifier(random_state=42)
        model.fit(X_vectorized, y)

        # NOTE(review): predicting on the training rows gives in-sample
        # (optimistic) results — acceptable for this demo's summary only.
        df['predicted_sentiment'] = model.predict(X_vectorized)

        # Generate sentiment distribution histogram
        plt.figure(figsize=(8, 6))
        sentiment_counts = df['predicted_sentiment'].value_counts(normalize=True) * 100
        ordered_counts = sentiment_counts.sort_index()
        ordered_counts.plot(kind='bar', color=['blue', 'orange', 'green'], alpha=0.7)
        plt.title("Predicted Sentiment Distribution")
        plt.xlabel("Sentiment Labels")
        plt.ylabel("Percentage")
        # Derive tick labels from the labels actually present — hard-coding
        # [0, 1, 2] breaks whenever the file has fewer/other classes.
        label_names = {0: "Negative (0)", 1: "Positive (1)", 2: "Neutral (2)"}
        plt.xticks(
            ticks=range(len(ordered_counts)),
            labels=[label_names.get(lbl, str(lbl)) for lbl in ordered_counts.index],
            rotation=45,
        )
        plt.grid(axis="y", linestyle="--", alpha=0.7)

        # Save the histogram as an image
        histogram_path = "sentiment_histogram.png"
        plt.tight_layout()
        plt.savefig(histogram_path)
        plt.close()

        # Generate a word cloud for cleaned_text
        text_data = " ".join(X)
        stopwords = set()

        # Plot and save the word cloud (plot_wordcloud leaves the figure
        # open/current so savefig below captures it).
        plot_wordcloud(text_data, stopwords)

        wordcloud_path = "wordcloud.png"
        plt.savefig(wordcloud_path)
        plt.close()

        # Percentages per class; .get defaults to 0 for absent classes.
        positive_percentage = sentiment_counts.get(1, 0)
        negative_percentage = sentiment_counts.get(0, 0)
        neutral_percentage = sentiment_counts.get(2, 0)

        summary = (f"Sentiment Summary:\n"
                   f"Positive: {positive_percentage:.2f}%\n"
                   f"Negative: {negative_percentage:.2f}%\n"
                   f"Neutral: {neutral_percentage:.2f}%")

        # Display results
        return (
            summary,
            histogram_path,
            wordcloud_path
        )

    except Exception as e:
        # Three-element tuple: Gradio's click handler is wired to three
        # outputs, so a bare string here would itself raise.
        return (f"Error processing the file: {str(e)}", None, None)
99
+
100
# Gradio Interface: one CSV upload in, a text summary plus two images out.
with gr.Blocks() as demo:
    gr.Markdown("# Sentiment Analysis Chatbot")
    gr.Markdown("Please upload a CSV file with 'cleaned_text' and 'sentiment_label' columns.")

    # Input/output components (labels are user-facing; keep verbatim).
    csv_upload = gr.File(label="Upload CSV File", file_types=[".csv"])
    summary_box = gr.Textbox(label="Message", lines=5)
    histogram_view = gr.Image(label="Sentiment Histogram")
    wordcloud_view = gr.Image(label="Word Cloud")

    run_button = gr.Button("Analyze Sentiment")
    run_button.click(
        analyze_sentiment,
        inputs=csv_upload,
        outputs=[summary_box, histogram_view, wordcloud_view],
    )

# Save as app.py
demo.launch()