Spaces:

TuanScientist
/

Sentiment2

Build error

App Files Community

TuanScientist committed on Jul 18, 2024

Commit

57949d9

verified ·

1 Parent(s): af45611

Update app.py

Browse files

Files changed (1) hide show

app.py +18 -23

app.py CHANGED Viewed

@@ -13,15 +13,16 @@ import underthesea
 senti_model = RobertaForSequenceClassification.from_pretrained("wonrax/phobert-base-vietnamese-sentiment")
 senti_tokenizer = AutoTokenizer.from_pretrained("wonrax/phobert-base-vietnamese-sentiment", use_fast=False)
-def segmentation(sentences):
     segmented_sentences = []
     for sentence in sentences:
-        segmented_sentence = underthesea.word_tokenize(sentence)
-        segmented_sentences.append(' '.join(segmented_sentence))
     return segmented_sentences
 def analyze(sentence):
     input_ids = torch.tensor([senti_tokenizer.encode(sentence)])
     with torch.no_grad():
@@ -29,22 +30,19 @@ def analyze(sentence):
         results = out.logits.softmax(dim=-1).tolist()
         return results[0]
 def read_file(docx):
     try:
         text = docx2txt.process(docx)
-        lines = text.split('\n')
-        lines = [line.strip() for line in lines]
-        lines = [line for line in lines if line]
-        return lines
     except Exception as e:
         print(f"Error reading file: {e}")
 def process_file(docx):
-    # Read the file and segment the sentences
-    sentences = read_file(docx)
-    segmented_sentences = segmentation(sentences)
     # Analyze the sentiment of each sentence
     results = []
@@ -53,7 +51,7 @@ def process_file(docx):
     # Create a DataFrame from the results
     df = pd.DataFrame(results, columns=['Negative', 'Neutral', 'Positive'])
-    df['Text'] = read_file(docx)
     # Generate the pie chart and excel file
     pie_chart_name = generate_pie_chart(df)
@@ -61,17 +59,16 @@ def process_file(docx):
     return excel_file_path, pie_chart_name
 def analyze_text(text, docx_file):
     if text:
-        # Perform analysis on the text
-        segmented_text = segmentation([text])
         results = []
         for sentence in segmented_text:
             results.append(analyze(sentence))
         df = pd.DataFrame(results, columns=['Negative', 'Neutral', 'Positive'])
-        df['Text'] = [text]
         pie_chart_name = generate_pie_chart(df)
         excel_file_path = generate_excel_file(df)
         return excel_file_path, pie_chart_name
@@ -83,7 +80,6 @@ def analyze_text(text, docx_file):
         # No input provided
         return None
 def generate_pie_chart(df):
     # Calculate the average scores
     neg_avg = df['Negative'].mean()
@@ -101,14 +97,13 @@ def generate_pie_chart(df):
     plt.pie(avg_df['Score'], labels=avg_df['Sentiment'], colors=colors, autopct='%1.1f%%')
     plt.title('Average Scores by Sentiment')
-    # Save the pie chart as an image file in the static folder
     pie_chart_name = 'pie_chart.png'
     plt.savefig(pie_chart_name)
     plt.close()
     return pie_chart_name
 def generate_excel_file(df):
     # Create a new workbook and worksheet
     wb = openpyxl.Workbook()
@@ -158,7 +153,6 @@ def generate_excel_file(df):
     return excel_file_path
 inputs = [
     gr.Textbox(label="Nhập Văn Bản bằng Tiếng Việt để trải nghiệm ngay"),
     gr.File(label="Chọn Tệp File Word(docx) Bạn Muốn Phân Tích")
@@ -179,3 +173,4 @@ interface = gr.Interface(
 if __name__ == "__main__":
     interface.launch()

 senti_model = RobertaForSequenceClassification.from_pretrained("wonrax/phobert-base-vietnamese-sentiment")
 senti_tokenizer = AutoTokenizer.from_pretrained("wonrax/phobert-base-vietnamese-sentiment", use_fast=False)
def segmentation(text):
    """Split text into sentences and word-segment each one.

    The text is cut at every '.' character. Each piece is stripped of
    surrounding whitespace, pieces that end up empty are dropped, and the
    remaining ones are passed to ``underthesea.word_tokenize`` and joined
    back together with single spaces.

    Args:
        text: Raw input text. ``None`` or an empty string is accepted and
            yields an empty list, so a failed file read (``read_file``
            returns ``None`` after printing the error) does not raise
            ``AttributeError`` here.

    Returns:
        A list of word-segmented sentence strings, in input order.
    """
    # Guard first: there is nothing to split for None or "".
    if not text:
        return []
    pieces = (piece.strip() for piece in text.split('.'))
    return [
        ' '.join(underthesea.word_tokenize(piece))
        for piece in pieces
        if piece  # ignore empty sentences
    ]
 def analyze(sentence):
     input_ids = torch.tensor([senti_tokenizer.encode(sentence)])
     with torch.no_grad():
         results = out.logits.softmax(dim=-1).tolist()
         return results[0]
 def read_file(docx):
     """Extract the text of a Word document.

     Args:
         docx: Document to read; passed unchanged to ``docx2txt.process``.

     Returns:
         The extracted text, or ``None`` when extraction raised. In that
         case the error is printed and not re-raised.
     """
     try:
         return docx2txt.process(docx)
     except Exception as e:
         print(f"Error reading file: {e}")
     # Reached only after a failure; callers receive None.
     return None
 def process_file(docx):
+    # Read the file
+    text = read_file(docx)
+    # Segment the text into sentences
+    segmented_sentences = segmentation(text)
     # Analyze the sentiment of each sentence
     results = []
     # Create a DataFrame from the results
     df = pd.DataFrame(results, columns=['Negative', 'Neutral', 'Positive'])
+    df['Text'] = segmented_sentences
     # Generate the pie chart and excel file
     pie_chart_name = generate_pie_chart(df)
     return excel_file_path, pie_chart_name
 def analyze_text(text, docx_file):
     if text:
+        # Segment the text into sentences
+        segmented_text = segmentation(text)
         results = []
         for sentence in segmented_text:
             results.append(analyze(sentence))
         df = pd.DataFrame(results, columns=['Negative', 'Neutral', 'Positive'])
+        df['Text'] = segmented_text
         pie_chart_name = generate_pie_chart(df)
         excel_file_path = generate_excel_file(df)
         return excel_file_path, pie_chart_name
         # No input provided
         return None
 def generate_pie_chart(df):
     # Calculate the average scores
     neg_avg = df['Negative'].mean()
     plt.pie(avg_df['Score'], labels=avg_df['Sentiment'], colors=colors, autopct='%1.1f%%')
     plt.title('Average Scores by Sentiment')
+    # Save the pie chart as an image file
     pie_chart_name = 'pie_chart.png'
     plt.savefig(pie_chart_name)
     plt.close()
     return pie_chart_name
 def generate_excel_file(df):
     # Create a new workbook and worksheet
     wb = openpyxl.Workbook()
     return excel_file_path
 inputs = [
     gr.Textbox(label="Nhập Văn Bản bằng Tiếng Việt để trải nghiệm ngay"),
     gr.File(label="Chọn Tệp File Word(docx) Bạn Muốn Phân Tích")
 if __name__ == "__main__":
     interface.launch()