Spaces:
Build error
Build error
| from transformers import RobertaForSequenceClassification, AutoTokenizer | |
| import torch | |
| import docx2txt | |
| import pandas as pd | |
| import matplotlib.pyplot as plt | |
| import openpyxl | |
| from openpyxl.styles import Font, Color, PatternFill | |
| from openpyxl.styles.colors import WHITE | |
| import gradio as gr | |
| import underthesea | |
| import re | |
# Load the sentiment classifier and its tokenizer once, at import time.
# Both come from the same Hugging Face checkpoint, so the identifier is
# written a single time and shared by the two loaders.
_SENTIMENT_CHECKPOINT = "wonrax/phobert-base-vietnamese-sentiment"
senti_model = RobertaForSequenceClassification.from_pretrained(_SENTIMENT_CHECKPOINT)
senti_tokenizer = AutoTokenizer.from_pretrained(_SENTIMENT_CHECKPOINT, use_fast=False)
def segmentation(text):
    """Split *text* into sentences and word-segment each of them.

    The text is cut at every period and every newline. Each piece is
    stripped of surrounding whitespace and pieces that end up empty are
    dropped. Every remaining piece is tokenized with
    ``underthesea.word_tokenize`` and its tokens are joined back together
    with single spaces.

    Returns:
        A list with one word-segmented string per non-empty sentence, in
        the order the sentences appear in *text*.
    """
    pieces = (chunk.strip() for chunk in re.split(r'[.\n]', text))
    return [
        ' '.join(underthesea.word_tokenize(piece))
        for piece in pieces
        if piece  # Ignore empty sentences
    ]
def analyze(sentence):
    """Return the sentiment probabilities for a single sentence.

    The sentence is encoded with ``senti_tokenizer``, fed to ``senti_model``
    with gradient tracking disabled, and the resulting logits are converted
    to probabilities with a softmax over the last dimension.

    Args:
        sentence: The text to classify. Callers in this file pass sentences
            that were already word-segmented by ``segmentation``.

    Returns:
        A list of floats, one probability per model class. The callers in
        this file label the entries Negative, Neutral, Positive in that
        order.
    """
    # Sentences are only split on periods and newlines, so a single one can be
    # longer than the model can embed; without truncation the forward pass
    # fails on an out-of-range position index.
    # NOTE(review): RoBERTa-style models offset position ids by the padding
    # index, hence the "- 2" -- confirm against the checkpoint's config.
    max_tokens = senti_model.config.max_position_embeddings - 2
    token_ids = senti_tokenizer.encode(
        sentence, truncation=True, max_length=max_tokens
    )
    input_ids = torch.tensor([token_ids])
    with torch.no_grad():
        out = senti_model(input_ids)
    # The batch holds exactly one sentence, so return its row only.
    return out.logits.softmax(dim=-1)[0].tolist()
def read_file(docx):
    """Extract the plain text of a Word document.

    Args:
        docx: The document to read, handed straight to ``docx2txt.process``.

    Returns:
        The extracted text, or ``None`` when extraction raised. In that
        case the error is printed to standard output instead of being
        propagated.
    """
    try:
        return docx2txt.process(docx)
    except Exception as e:
        print(f"Error reading file: {e}")
    # Reached only after a failure was reported above.
    return None
def process_file(docx):
    """Run the whole sentiment pipeline on a Word document.

    The document is read with ``read_file``, split into sentences with
    ``segmentation``, every sentence is scored with ``analyze``, and the
    scores are rendered by ``generate_pie_chart`` and
    ``generate_excel_file``.

    Args:
        docx: The .docx document to analyse, as accepted by ``read_file``.

    Returns:
        A tuple ``(excel_file_path, pie_chart_name)``. Both entries are
        ``None`` when the document could not be read or contains no
        sentence to analyse.
    """
    # Read the file
    text = read_file(docx)
    # read_file reports the error and returns None on failure; handing that
    # to segmentation would raise TypeError inside re.split.
    if not text:
        return None, None

    # Segment the text into sentences
    segmented_sentences = segmentation(text)
    # No sentences means an empty frame whose column means are all NaN, so
    # there is nothing meaningful to chart or export.
    if not segmented_sentences:
        return None, None

    # Analyze the sentiment of each sentence
    results = [analyze(sentence) for sentence in segmented_sentences]

    # Create a DataFrame from the results
    df = pd.DataFrame(results, columns=['Negative', 'Neutral', 'Positive'])
    df['Text'] = segmented_sentences

    # Generate the pie chart and excel file
    pie_chart_name = generate_pie_chart(df)
    excel_file_path = generate_excel_file(df)
    return excel_file_path, pie_chart_name
def analyze_text(text, docx_file):
    """Analyse typed text or, failing that, an uploaded Word document.

    Typed text takes priority. The uploaded file is used when no text was
    given or when the text contains nothing that can be analysed (only
    whitespace, periods or newlines).

    Args:
        text: Text typed by the user; may be ``None`` or empty.
        docx_file: The uploaded document; may be ``None``. Either an object
            exposing the path as ``.name`` or the path itself is accepted.

    Returns:
        ``(excel_file_path, pie_chart_name)`` for analysed text, the result
        of ``process_file`` for an uploaded document, or ``None`` when
        there is nothing to analyse.
    """
    # Whitespace-only text is truthy but yields no sentences; treating it as
    # "no text" avoids charting the NaN averages of an empty frame.
    if text and text.strip():
        # Segment the text into sentences
        segmented_text = segmentation(text)
        # Text made only of separators (e.g. "...") also yields no sentences.
        if segmented_text:
            results = [analyze(sentence) for sentence in segmented_text]
            df = pd.DataFrame(results, columns=['Negative', 'Neutral', 'Positive'])
            df['Text'] = segmented_text
            pie_chart_name = generate_pie_chart(df)
            excel_file_path = generate_excel_file(df)
            return excel_file_path, pie_chart_name

    if docx_file:
        # NOTE(review): depending on the Gradio version the File component
        # passes an object with a ``.name`` path or a plain path string --
        # both are handled here; confirm against the deployed version.
        return process_file(getattr(docx_file, "name", docx_file))

    # No input provided
    # NOTE(review): the interface declares two outputs; verify that Gradio
    # accepts a bare None here rather than one value per output.
    return None
def generate_pie_chart(df):
    """Draw the average sentiment scores as a pie chart and save it.

    Args:
        df: Frame with numeric ``Negative``, ``Neutral`` and ``Positive``
            columns; the mean of each column becomes one wedge.

    Returns:
        The name of the written image file, ``'pie_chart.png'``.
    """
    sentiments = ['Negative', 'Neutral', 'Positive']
    # One wedge colour per sentiment, in the same order as the labels.
    wedge_colors = ['#BDBDBD', '#87CEFA', '#9ACD32']

    # Average score of every sentiment column, in label order.
    averages = [df[name].mean() for name in sentiments]

    plt.pie(averages, labels=sentiments, colors=wedge_colors, autopct='%1.1f%%')
    plt.title('Average Scores by Sentiment')

    # Save the chart, then close the figure so the next call starts clean.
    pie_chart_name = 'pie_chart.png'
    plt.savefig(pie_chart_name)
    plt.close()
    return pie_chart_name
def generate_excel_file(df):
    """Write the per-sentence scores to ``result.xlsx`` with colour coding.

    Row 1 holds bold column headers. Every following row holds the three
    scores and the text of one sentence. The highest score of a row and the
    sentence text are filled with the colour of the winning sentiment; the
    text of sentences classified as Negative is additionally written in
    white.

    Args:
        df: Frame with ``Negative``, ``Neutral``, ``Positive`` and ``Text``
            columns, one row per sentence.

    Returns:
        The path of the saved workbook, ``'result.xlsx'``.
    """
    sentiment_cols = ['Negative', 'Neutral', 'Positive']
    headers = sentiment_cols + ['Text']

    # Background colour used for each winning sentiment.
    fill_dict = {
        'Negative': PatternFill(start_color='BDBDBD', end_color='BDBDBD', fill_type='solid'),
        'Neutral': PatternFill(start_color='87CEFA', end_color='87CEFA', fill_type='solid'),
        'Positive': PatternFill(start_color='9ACD32', end_color='9ACD32', fill_type='solid'),
    }

    # Create a new workbook and worksheet
    wb = openpyxl.Workbook()
    ws = wb.active

    # Add column headers to the worksheet
    for col_num, header in enumerate(headers, 1):
        cell = ws.cell(row=1, column=col_num)
        cell.value = header
        cell.font = Font(bold=True)

    # Data starts on row 2, below the headers. Rows are counted with
    # enumerate instead of reusing the frame's index labels, so the sheet
    # stays contiguous whatever index the frame carries.
    for excel_row, (_, row_data) in enumerate(df.iterrows(), start=2):
        # Highest score of this row and the sentiment it belongs to.
        scores = [row_data[col] for col in sentiment_cols]
        max_score = max(scores)
        sentiment = sentiment_cols[scores.index(max_score)]
        fill = fill_dict[sentiment]

        for col_num, header in enumerate(headers, 1):
            cell = ws.cell(row=excel_row, column=col_num)
            cell.value = row_data[header]
            if header == 'Text':
                # Decide the font colour from the sentiment itself: openpyxl
                # stores fill colours as aRGB ('00BDBDBD'), so comparing the
                # stored value with the 6-digit 'BDBDBD' never matches.
                font_color = WHITE if sentiment == 'Negative' else Color('000000')
                cell.fill = fill
                cell.font = Font(color=font_color)
            elif cell.value == max_score:
                cell.fill = fill

    # Save the workbook
    excel_file_path = 'result.xlsx'
    wb.save(excel_file_path)
    return excel_file_path
def analyze_from_text(text):
    """Analyse typed text only; no document is passed to ``analyze_text``."""
    return analyze_text(text=text, docx_file=None)
def analyze_from_file(docx_file):
    """Analyse an uploaded document only; no text is passed to ``analyze_text``."""
    return analyze_text(text=None, docx_file=docx_file)
# Input components, listed in the order of analyze_text's parameters
# (text, docx_file).
inputs = [
    gr.Textbox(label="Nhập Văn Bản bằng Tiếng Việt để trải nghiệm ngay"),
    gr.File(label="Chọn Tệp File Word(docx) Bạn Muốn Phân Tích"),
]

# Output components, listed in the order of the values analyze_text returns
# (Excel file path, pie chart image path).
outputs = [
    gr.File(label="Kết Quả Phân Tích Excel"),
    gr.Image(label="Biểu đồ", type="filepath"),
]
# Wire analyze_text to the components declared above.
interface = gr.Interface(
    analyze_text,
    inputs,
    outputs,
    title="Phân Tích Cảm xúc thông qua Hội Thoại bằng Tiếng Việt",
    allow_flagging="never",  # Disable flag button
)

# Start the web UI only when the file is run as a script.
if __name__ == "__main__":
    interface.launch()