Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| from transformers import pipeline, AutoTokenizer | |
| from classifier import MistralForSequenceClassification | |
| import torch | |
| import nltk | |
| import json | |
| import pandas as pd | |
| import plotly.graph_objects as go | |
| from wordcloud import WordCloud | |
| import matplotlib.pyplot as plt | |
| import io | |
| import base64 | |
| from PIL import Image | |
| from nltk import bigrams | |
| import malaya | |
| from collections import Counter | |
| import os | |
| from flagging import HuggingFaceDatasetSaver | |
# Hugging Face access token read from the environment (None when unset).
# It is passed to the dataset writer and to the TPB model download below.
HF_TOKEN = os.getenv('HUGGINGFACE_HUB_TOKEN')
hf_writer = HuggingFaceDatasetSaver(HF_TOKEN, 'HalalFoodNLP/tpb-crowdsourced-dataset')

# Additional stopword entries shipped with the app. The encoding is explicit
# so loading does not depend on the host's locale settings.
with open('en.json', encoding='utf-8') as fopen:
    en = json.load(fopen)

# Combined stopword list: malaya's list + en.json + forum/chat filler words.
stopwords = malaya.text.function.get_stopwords()
stopwords = stopwords + en + ['lor', 'quote','Quote','QUOTE','pm', 'long', 'jer', 'time', 'feel', 'liao', 'wow', 'https', 'http', 've', 'ko', 'kena', 'post', 'ni', 'tu', 'don', 'je', 'jeh', 'la', 'tau', 'haha', 'hahaha', 'hahahaha']
# NOTE(review): these multi-word entries can never equal a single token, so
# the per-token filter in generate_bigrams will not match them - confirm
# whether they are used anywhere else.
stopwords += ['for me', 'to be', 'in the', 'me to', 'for me to']

# NLTK resources; quiet=True keeps the download progress out of the logs.
nltk.download('punkt', quiet=True)
nltk.download('punkt_tab', quiet=True)
nltk.download('stopwords', quiet=True)
nltk.download('vader_lexicon', quiet=True)

# Both classifiers are driven through the same tokenizer.
tokenizer_tpb = AutoTokenizer.from_pretrained('mesolitica/malaysian-mistral-191M-MLM-512')
model_tpb = MistralForSequenceClassification.from_pretrained('HalalFoodNLP/tpb-model-halal', torch_dtype=torch.bfloat16, token = HF_TOKEN)
model_sentiment = MistralForSequenceClassification.from_pretrained('malaysia-ai/sentiment-mistral-191M-MLM', torch_dtype=torch.bfloat16)
pipeline_tpb = pipeline(task="text-classification", model=model_tpb, tokenizer=tokenizer_tpb)
sentiment_pipeline = pipeline("sentiment-analysis", model=model_sentiment, tokenizer=tokenizer_tpb)

# Pre-labelled corpus searched by the "Company View" tab: one JSON object per
# line. Blank lines (e.g. a trailing newline at end of file) are skipped
# instead of raising JSONDecodeError.
with open('sentiment-tpb-dataset.jsonl', 'r', encoding='utf-8') as file:
    data = [json.loads(line) for line in file if line.strip()]
df = pd.DataFrame(data)
def generate_wordcloud(text):
    """Render a word cloud for *text* and return it as a PIL image.

    The cloud is drawn on a matplotlib figure, saved to an in-memory PNG and
    re-opened with PIL.

    Args:
        text: The text whose words should be plotted.

    Returns:
        A ``PIL.Image.Image``, or ``None`` when *text* is empty, contains only
        whitespace, or yields no plottable words.
    """
    # Nothing to draw: bail out before touching WordCloud/matplotlib.
    if not text or not text.strip():
        return None

    try:
        wordcloud = WordCloud(width=300, height=200, background_color='white').generate(text)
    except ValueError:
        # WordCloud raises ValueError when no words survive its own
        # filtering; treat that like empty input instead of crashing the
        # caller.
        return None

    figure = plt.figure(figsize=(10, 5))
    try:
        plt.imshow(wordcloud, interpolation='bilinear')
        plt.axis('off')
        plt.tight_layout(pad=0)
        # Save the plot to a bytes buffer
        buf = io.BytesIO()
        plt.savefig(buf, format='png')
    finally:
        # Always release the figure, even if rendering fails, so figures do
        # not accumulate across requests.
        plt.close(figure)

    buf.seek(0)
    # Convert bytes buffer to PIL Image
    return Image.open(buf)
def generate_bigrams(text):
    """Return the ten most frequent adjacent word pairs in *text*.

    The text is lower-cased and tokenised with NLTK. Tokens that are not
    purely alphanumeric, or that appear in the module-level ``stopwords``
    collection, are dropped before the pairs are formed.

    Returns:
        A list of ``((first_word, second_word), count)`` tuples, most
        frequent first, with at most ten entries.
    """
    tokens = nltk.word_tokenize(text.lower())
    content_tokens = (
        token for token in tokens
        if token.isalnum() and token not in stopwords
    )
    pair_counts = Counter(bigrams(content_tokens))
    return pair_counts.most_common(10)
def predict_decision(sentiment_label):
    """Translate a sentiment label into a purchase-likelihood message.

    ``'positive'`` maps to a high likelihood and ``'neutral'`` to a moderate
    one; every other value falls through to a low likelihood.
    """
    known_outcomes = (
        ('positive', "High likelihood of purchase"),
        ('neutral', "Moderate likelihood of purchase"),
    )
    for label, message in known_outcomes:
        if sentiment_label == label:
            return message
    return "Low likelihood of purchase"
# Recommendation text for each TPB factor, keyed by the value found in the
# ``tpb_label`` column. Factors without an entry produce no report section.
_TPB_RECOMMENDATIONS = {
    "attitude": """
**Current Issues:**
- High negative perception regarding product quality
- Concerns about halal certification and its authenticity
- Pricing issues in comparison to perceived value
**Recommended Actions:**
1. **Quality Control Improvements**
- Implement enhanced product quality measures
- Obtain globally recognized halal certifications
- Conduct regular quality audits
2. **Educational Campaigns**
- Educate customers on halal certification processes
- Raise awareness about the health benefits of halal products
- Highlight ethical and sustainable sourcing
3. **Pricing Strategy Adjustment**
- Reassess pricing to align with customer expectations
- Introduce discount programs or loyalty initiatives
""",
    "religious knowledge": """
**Current Issues:**
- Lack of awareness and understanding about the halal process
- Customers may be unsure of the religious guidelines followed
**Recommended Actions:**
1. **Religious Knowledge Enhancement**
- Provide clear educational materials on the halal process
- Collaborate with religious scholars to endorse products
- Ensure transparent labeling and certification
2. **Community Engagement**
- Host webinars or community events about halal
- Partner with local religious organizations for outreach
- Share customer testimonials emphasizing trust in your certification
""",
    "subjective norms": """
**Current Issues:**
- Social influence or peer pressure regarding halal compliance is weak
- Lack of community-driven recommendations for the product
**Recommended Actions:**
1. **Influence Social Circles**
- Engage community leaders or influencers to endorse products
- Create social campaigns around the halal certification to enhance peer recommendations
2. **Referral Programs**
- Introduce referral programs where existing customers can promote the product
- Offer incentives for customers who share their experiences with others
3. **Testimonials and Success Stories**
- Use customer testimonials and success stories to strengthen social trust
""",
    "perceived behavioural control": """
**Current Issues:**
- Perceived difficulty in understanding or accessing halal-certified products
- Concerns about control over product quality and sourcing transparency
**Recommended Actions:**
1. **Improve Accessibility**
- Make halal products more accessible through multiple platforms (e-commerce, retail stores)
- Ensure ease of purchase and fast delivery options
2. **Enhance Transparency**
- Provide detailed information about sourcing and production processes
- Use blockchain or similar technology to enhance transparency in halal certification
3. **Customer Empowerment**
- Offer customer feedback channels to empower users to voice concerns and suggestions
- Ensure that concerns are addressed promptly to build trust and satisfaction
""",
}


def generate_report(tpb_sentiment_df, negative_threshold=70):
    """Build the Markdown recommendation report for the TPB factors.

    A section is emitted for every row whose negative share is strictly
    greater than *negative_threshold* and whose ``tpb_label`` has an entry in
    ``_TPB_RECOMMENDATIONS``.

    Args:
        tpb_sentiment_df: DataFrame with one row per factor, a ``tpb_label``
            column and a ``negative`` column holding a percentage. A missing
            ``negative`` column (no negative posts in the data) is treated as
            0% for every row.
        negative_threshold: Percentage of negative posts a factor must exceed
            to receive recommendations. Defaults to 70.

    Returns:
        The report as a Markdown string; only the title when no factor
        exceeds the threshold.
    """
    sections = ["## TPB Factor Analysis and Recommendations Report\n\n"]
    for _, row in tpb_sentiment_df.iterrows():
        # Sentiment columns exist only for labels present in the data, so
        # look the value up defensively instead of indexing.
        negative_percentage = row.get('negative', 0)
        # Written as "not >" so a NaN percentage is skipped as well.
        if not negative_percentage > negative_threshold:
            continue
        tpb_label = row['tpb_label']
        recommendation = _TPB_RECOMMENDATIONS.get(tpb_label)
        if recommendation is None:
            continue
        sections.append(f"### {tpb_label.capitalize()} ({negative_percentage:.1f}% Negative)\n")
        sections.append(recommendation)
    return "".join(sections)
# Number of values search_company returns (one per output component wired to
# the search button).
_SEARCH_OUTPUT_COUNT = 10

# Bar colours for the sentiment labels shown in the charts.
_SENTIMENT_COLORS = {'negative': 'red', 'neutral': 'gray', 'positive': 'blue'}


def _percent_labels(values):
    """Format each value as a one-decimal percentage string for bar labels."""
    return [f'{val:.1f}%' for val in values]


def search_company(keyword):
    """Search the corpus for *keyword* and build the "Company View" outputs.

    Rows of the module-level ``df`` whose ``text`` contains *keyword* are
    selected. The match is case-insensitive and literal: the keyword is not
    interpreted as a regular expression.

    Args:
        keyword: Search string typed by the user.

    Returns:
        A 10-tuple: overall sentiment bar chart, overall TPB-factor bar
        chart, stacked sentiment-per-factor chart, up to five matching rows
        with text shorter than 300 characters, the Markdown recommendation
        report, four word-cloud images (attitude, religious knowledge,
        subjective norms, perceived behavioural control; ``None`` when a
        factor has no matches) and the top-bigram table. All ten values are
        ``None`` when the keyword is empty or nothing matches.
    """
    if not keyword:
        return (None,) * _SEARCH_OUTPUT_COUNT

    # regex=False: characters such as "(" or "+" in user input must not be
    # parsed as a pattern. na=False: rows without text count as non-matching
    # instead of poisoning the boolean mask.
    matches = df['text'].str.contains(keyword, case=False, regex=False, na=False)
    filtered_df = df[matches]
    if filtered_df.empty:
        return (None,) * _SEARCH_OUTPUT_COUNT

    # Calculate sentiment distribution
    sentiment_counts = filtered_df['label'].value_counts(normalize=True) * 100
    # Unknown labels fall back to blue here.
    colors = [_SENTIMENT_COLORS.get(sentiment, 'blue') for sentiment in sentiment_counts.index]
    sentiment_fig = go.Figure(data=[go.Bar(
        x=sentiment_counts.index,
        y=sentiment_counts.values,
        text=_percent_labels(sentiment_counts.values),
        textposition='auto',
        marker_color=colors
    )])
    sentiment_fig.update_layout(
        title='Overall Sentiment Distribution',
        xaxis_title='Sentiment',
        yaxis_title='Percentage'
    )

    tpb_counts = filtered_df['tpb_label'].value_counts(normalize=True) * 100
    tpb_fig = go.Figure(data=[go.Bar(
        x=tpb_counts.index,
        y=tpb_counts.values,
        text=_percent_labels(tpb_counts.values),
        textposition='auto'
    )])
    tpb_fig.update_layout(title='Overall TPB Factor Distribution', xaxis_title='TPB Factor', yaxis_title='Percentage')

    # Calculate sentiment distribution within each TPB factor (each row is
    # normalised to sum to 100).
    tpb_sentiment_df = filtered_df.groupby(['tpb_label', 'label']).size().unstack(fill_value=0)
    tpb_sentiment_df = tpb_sentiment_df.div(tpb_sentiment_df.sum(axis=1), axis=0) * 100
    tpb_sentiment_fig = go.Figure()
    for sentiment in tpb_sentiment_df.columns:
        tpb_sentiment_fig.add_trace(go.Bar(
            name=sentiment,
            x=tpb_sentiment_df.index,
            y=tpb_sentiment_df[sentiment],
            text=_percent_labels(tpb_sentiment_df[sentiment]),
            textposition='auto',
            # Unknown labels fall back to gray in the stacked chart.
            marker_color=_SENTIMENT_COLORS.get(sentiment, 'gray')
        ))
    tpb_sentiment_fig.update_layout(
        barmode='stack',
        title='Sentiment Distribution within TPB Factors',
        xaxis_title='TPB Factor',
        yaxis_title='Percentage'
    )

    report = generate_report(tpb_sentiment_df.reset_index())

    wordclouds = {}
    bigrams_data = {}
    for label in filtered_df['tpb_label'].unique():
        # Strip terms that appear in nearly every post and would otherwise
        # dominate the word clouds and bigram counts.
        text = ' '.join(filtered_df[filtered_df['tpb_label'] == label]['text']).replace('QUOTE','').replace('quote','').replace('sijil halal','').replace('halal','')
        wordclouds[label] = generate_wordcloud(text)
        bigrams_data[label] = generate_bigrams(text)

    # One column per factor holding its top word pairs. Wrapping each column
    # in a Series lets factors with fewer pairs be padded instead of raising
    # on unequal column lengths.
    bigram_df = pd.DataFrame({
        label: pd.Series([word_pair for word_pair, _ in top_pairs], dtype=object)
        for label, top_pairs in bigrams_data.items()
    }).fillna("")
    bigram_df.index = [f"Top {i+1}" for i in range(len(bigram_df))]

    return (sentiment_fig, tpb_fig, tpb_sentiment_fig, filtered_df[filtered_df['text'].str.len() < 300].head(5),
            report, wordclouds.get('attitude'), wordclouds.get('religious knowledge'),
            wordclouds.get('subjective norms'), wordclouds.get('perceived behavioural control'), bigram_df)
def text_classification_and_sentiment(text, keywords_df=None):
    """Classify *text* by TPB factor and sentiment and predict a purchase decision.

    The text is run through ``pipeline_tpb`` and ``sentiment_pipeline``. If
    any keyword from the first column of the keyword table occurs in the text
    (case-insensitively), the sentiment is forced to ``'negative'`` with a
    score of 1.0. The input and predicted labels are then recorded through
    ``hf_writer``.

    Args:
        text: The user comment to analyse.
        keywords_df: Optional DataFrame whose first column lists the override
            keywords. When omitted - as it is when the function is wired to a
            single Gradio input - the table is read from ``IMG_8137.xlsx``.

    Returns:
        A ``(tpb_output, sentiment_output, decision_output)`` tuple of
        display strings.
    """
    result_tpb = pipeline_tpb(text)
    tpb_label = result_tpb[0]['label']

    result_sentiment = sentiment_pipeline(text)
    sentiment_label = result_sentiment[0]['label']
    sentiment_score = result_sentiment[0]['score']

    if keywords_df is None:
        keywords_df = pd.read_excel('IMG_8137.xlsx')

    # Check for keywords in the first column of the DataFrame. Empty cells
    # are dropped and remaining values coerced to str so non-text cells
    # cannot break the substring test.
    keywords = keywords_df.iloc[:, 0].dropna().astype(str)
    lowered_text = text.lower()
    if any(keyword.strip() and keyword.lower() in lowered_text for keyword in keywords):
        sentiment_label = 'negative'
        sentiment_score = 1.0

    decision = predict_decision(sentiment_label)
    tpb_output = f"TPB Label: {tpb_label}"
    sentiment_output = f"Sentiment: {sentiment_label}\nProbability: {sentiment_score * 100:.2f}%"
    decision_output = f"Decision: {decision}"

    hf_writer.flag([text,tpb_label, sentiment_label])
    return tpb_output, sentiment_output, decision_output
# Sample comments passed to gr.Examples for the "Input Comment" textbox.
examples = [
    "Alhamdulillah, hari ni dapat makan dekat restoran halal baru. Rasa puas hati dan tenang bila tau makanan yang kita makan dijamin halal.",
    "Semua orang cakap kena check logo halal sebelum beli makanan. Dah jadi macam second nature dah sekarang. Korang pun sama kan?"
]
# Custom stylesheet passed to gr.Blocks. The override of Gradio's body
# background variable uses the standard two-dash custom-property prefix; the
# previous four-dash spelling named a different, unused property.
css = """
:root {
--bg: #FFFFFF; /* Set the background color to white */
--col: #191919; /* Define primary text color */
--bg-dark: #000000; /* Define dark background color if needed */
--col-dark: #ECF2F7; /* Define dark text color if needed */
--body-background-fill: #FFFFFF;
}
html, body {
background-color: var(--bg); /* Set the background color to white for the entire page */
margin: 0; /* Remove default body margin */
padding: 0; /* Remove default body padding */
}
.container {
max-width: 1000px;
margin: auto;
padding: 20px;
}
.title {
text-align: center;
margin-bottom: 20px;
}
.nav-buttons {
display: flex;
justify-content: center;
gap: 10px;
margin-bottom: 20px;
}
#recommendation_report {
background-color: #f9f9f9; /* Keep this background light for the report section */
padding: 20px;
border: 2px solid #e0e0e0;
border-radius: 10px;
margin-top: 20px;
font-family: Arial, sans-serif;
font-size: 14px;
}
.wrap-text {
white-space: normal !important;
word-wrap: break-word;
}
.footer {visibility: hidden}
"""
# Gradio UI: a two-tab app. "User View" classifies a single comment;
# "Company View" searches the labelled corpus and shows aggregate charts.
# NOTE(review): the nesting below was reconstructed from statement order;
# confirm which components belong inside each Row/Accordion.
with gr.Blocks(css=css + """
body, .gradio-container, .root, .wrap, #root .background .container {
background-color: white !important;
background-image: none !important;
background-fill: white !important;
}
""", theme='aisyahhrazak/miku-aisyah@=1.2.2') as demo:
    with gr.Tabs() as tabs:
        with gr.TabItem("User View", id=0):
            gr.Markdown("## Text Classification and Sentiment Analysis Based on User Input About Halal Food Acquisition")
            gr.Markdown("Enter a text to see TPB classification, sentiment analysis, and purchase prediction results!")
            input_text = gr.Textbox(lines=2, label="Input Comment", placeholder="Model can make mistakes, we are striving to improve the model.")
            with gr.Row():
                tpb_output = gr.Textbox(lines=3, label="TPB Classification")
                sentiment_output = gr.Textbox(lines=3, label="Sentiment Analysis")
                decision_output = gr.Textbox(lines=3, label="Purchase Prediction")
            # This needs to be called at some point prior to the first call to callback.flag()
            hf_writer.setup([input_text,tpb_output, sentiment_output], "flagged_data_points")
            classify_button = gr.Button("Analyze")
            # Only the comment textbox is wired as an input, so the handler is
            # invoked with a single argument.
            classify_button.click(fn=text_classification_and_sentiment, inputs=input_text, outputs=[tpb_output, sentiment_output, decision_output])
            gr.Examples(examples=examples, inputs=input_text)
        with gr.TabItem("Company View", id=1):
            gr.Markdown("# Sentiment Analysis and Purchase Decision Factor for Halal Food Acquisition")
            # Rebinds input_text; the "User View" wiring above already holds
            # its own reference to the first textbox.
            input_text = gr.Textbox(lines=1, label="Search Keyword", placeholder="Enter keyword")
            search_button = gr.Button("Search")
            with gr.Row():
                sentiment_chart = gr.Plot(label="Sentiment Distribution")
                tpb_chart = gr.Plot(label="TPB Factor Distribution")
            # NOTE(review): presumably rendered below the row above - confirm.
            tpb_sentiment_chart = gr.Plot(label="Sentiment Distribution within TPB Factors")
            # Update word cloud outputs to be in a single row
            gr.Markdown("### Word Clouds by TPB Label")
            with gr.Row():
                attitude_wc = gr.Image(label="Attitude Word Cloud", height=200, width=300)
                religious_knowledge_wc = gr.Image(label="Religious Knowledge Word Cloud", height=200, width=300)
                subjective_norms_wc = gr.Image(label="Subjective Norms Word Cloud",height=200, width=300)
                perceived_behavioural_control_wc = gr.Image(label="Perceived Behavioural Control Word Cloud", height=200, width=300)
            with gr.Accordion("See Recommendation Details"):
                report_output = gr.Markdown(label="Recommendation Report", elem_id="recommendation_report")
            gr.Markdown("### Top Bigrams by TPB Label")
            bigram_table = gr.Dataframe(label="Top Bigrams for Each TPB Label")
            output_table = gr.Dataframe(
                headers=["text", "tpb_label", "sentiment", "score"],
                label="Company Analysis Results",
                wrap=True
            )
            # The outputs list follows the order of the tuple returned by
            # search_company.
            search_button.click(
                fn=search_company,
                inputs=input_text,
                outputs=[
                    sentiment_chart, tpb_chart, tpb_sentiment_chart, output_table, report_output,
                    attitude_wc, religious_knowledge_wc, subjective_norms_wc, perceived_behavioural_control_wc,bigram_table
                ]
            )
demo.launch()