Spaces:

rohangbs
/

Classifier

Sleeping

App Files Files Community

rohangbs committed on Feb 19, 2025

Commit

cc8c18c

verified ·

1 Parent(s): f9f07b6

Create app.py

Browse files

Files changed (1) hide show

app.py +541 -0

app.py ADDED Viewed

	@@ -0,0 +1,541 @@

import hashlib
import json
import re
import time
from concurrent.futures import ThreadPoolExecutor
from io import StringIO

import pandas as pd
import plotly.express as px
import streamlit as st
from groq import Groq
class CustomConversationIntentClassifier:
    """Rule-based intent classifier driven by a two-level pattern hierarchy.

    ``intent_hierarchy`` maps ``main category -> subcategory -> list of regex
    patterns``. The hierarchy is kept in ``st.session_state['custom_intents']``
    so that categories added at runtime are reused by later instances created
    in the same Streamlit session.
    """

    def __init__(self):
        # Reuse the hierarchy already stored in the session; otherwise seed
        # the session with the built-in defaults.
        if 'custom_intents' not in st.session_state:
            self.intent_hierarchy = self._default_intent_hierarchy()
            st.session_state['custom_intents'] = self.intent_hierarchy
        else:
            self.intent_hierarchy = st.session_state['custom_intents']

    @staticmethod
    def _default_intent_hierarchy():
        """Return a fresh copy of the built-in intent hierarchy."""
        return {
            "A. Communication & Response Intent": {
                "Information-Seeking": [
                    r"what", r"how", r"why", r"when", r"where", r"who",
                    r"want to know", r"tell me about", r"can you explain"
                ],
                "Clarification": [
                    r"explain", r"clarify", r"what do you mean", r"repeat",
                    r"didn't understand", r"could you elaborate"
                ],
                "Agreement": [
                    r"yes", r"agree", r"makes sense", r"exactly",
                    r"that's right", r"correct"
                ],
                "Disagreement": [
                    r"no", r"don't agree", r"incorrect", r"that's wrong",
                    r"i disagree", r"not correct"
                ],
                "Acknowledgment": [
                    r"got it", r"i see", r"understood", r"noted",
                    r"alright", r"okay"
                ],
                "Apology": [
                    r"sorry", r"apologize", r"my mistake", r"my fault",
                    r"i apologize", r"regret"
                ],
                "Appreciation": [
                    r"thank you", r"thanks", r"appreciate", r"grateful",
                    r"thank you for your help"
                ],
                "Urgency": [
                    r"asap", r"urgent", r"immediately", r"right away",
                    r"emergency", r"as soon as possible"
                ]
            },
            "B. Decision-Making Intent": {
                "Exploration": [
                    r"consider", r"explore", r"what if", r"options",
                    r"alternatives", r"possibilities"
                ],
                "Commitment": [
                    r"decided", r"will do", r"i've made my decision",
                    r"going to", r"i will", r"definitely"
                ],
                "Indecision": [
                    r"not sure", r"unsure", r"undecided", r"can't decide",
                    r"torn between", r"haven't decided"
                ],
                "Delegation": [
                    r"can you handle", r"take care of", r"assign",
                    r"please handle", r"can you manage"
                ],
                "Evaluation": [
                    r"compare", r"evaluate", r"assess", r"weigh",
                    r"pros and cons", r"better option"
                ]
            },
            "C. Emotional & Psychological Intent": {
                "Seeking Validation": [
                    r"am i right", r"is this correct", r"does this make sense",
                    r"what do you think", r"how did i do"
                ],
                "Seeking Support": [
                    r"need help", r"support", r"assist", r"guide",
                    r"can you help", r"struggling with"
                ],
                "Expressing Frustration": [
                    r"annoying", r"frustrated", r"irritating", r"fed up",
                    r"this is difficult", r"getting nowhere"
                ],
                "Venting": [
                    r"just need to", r"off my chest", r"let me tell you",
                    r"you won't believe", r"so tired of"
                ],
                "Seeking Comfort": [
                    r"feeling down", r"upset", r"worried", r"anxious",
                    r"stressed", r"not feeling great"
                ]
            },
            "D. Social & Relationship Intent": {
                "Social Bonding": [
                    r"coffee", r"lunch", r"catch up", r"get together",
                    r"hang out", r"meet up"
                ],
                "Networking": [
                    r"connect", r"network", r"introduction", r"link up",
                    r"get in touch", r"reach out"
                ],
                "Collaboration": [
                    r"work together", r"collaborate", r"team up",
                    r"join forces", r"partner"
                ],
                "Teaching": [
                    r"let me show", r"teach", r"explain how",
                    r"guide you through", r"help you understand"
                ],
                "Testing Boundaries": [
                    r"be honest", r"frank", r"between us",
                    r"confidential", r"keep this private"
                ]
            },
            "E. Action-Oriented Intent": {
                "Requesting Action": [
                    r"can you", r"please", r"would you", r"need you to",
                    r"send", r"do this"
                ],
                "Offering Help": [
                    r"can i help", r"let me help", r"assistance",
                    r"i can do", r"happy to help"
                ],
                "Providing Feedback": [
                    r"feedback", r"suggestion", r"think about",
                    r"my opinion", r"recommend"
                ],
                "Expressing Intent to Quit": [
                    r"quit", r"give up", r"stop", r"abandon",
                    r"no longer want", r"discontinue"
                ],
                "Confirming Action": [
                    r"is this done", r"completed", r"finished",
                    r"status", r"update"
                ]
            }
        }

    def add_intent_category(self, main_category, subcategory, patterns):
        """Register ``patterns`` under ``main_category``/``subcategory``.

        The main category is created when missing; an existing subcategory is
        overwritten. The updated hierarchy is written back to session state.
        """
        if main_category not in self.intent_hierarchy:
            self.intent_hierarchy[main_category] = {}
        self.intent_hierarchy[main_category][subcategory] = patterns
        st.session_state['custom_intents'] = self.intent_hierarchy

    def preprocess_text(self, text):
        """Lower-case ``text`` and replace punctuation with spaces.

        Missing values (anything ``pd.isna`` accepts) become ``""``.
        Apostrophes are kept: several built-in patterns ("didn't understand",
        "can't decide", ...) contain one and could never match if the text
        had them stripped.
        """
        if pd.isna(text):
            return ""
        text = str(text).lower()
        # Fold typographic apostrophes onto the ASCII one used by the patterns.
        text = text.replace('\u2019', "'").replace('\u2018', "'")
        text = re.sub(r"[^\w\s']", ' ', text)
        return text

    @staticmethod
    def _compile_pattern(pattern):
        """Compile ``pattern`` wrapped in word boundaries.

        Patterns may come from user input; one that is not a valid regular
        expression is matched as literal text instead of raising.
        """
        pattern = str(pattern)
        try:
            return re.compile(r'\b' + pattern + r'\b')
        except re.error:
            return re.compile(r'\b' + re.escape(pattern) + r'\b')

    def classify_intent(self, text):
        """Return the matching intents for ``text``.

        At most one entry is produced per main category: the first
        subcategory (in hierarchy order) with a matching pattern. When
        nothing matches, a single ``Unclassified``/``Other`` entry is
        returned, so the result is never empty.
        """
        text = self.preprocess_text(text)
        results = []
        for main_category, subcategories in self.intent_hierarchy.items():
            for subcategory, patterns in subcategories.items():
                if any(self._compile_pattern(p).search(text) for p in patterns):
                    results.append({
                        'main_category': main_category,
                        'subcategory': subcategory
                    })
                    # First hit wins for this main category. Breaking here,
                    # rather than comparing subcategory names afterwards,
                    # keeps a reused subcategory name in another main
                    # category from skipping that category.
                    break
        if not results:
            return [{'main_category': 'Unclassified', 'subcategory': 'Other'}]
        return results

    def process_conversation(self, df):
        """Classify the ``HR`` and ``Employee`` columns of ``df``.

        Returns a new DataFrame holding each message next to the main
        category and subcategory of its first detected intent.
        """
        hr_first = [self.classify_intent(msg)[0] for msg in df['HR']]
        employee_first = [self.classify_intent(msg)[0] for msg in df['Employee']]
        return pd.DataFrame({
            'HR_Message': df['HR'],
            'HR_Main_Category': [intent['main_category'] for intent in hr_first],
            'HR_Subcategory': [intent['subcategory'] for intent in hr_first],
            'Employee_Message': df['Employee'],
            'Employee_Main_Category': [intent['main_category'] for intent in employee_first],
            'Employee_Subcategory': [intent['subcategory'] for intent in employee_first]
        })
class EnhancedConversationAnalyzer:
    """Scores conversation messages with a Groq-hosted LLM.

    Each message is analysed for sentiment, psychological indicators and a
    satisfaction score. Every analysis is returned in the fixed structure
    produced by :meth:`get_empty_analysis`.
    """

    # The prompt asks the model for "max 50 words"; explanations are capped
    # at the same number of words.
    MAX_EXPLANATION_WORDS = 50

    def __init__(self, groq_api_key):
        self.client = Groq(api_key=groq_api_key)
        # System prompt for consistent analysis
        self.system_prompt = """You are an expert conversation analyzer focusing on workplace communications.
        Analyze conversations for sentiment, psychological aspects, and satisfaction levels.
        Always respond with valid JSON containing numerical scores and brief explanations."""

    def clean_json_response(self, response_text):
        """Extract and parse the JSON object embedded in ``response_text``.

        The span from the first ``{`` to the last ``}`` is parsed. When the
        text is not a string, has no such span, or the span is not valid
        JSON, the empty analysis structure is returned instead.
        """
        if not isinstance(response_text, str):
            return self.get_empty_analysis()
        start = response_text.find('{')
        end = response_text.rfind('}') + 1
        if start == -1 or end == 0:
            return self.get_empty_analysis()
        try:
            return json.loads(response_text[start:end])
        except ValueError:  # json.JSONDecodeError is a ValueError
            return self.get_empty_analysis()

    def _request_analysis(self, message, role):
        """Analyse one message and return ``(analysis, error_message)``.

        ``error_message`` is ``None`` on success. This method never calls
        Streamlit, so it can be run on a worker thread.
        """
        if pd.isna(message):
            return self.get_empty_analysis(), None
        prompt = f"""Analyze this {role} message and respond ONLY with a JSON object:
Message: "{message}"
Required JSON format:
{{
    "sentiment": {{
        "compound": <float between -1 and 1>,
        "positive": <float between 0 and 1>,
        "negative": <float between 0 and 1>
    }},
    "psychological": {{
        "stress": <integer between 0 and 10>,
        "confidence": <integer between 0 and 10>,
        "frustration": <integer between 0 and 10>
    }},
    "satisfaction": <integer between 0 and 100>,
    "explanation": "<brief analysis, max 50 words>"
}}
Ensure the response is ONLY the JSON object with no additional text."""
        try:
            completion = self.client.chat.completions.create(
                messages=[
                    {"role": "system", "content": self.system_prompt},
                    {"role": "user", "content": prompt}
                ],
                model="llama-3.3-70b-versatile",
                temperature=0.1,
            )
            response_text = completion.choices[0].message.content
            analysis = self.validate_analysis(self.clean_json_response(response_text))
            return analysis, None
        except Exception as e:
            # Any client/network failure degrades to an empty analysis; the
            # caller decides how to surface the message.
            return self.get_empty_analysis(), f"Error analyzing message: {str(e)}"

    def analyze_message(self, message, role):
        """Analyze a single message using Groq LLM.

        Returns the validated analysis dict. Missing messages and failed
        requests yield the empty analysis; failures are also reported with
        ``st.error``.
        """
        analysis, error = self._request_analysis(message, role)
        if error:
            st.error(error)
        return analysis

    @staticmethod
    def _clamp(value, low, high, cast, default):
        """Coerce ``value`` to a number in ``[low, high]`` or return ``default``.

        Non-numeric values and NaN fall back to ``default``; the clamp runs
        before ``cast`` so infinities are handled as well.
        """
        try:
            number = float(value)
        except (TypeError, ValueError):
            return default
        if number != number:  # NaN compares unequal to itself
            return default
        return cast(max(low, min(high, number)))

    def validate_analysis(self, analysis):
        """Validate and sanitize analysis values.

        Every field is coerced and clamped on its own, so one malformed
        value only resets that field to its default rather than discarding
        the fields after it. Anything that is not a dict yields the empty
        analysis.
        """
        template = self.get_empty_analysis()
        if not isinstance(analysis, dict):
            return template

        sentiment = analysis.get('sentiment')
        if not isinstance(sentiment, dict):
            sentiment = {}
        template['sentiment']['compound'] = self._clamp(sentiment.get('compound', 0), -1, 1, float, 0.0)
        template['sentiment']['positive'] = self._clamp(sentiment.get('positive', 0), 0, 1, float, 0.0)
        template['sentiment']['negative'] = self._clamp(sentiment.get('negative', 0), 0, 1, float, 0.0)

        psychological = analysis.get('psychological')
        if not isinstance(psychological, dict):
            psychological = {}
        for key in ('stress', 'confidence', 'frustration'):
            template['psychological'][key] = self._clamp(psychological.get(key, 0), 0, 10, int, 0)

        template['satisfaction'] = self._clamp(analysis.get('satisfaction', 0), 0, 100, int, 0)

        explanation = str(analysis.get('explanation', ''))
        words = explanation.split()
        if len(words) > self.MAX_EXPLANATION_WORDS:
            # Cap by words (the unit the prompt uses), not by characters.
            explanation = ' '.join(words[:self.MAX_EXPLANATION_WORDS])
        template['explanation'] = explanation
        return template

    def get_empty_analysis(self):
        """Return empty analysis structure"""
        return {
            "sentiment": {"compound": 0.0, "positive": 0.0, "negative": 0.0},
            "psychological": {"stress": 0, "confidence": 0, "frustration": 0},
            "satisfaction": 0,
            "explanation": "No message to analyze"
        }

    @staticmethod
    def _flatten_analysis(prefix, message, analysis):
        """Flatten one analysis dict into ``<prefix>_*`` result columns."""
        return {
            f'{prefix}_Message': message,
            f'{prefix}_Sentiment_Compound': analysis['sentiment']['compound'],
            f'{prefix}_Sentiment_Positive': analysis['sentiment']['positive'],
            f'{prefix}_Sentiment_Negative': analysis['sentiment']['negative'],
            f'{prefix}_Satisfaction_Score': analysis['satisfaction'],
            f'{prefix}_Stress_Level': analysis['psychological']['stress'],
            f'{prefix}_Confidence_Level': analysis['psychological']['confidence'],
            f'{prefix}_Frustration_Level': analysis['psychological']['frustration'],
            f'{prefix}_Analysis': analysis['explanation'],
        }

    def process_conversation(self, df):
        """Process conversation with LLM analysis.

        For each row of ``df`` the ``HR`` and ``Employee`` messages are
        analysed concurrently. Returns a DataFrame with one row per input
        row and the flattened HR columns followed by the Employee columns.
        """
        results = []
        total_rows = len(df)
        progress_bar = st.progress(0)
        with ThreadPoolExecutor(max_workers=4) as executor:
            # Count rows by position: the index label from iterrows() need
            # not be 0..n-1, which would push the progress fraction outside
            # [0, 1] or fail outright for non-numeric labels.
            for position, (_, row) in enumerate(df.iterrows(), start=1):
                hr_future = executor.submit(self._request_analysis, row['HR'], 'HR')
                emp_future = executor.submit(self._request_analysis, row['Employee'], 'Employee')
                hr_analysis, hr_error = hr_future.result()
                emp_analysis, emp_error = emp_future.result()
                # Report failures here, on the script thread, rather than
                # from inside the worker threads.
                for error in (hr_error, emp_error):
                    if error:
                        st.error(error)
                record = self._flatten_analysis('HR', row['HR'], hr_analysis)
                record.update(self._flatten_analysis('Employee', row['Employee'], emp_analysis))
                results.append(record)
                progress_bar.progress(position / total_rows)
                # Add a small delay to avoid rate limits
                time.sleep(0.1)
        progress_bar.empty()
        return pd.DataFrame(results)
def create_intent_distribution_plot(df, role):
    """Build a bar chart of how often each main intent category occurs.

    ``role`` selects the ``<role>_Main_Category`` column of ``df``; the
    returned Plotly figure has one bar per distinct category value.
    """
    counts = df[f'{role}_Main_Category'].value_counts()
    axis_labels = {'x': 'Intent Category', 'y': 'Count'}
    return px.bar(
        x=counts.index,
        y=counts.values,
        title=f'Intent Distribution for {role}',
        labels=axis_labels,
    )
def _is_valid_intent_hierarchy(candidate):
    """Return True when ``candidate`` is ``{main: {sub: [pattern, ...]}}``."""
    if not isinstance(candidate, dict):
        return False
    for subcategories in candidate.values():
        if not isinstance(subcategories, dict):
            return False
        for patterns in subcategories.values():
            if not isinstance(patterns, list):
                return False
            if not all(isinstance(pattern, str) for pattern in patterns):
                return False
    return True


def _invalid_patterns(pattern_list):
    """Return the patterns that do not compile as word-bounded regexes."""
    rejected = []
    for pattern in pattern_list:
        try:
            re.compile(r'\b' + pattern + r'\b')
        except re.error:
            rejected.append(pattern)
    return rejected


def intent_management_ui():
    """Render the sidebar controls for adding, viewing and exchanging intents.

    Operates on the classifier stored in ``st.session_state.classifier``.
    """
    classifier = st.session_state.classifier
    st.sidebar.header("Custom Intent Management")

    # Add new intent category
    with st.sidebar.expander("Add New Intent Category"):
        main_category = st.text_input("Main Category (e.g., F. Custom Intent)")
        subcategory = st.text_input("Subcategory (e.g., Custom Type)")
        patterns = st.text_area("Patterns (one per line)")
        if st.button("Add Intent"):
            pattern_list = [p.strip() for p in patterns.split('\n') if p.strip()]
            rejected = _invalid_patterns(pattern_list)
            if not (main_category and subcategory and pattern_list):
                # Check the cleaned list, not the raw text: a box holding
                # only whitespace would otherwise create an empty subcategory.
                st.warning("Please provide a main category, a subcategory and at least one pattern.")
            elif rejected:
                st.error(f"Invalid pattern(s): {', '.join(rejected)}")
            else:
                classifier.add_intent_category(main_category, subcategory, pattern_list)
                st.success(f"Added new intent: {main_category} - {subcategory}")

    # View current intents
    with st.sidebar.expander("View Current Intents"):
        st.json(classifier.intent_hierarchy)

    # Export/Import intents
    with st.sidebar.expander("Export/Import Intents"):
        # Render the download button directly. Nested inside another
        # button's `if`, it would disappear again on the next rerun.
        st.download_button(
            label="Download Intents JSON",
            data=json.dumps(classifier.intent_hierarchy, indent=2),
            file_name="custom_intents.json",
            mime="application/json"
        )
        uploaded_json = st.file_uploader("Import Intents JSON", type="json")
        if uploaded_json is None:
            # Uploader cleared: allow the same file to be imported again.
            st.session_state.pop('imported_intents_signature', None)
        else:
            payload = uploaded_json.getvalue()
            signature = (uploaded_json.name, hashlib.sha256(payload).hexdigest())
            # The uploader keeps returning the file on every rerun. Import it
            # only once, otherwise intents added afterwards would be
            # overwritten by the file contents on the next interaction.
            if st.session_state.get('imported_intents_signature') != signature:
                try:
                    new_intents = json.loads(payload)
                except ValueError as e:
                    st.error(f"Error importing intents: {str(e)}")
                else:
                    if _is_valid_intent_hierarchy(new_intents):
                        classifier.intent_hierarchy = new_intents
                        st.session_state['custom_intents'] = new_intents
                        st.session_state['imported_intents_signature'] = signature
                        st.success("Successfully imported intents")
                    else:
                        st.error(
                            "Error importing intents: expected a JSON object mapping "
                            "main categories to subcategories to lists of patterns"
                        )
def _render_intent_section(df):
    """Classify intents for ``df``, render the results and return them."""
    with st.expander("Intent Classification Results"):
        with st.spinner("Classifying intents..."):
            intent_results = st.session_state.classifier.process_conversation(df)
        st.dataframe(intent_results)
        st.subheader("Intent Distribution")
        for role in ('HR', 'Employee'):
            st.plotly_chart(create_intent_distribution_plot(intent_results, role))
        # Download intent results
        st.download_button(
            label="Download intent classification results as CSV",
            data=intent_results.to_csv(index=False),
            file_name="classified_conversations.csv",
            mime="text/csv"
        )
    return intent_results


def _render_sentiment_section(df, groq_api_key, cache_key):
    """Run (or reuse) the LLM sentiment analysis, render it and return it.

    Streamlit re-executes the script on every widget interaction, including
    clicks on the download buttons. The analysis is therefore cached in
    session state under ``cache_key`` so that those reruns do not repeat
    every LLM request.
    """
    with st.expander("Sentiment Analysis Results"):
        force_rerun = st.button("Re-run sentiment analysis")
        cached = st.session_state.get('sentiment_cache')
        if not force_rerun and cached is not None and cached['key'] == cache_key:
            sentiment_results = cached['results']
        else:
            analyzer = EnhancedConversationAnalyzer(groq_api_key)
            with st.spinner("Analyzing sentiments using AI... This may take a few minutes."):
                sentiment_results = analyzer.process_conversation(df)
            st.session_state['sentiment_cache'] = {
                'key': cache_key,
                'results': sentiment_results,
            }
        # Display sentiment summary metrics
        col1, col2, col3 = st.columns(3)
        with col1:
            st.metric(
                "Average HR Satisfaction",
                f"{sentiment_results['HR_Satisfaction_Score'].mean():.1f}%"
            )
        with col2:
            st.metric(
                "Average Employee Satisfaction",
                f"{sentiment_results['Employee_Satisfaction_Score'].mean():.1f}%"
            )
        with col3:
            # This figure is the mean of the Employee compound score only.
            st.metric(
                "Overall Sentiment",
                f"{sentiment_results['Employee_Sentiment_Compound'].mean():.2f}"
            )
        # Display sentiment visualizations
        sentiment_fig = px.line(
            sentiment_results,
            y=['HR_Sentiment_Compound', 'Employee_Sentiment_Compound'],
            title='Sentiment Trends',
            labels={'value': 'Sentiment Score', 'index': 'Message Number'}
        )
        st.plotly_chart(sentiment_fig)
        satisfaction_fig = px.line(
            sentiment_results,
            y=['HR_Satisfaction_Score', 'Employee_Satisfaction_Score'],
            title='Satisfaction Score Trends',
            labels={'value': 'Satisfaction Score', 'index': 'Message Number'}
        )
        st.plotly_chart(satisfaction_fig)
        # Display detailed sentiment results
        st.subheader("Detailed Sentiment Analysis")
        st.dataframe(sentiment_results)
        # Download sentiment results
        st.download_button(
            label="Download sentiment analysis results as CSV",
            data=sentiment_results.to_csv(index=False),
            file_name="sentiment_analysis.csv",
            mime="text/csv"
        )
    return sentiment_results


def _render_combined_section(intent_results, sentiment_results):
    """Render the merged intent + sentiment table, or a hint when unavailable."""
    st.subheader("Combined Analysis Results")
    if sentiment_results is None:
        st.info("Add your Groq API key and run sentiment analysis to get combined results.")
        return
    # Keep only one copy of the messages: start from the intent table and
    # append every sentiment column except the message columns.
    combined_results = intent_results.copy()
    for col in sentiment_results.columns:
        if col not in ('HR_Message', 'Employee_Message'):
            combined_results[col] = sentiment_results[col]
    st.write("Preview of combined results:")
    st.dataframe(combined_results.head())
    # Download combined results
    st.download_button(
        label="Download combined analysis results as CSV",
        data=combined_results.to_csv(index=False),
        file_name="combined_analysis.csv",
        mime="text/csv",
        key="combined_download"
    )


def main():
    """Streamlit entry point: upload a CSV and show intent/sentiment analysis.

    The CSV must provide ``HR`` and ``Employee`` columns. Intent
    classification always runs; sentiment analysis runs only when a Groq API
    key has been entered.
    """
    st.title("Comprehensive Conversation Analyzer")
    st.write("Upload a CSV file to analyze conversations using intent classification and sentiment analysis.")
    # Initialize intent classifier
    if 'classifier' not in st.session_state:
        st.session_state.classifier = CustomConversationIntentClassifier()
    # Show intent management UI in sidebar
    intent_management_ui()
    # Groq API key input for sentiment analysis
    groq_api_key = st.text_input("Enter your Groq API key for sentiment analysis", type="password")
    # File upload
    uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
    if uploaded_file is None:
        return
    try:
        file_bytes = uploaded_file.getvalue()
        uploaded_file.seek(0)
        df = pd.read_csv(uploaded_file)
        if 'HR' not in df.columns or 'Employee' not in df.columns:
            st.error("CSV file must contain 'HR' and 'Employee' columns!")
            return
        if df.empty:
            # Without rows the sentiment table has no columns and the
            # summary metrics would fail with an opaque KeyError.
            st.warning("The uploaded CSV file contains no conversation rows.")
            return
        st.subheader("Sample of Original Data")
        st.dataframe(df.head())

        intent_results = _render_intent_section(df)

        sentiment_results = None
        if groq_api_key:
            # Key the cache on file content and API key, so a new file or a
            # corrected key triggers a fresh analysis.
            cache_key = hashlib.sha256(
                file_bytes + groq_api_key.encode('utf-8')
            ).hexdigest()
            sentiment_results = _render_sentiment_section(df, groq_api_key, cache_key)
        else:
            st.warning("Please enter your Groq API key to perform sentiment analysis.")

        _render_combined_section(intent_results, sentiment_results)
    except Exception as e:
        # Top-level UI boundary: surface any failure to the user.
        st.error(f"An error occurred: {str(e)}")


if __name__ == "__main__":
    main()