File size: 6,165 Bytes
30dd9be
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
import json
import os
import time
from typing import Dict, List, Optional

import httpx
import pandas as pd
import streamlit as st

class GroqHRGenerator:
    """Generate synthetic HR-employee conversations through Groq's chat API.

    Each scenario is sent to the chat-completions endpoint, the JSON reply is
    parsed into a list of turns, and all turns are flattened into one
    DataFrame. Failures are reported with ``st.error`` and signalled to the
    caller by returning ``None`` rather than raising.
    """

    # NOTE(review): hosted model ids get retired over time; confirm this one
    # is still served by Groq, or pass ``model=...`` to the constructor.
    DEFAULT_MODEL = "mixtral-8x7b-32768"

    # Pause between consecutive API calls, in seconds.
    REQUEST_DELAY_SECONDS = 1.0

    def __init__(self, api_key: str, *, model: str = DEFAULT_MODEL):
        """Store the credentials and request settings.

        Args:
            api_key: Groq API key, sent as a bearer token.
            model: Model id placed in every request payload.
        """
        self.api_key = api_key
        self.model = model
        self.base_url = "https://api.groq.com/openai/v1/chat/completions"
        self.headers = {
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json"
        }

    def _call_groq_api(self, prompt: str) -> Optional[str]:
        """Send ``prompt`` to the chat endpoint and return the reply text.

        Returns:
            The content of the first choice, or ``None`` when the request
            fails or the response body does not have the expected shape
            (the error is shown with ``st.error``).
        """
        payload = {
            "model": self.model,
            "messages": [
                {
                    "role": "system",
                    "content": """You are a conversation generator for HR-employee interactions. 
                    Generate realistic conversations with emotional context and natural flow. 
                    Output should be in JSON format with the following structure for each turn:
                    {"role": "employee/hr", "message": "text", "emotion": "emotion_name"}"""
                },
                {
                    "role": "user",
                    "content": prompt
                }
            ],
            "temperature": 0.7,
            "max_tokens": 1000
        }

        try:
            response = httpx.post(
                self.base_url,
                headers=self.headers,
                json=payload,
                timeout=30.0
            )
            response.raise_for_status()
            return response.json()['choices'][0]['message']['content']
        except (httpx.HTTPError, KeyError, IndexError, TypeError, ValueError) as e:
            # httpx.HTTPError covers transport failures and non-2xx statuses;
            # the remaining types cover a body that is not JSON or lacks the
            # choices/message/content path.
            st.error(f"Error calling Groq API: {e}")
            return None

    @staticmethod
    def _parse_conversation(raw: str) -> List[Dict]:
        """Turn the model's raw reply into a list of turn dicts.

        Markdown code fences are stripped first. If the remaining text is not
        valid JSON, the span from the first ``[`` to the last ``]`` is tried,
        which handles replies that wrap the array in prose. A JSON object that
        is itself a single turn, or that holds exactly one list value (for
        example ``{"conversation": [...]}``), is unwrapped. Entries that are
        not dicts or that lack ``role``/``message`` are dropped.

        Raises:
            ValueError: if no usable turn can be extracted
                (``json.JSONDecodeError`` is a ``ValueError`` subclass).
        """
        text = raw.replace("```json", "").replace("```", "").strip()
        try:
            data = json.loads(text)
        except json.JSONDecodeError:
            start, end = text.find("["), text.rfind("]")
            if start == -1 or end <= start:
                raise
            data = json.loads(text[start:end + 1])

        if isinstance(data, dict):
            if "role" in data and "message" in data:
                data = [data]
            else:
                list_values = [v for v in data.values() if isinstance(v, list)]
                if len(list_values) == 1:
                    data = list_values[0]

        if not isinstance(data, list):
            raise ValueError("expected a JSON list of conversation turns")

        turns = [
            turn for turn in data
            if isinstance(turn, dict) and "role" in turn and "message" in turn
        ]
        if not turns:
            raise ValueError("no turns with both 'role' and 'message' found")
        return turns

    @staticmethod
    def _turn_row(conversation_id: int, scenario: str, turn: Dict) -> Dict:
        """Build one dataset row from a parsed turn.

        A missing, null or empty emotion is recorded as ``'N/A'``.
        """
        return {
            'conversation_id': conversation_id,
            'role': turn['role'],
            'message': turn['message'],
            'emotion': turn.get('emotion') or 'N/A',
            'scenario': scenario
        }

    def generate_conversation(self, scenario: str) -> Optional[List[Dict]]:
        """Ask the model for one conversation about ``scenario``.

        Returns:
            A list of turn dicts, each with at least ``role`` and ``message``,
            or ``None`` if the API call or the parsing failed (the error is
            shown with ``st.error``).
        """
        prompt = f"""
        Generate a realistic HR-employee conversation about the following scenario:
        {scenario}
        
        The conversation should:
        1. Include natural emotional responses from the employee
        2. Show professional and empathetic responses from HR
        3. Have a natural flow and progression
        4. Include 12-15 turns between the employee and HR
        
        Return the conversation in JSON format as a list of messages, where each message has:
        - role (employee/hr)
        - message (the actual text)
        - emotion (for employee messages only)
        """

        response = self._call_groq_api(prompt)
        if not response:
            return None
        try:
            return self._parse_conversation(response)
        except ValueError as e:
            st.error(f"Error parsing JSON response: {e}")
            return None

    def generate_dataset(self, scenarios: List[str]) -> Optional[pd.DataFrame]:
        """Generate one conversation per scenario and flatten them into rows.

        Conversation ids are the 1-based positions of the scenarios. Scenarios
        whose generation fails contribute no rows.

        Returns:
            A DataFrame with columns ``conversation_id``, ``role``,
            ``message``, ``emotion`` and ``scenario``, or ``None`` when no
            turn was produced at all.
        """
        all_turns = []
        total = len(scenarios)

        for scenario_idx, scenario in enumerate(scenarios, 1):
            with st.spinner(f'Generating conversation for scenario {scenario_idx}...'):
                conversation = self.generate_conversation(scenario)
                all_turns.extend(
                    self._turn_row(scenario_idx, scenario, turn)
                    for turn in conversation or []
                )
                # Space out API calls; nothing follows the last one, so
                # waiting there would only delay the result.
                if scenario_idx < total:
                    time.sleep(self.REQUEST_DELAY_SECONDS)

        if all_turns:
            return pd.DataFrame(all_turns)
        return None

def main():
    """Render the Streamlit page: collect scenarios, generate and show a dataset.

    The page asks for a Groq API key and a variable number of scenario texts.
    "Generate Dataset" runs ``GroqHRGenerator.generate_dataset`` over the
    non-empty scenarios; the resulting DataFrame is kept in
    ``st.session_state`` so that the table and the CSV download button are
    still rendered on later reruns of the script (for example the rerun
    triggered by clicking the download button).
    """
    st.title("HR Conversation Dataset Generator")
    st.write("Generate realistic HR-employee conversations based on different scenarios.")

    # API Key input
    api_key = st.text_input("Enter your Groq API Key:", type="password")

    # Scenario input
    st.subheader("Enter Scenarios")
    st.write("Add scenarios for generating conversations. Each scenario will generate a unique conversation.")

    # Initialize scenarios list in session state if it doesn't exist
    if 'scenarios' not in st.session_state:
        st.session_state.scenarios = [""]

    # Both helpers run as button callbacks, i.e. before the script reruns.
    # That is the point at which widget state may still be rewritten.
    def add_scenario():
        """Append an empty scenario field."""
        st.session_state.scenarios.append("")

    def remove_scenario(index):
        """Drop the scenario at ``index`` and shift the later texts up.

        The text areas are keyed by position, so removing only the list entry
        would leave the old text attached to ``scenario_{index}`` and lose the
        last field's text instead. The widget state is rewritten to match.
        """
        saved = st.session_state.scenarios
        texts = [
            st.session_state.get(f"scenario_{i}", saved[i])
            for i in range(len(saved))
        ]
        if not 0 <= index < len(texts):
            return
        del texts[index]
        for i, text in enumerate(texts):
            st.session_state[f"scenario_{i}"] = text
        stale_key = f"scenario_{len(texts)}"
        if stale_key in st.session_state:
            del st.session_state[stale_key]
        st.session_state.scenarios = texts

    # Display scenario input fields
    new_scenarios = []
    for i, scenario in enumerate(st.session_state.scenarios):
        widget_key = f"scenario_{i}"
        # Seed the widget through session state instead of the ``value``
        # argument so the keyed state is the single source of the text.
        if widget_key not in st.session_state:
            st.session_state[widget_key] = scenario
        col1, col2 = st.columns([6, 1])
        with col1:
            new_scenarios.append(st.text_area(f"Scenario {i+1}", key=widget_key))
        with col2:
            if i > 0:  # Don't allow removing the first scenario
                st.button(
                    "Remove",
                    key=f"remove_{i}",
                    on_click=remove_scenario,
                    args=(i,),
                )

    st.session_state.scenarios = new_scenarios

    st.button("Add Another Scenario", on_click=add_scenario)

    # Generate button
    if st.button("Generate Dataset"):
        # Filter out empty scenarios
        scenarios = [s for s in st.session_state.scenarios if s.strip()]

        if not api_key:
            st.error("Please enter your Groq API key.")
        elif not scenarios:
            st.error("Please enter at least one scenario.")
        else:
            generator = GroqHRGenerator(api_key)
            df = generator.generate_dataset(scenarios)

            if df is not None:
                st.session_state["generated_df"] = df
                st.success("Dataset generated successfully!")
            else:
                # Do not keep showing an older dataset next to a failure.
                if "generated_df" in st.session_state:
                    del st.session_state["generated_df"]
                st.error("Failed to generate dataset. Please try again.")

    # Rendered outside the button branch so the result survives reruns.
    df = st.session_state.get("generated_df")
    if df is not None:
        # Display the dataset
        st.subheader("Generated Dataset")
        st.dataframe(df)

        # Download button
        csv = df.to_csv(index=False)
        st.download_button(
            label="Download CSV",
            data=csv,
            file_name="hr_conversations.csv",
            mime="text/csv"
        )

# Entry point: build the page only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()