Spaces:
Sleeping
Sleeping
import json
import os
import time
from typing import Dict, List, Optional

import httpx
import pandas as pd
import streamlit as st
class GroqHRGenerator:
    """Generate HR-employee conversation datasets via Groq's chat-completions endpoint.

    Failures (HTTP errors, unparsable replies) are reported on the Streamlit
    page with ``st.error`` and signalled to the caller by a ``None`` return
    instead of an exception.
    """

    # Model ID used when the caller does not pick one.
    # NOTE(review): hosted model IDs get retired over time; confirm this one is
    # still served (see Groq's deprecations page) or pass ``model=`` explicitly.
    DEFAULT_MODEL = "mixtral-8x7b-32768"

    # Instructions sent as the system message with every request.
    SYSTEM_PROMPT = (
        "You are a conversation generator for HR-employee interactions.\n"
        "Generate realistic conversations with emotional context and natural flow.\n"
        "Output should be in JSON format with the following structure for each turn:\n"
        '{"role": "employee/hr", "message": "text", "emotion": "emotion_name"}'
    )

    def __init__(self, api_key: str, model: str = DEFAULT_MODEL):
        """Store the credentials and request settings.

        Args:
            api_key: Groq API key, sent as a bearer token.
            model: Model ID placed in every request payload.
        """
        self.api_key = api_key
        self.model = model
        self.base_url = "https://api.groq.com/openai/v1/chat/completions"
        self.headers = {
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json",
        }

    def _call_groq_api(self, prompt: str) -> Optional[str]:
        """POST ``prompt`` as the user message and return the reply text.

        Returns:
            The ``content`` of the first choice's message, or ``None`` when the
            request or the decoding of the response fails (the error is shown
            with ``st.error``).
        """
        payload = {
            "model": self.model,
            "messages": [
                {"role": "system", "content": self.SYSTEM_PROMPT},
                {"role": "user", "content": prompt},
            ],
            "temperature": 0.7,
            "max_tokens": 1000,
        }
        try:
            response = httpx.post(
                self.base_url,
                headers=self.headers,
                json=payload,
                timeout=30.0,
            )
            response.raise_for_status()
            return response.json()['choices'][0]['message']['content']
        except Exception as e:
            # Deliberately broad: this is the boundary between the network and
            # the UI, and any failure should become a page message, not a crash.
            st.error(f"Error calling Groq API: {e}")
            return None

    @staticmethod
    def _parse_conversation(raw: str) -> List[Dict]:
        """Turn the model's reply text into a list of well-formed turn dicts.

        Markdown code fences are stripped first. If the remaining text is not
        valid JSON as a whole (for example because the model added prose around
        it), the span from the first ``[`` to the last ``]`` is parsed instead.
        A single turn object is wrapped in a list, and an object that wraps the
        turns (e.g. ``{"conversation": [...]}``) is unwrapped to its first list
        value. Entries that are not dicts or lack ``role``/``message`` are
        dropped.

        Raises:
            json.JSONDecodeError: If no JSON can be decoded from ``raw``.
        """
        cleaned = raw.replace("```json", "").replace("```", "").strip()
        try:
            parsed = json.loads(cleaned)
        except json.JSONDecodeError:
            start, end = cleaned.find("["), cleaned.rfind("]")
            if start == -1 or end <= start:
                raise
            parsed = json.loads(cleaned[start:end + 1])

        if isinstance(parsed, dict):
            if "role" in parsed and "message" in parsed:
                parsed = [parsed]
            else:
                parsed = next(
                    (value for value in parsed.values() if isinstance(value, list)),
                    [],
                )
        if not isinstance(parsed, list):
            return []
        return [
            turn for turn in parsed
            if isinstance(turn, dict) and "role" in turn and "message" in turn
        ]

    @staticmethod
    def _turns_to_rows(conversation: List[Dict], conversation_id: int, scenario: str) -> List[Dict]:
        """Flatten one conversation into dataset rows (one dict per turn)."""
        return [
            {
                'conversation_id': conversation_id,
                'role': turn['role'],
                'message': turn['message'],
                # A missing, null or empty emotion is recorded as 'N/A'.
                'emotion': turn.get('emotion') or 'N/A',
                'scenario': scenario,
            }
            for turn in conversation
        ]

    def generate_conversation(self, scenario: str) -> Optional[List[Dict]]:
        """Ask the model for one conversation about ``scenario``.

        Returns:
            A non-empty list of turn dicts, each holding at least ``role`` and
            ``message``; ``None`` when the API call fails, the reply cannot be
            parsed as JSON, or it contains no usable turns.
        """
        prompt = (
            "\n"
            "Generate a realistic HR-employee conversation about the following scenario:\n"
            f"{scenario}\n"
            "The conversation should:\n"
            "1. Include natural emotional responses from the employee\n"
            "2. Show professional and empathetic responses from HR\n"
            "3. Have a natural flow and progression\n"
            "4. Include 12-15 turns between the employee and HR\n"
            "Return the conversation in JSON format as a list of messages, where each message has:\n"
            "- role (employee/hr)\n"
            "- message (the actual text)\n"
            "- emotion (for employee messages only)\n"
        )
        response = self._call_groq_api(prompt)
        if not response:
            return None
        try:
            turns = self._parse_conversation(response)
        except json.JSONDecodeError as e:
            st.error(f"Error parsing JSON response: {e}")
            return None
        if not turns:
            st.error("The model response contained no usable conversation turns.")
            return None
        return turns

    def generate_dataset(self, scenarios: List[str]) -> Optional[pd.DataFrame]:
        """Generate one conversation per scenario and collect all turns in a frame.

        Conversation IDs are the 1-based positions of the scenarios. Scenarios
        whose generation fails contribute no rows.

        Returns:
            A DataFrame with columns ``conversation_id``, ``role``, ``message``,
            ``emotion`` and ``scenario``, or ``None`` if no turn was produced.
        """
        all_turns = []
        total = len(scenarios)
        for scenario_idx, scenario in enumerate(scenarios, 1):
            with st.spinner(f'Generating conversation for scenario {scenario_idx}...'):
                conversation = self.generate_conversation(scenario)
            if conversation:
                all_turns.extend(self._turns_to_rows(conversation, scenario_idx, scenario))
            # Small delay between API calls; pointless after the final one.
            if scenario_idx < total:
                time.sleep(1)
        if all_turns:
            return pd.DataFrame(all_turns)
        return None
def main():
    """Render the Streamlit page for building an HR conversation dataset.

    The page collects a Groq API key and an editable list of scenarios, runs
    ``GroqHRGenerator.generate_dataset`` when "Generate Dataset" is clicked,
    and shows the resulting table with a CSV download button.

    The generated DataFrame is kept in ``st.session_state`` so that it stays on
    the page across reruns: a button only reports ``True`` on the run right
    after its click, so rendering the results inside the button branch would
    make them vanish on the next interaction (including the rerun triggered by
    the download button).
    """
    result_key = "generated_df"  # session-state slot holding the last dataset

    st.title("HR Conversation Dataset Generator")
    st.write("Generate realistic HR-employee conversations based on different scenarios.")

    # API Key input
    api_key = st.text_input("Enter your Groq API Key:", type="password")

    # Scenario input
    st.subheader("Enter Scenarios")
    st.write("Add scenarios for generating conversations. Each scenario will generate a unique conversation.")

    # Initialize scenarios list in session state if it doesn't exist
    if 'scenarios' not in st.session_state:
        st.session_state.scenarios = [""]

    def add_scenario():
        """Append an empty scenario field."""
        st.session_state.scenarios.append("")

    def remove_scenario(index):
        """Drop the scenario stored at ``index``."""
        st.session_state.scenarios.pop(index)

    # Display scenario input fields and collect their current text
    new_scenarios = []
    for i, scenario in enumerate(st.session_state.scenarios):
        col1, col2 = st.columns([6, 1])
        with col1:
            new_scenario = st.text_area(f"Scenario {i+1}", scenario, key=f"scenario_{i}")
            new_scenarios.append(new_scenario)
        with col2:
            # The first scenario cannot be removed, so it gets no button.
            if i > 0 and st.button("Remove", key=f"remove_{i}"):
                remove_scenario(i)
                st.rerun()
    st.session_state.scenarios = new_scenarios

    if st.button("Add Another Scenario"):
        add_scenario()
        st.rerun()

    # Generate button
    if st.button("Generate Dataset"):
        if not api_key:
            st.error("Please enter your Groq API key.")
            return

        # Filter out empty scenarios
        scenarios = [s for s in st.session_state.scenarios if s.strip()]
        if not scenarios:
            st.error("Please enter at least one scenario.")
            return

        generator = GroqHRGenerator(api_key)
        df = generator.generate_dataset(scenarios)
        if df is not None:
            st.session_state[result_key] = df
            st.success("Dataset generated successfully!")
        else:
            # Do not keep showing a dataset from an earlier, unrelated run.
            st.session_state.pop(result_key, None)
            st.error("Failed to generate dataset. Please try again.")

    # Render the most recent dataset on every run, not only right after the click.
    df = st.session_state.get(result_key)
    if df is not None:
        st.subheader("Generated Dataset")
        st.dataframe(df)

        # Download button
        csv = df.to_csv(index=False)
        st.download_button(
            label="Download CSV",
            data=csv,
            file_name="hr_conversations.csv",
            mime="text/csv",
        )
# Script entry point: build the page only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()