# Exported from a Hugging Face Space file viewer.
# Space status: Sleeping
# File size: 6,165 Bytes
# Commit: 30dd9be
import json
import os
import time
from typing import Dict, List, Optional

import httpx
import pandas as pd
import streamlit as st
class GroqHRGenerator:
    """Generate HR-employee conversations through Groq's chat-completions API.

    Each scenario description is sent to the model, the JSON reply is parsed
    into a list of turns, and the turns of all scenarios are flattened into a
    single pandas DataFrame. Failures are reported to the user with
    ``st.error`` and signalled to callers by returning ``None``.
    """

    # Model used when the caller does not choose one.
    # NOTE(review): hosted model ids get retired over time - confirm this one
    # is still served before relying on the default.
    DEFAULT_MODEL = "mixtral-8x7b-32768"

    # Instructions sent as the "system" message of every request.
    SYSTEM_PROMPT = (
        "You are a conversation generator for HR-employee interactions.\n"
        "Generate realistic conversations with emotional context and natural flow.\n"
        "Output should be in JSON format with the following structure for each turn:\n"
        '{"role": "employee/hr", "message": "text", "emotion": "emotion_name"}'
    )

    def __init__(self, api_key: str, model: str = DEFAULT_MODEL):
        """Store the credentials and request settings.

        Args:
            api_key: Groq API key, sent as a bearer token.
            model: Model id placed in every request payload.
        """
        self.api_key = api_key
        self.model = model
        self.base_url = "https://api.groq.com/openai/v1/chat/completions"
        self.headers = {
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json",
        }

    def _call_groq_api(self, prompt: str) -> Optional[str]:
        """Send ``prompt`` as the user message and return the model's reply.

        Returns:
            The text content of the first choice, or ``None`` when the request
            fails or the response does not have the expected structure (the
            problem is shown with ``st.error``).
        """
        payload = {
            "model": self.model,
            "messages": [
                {"role": "system", "content": self.SYSTEM_PROMPT},
                {"role": "user", "content": prompt},
            ],
            "temperature": 0.7,
            # NOTE(review): a 12-15 turn conversation may not fit in 1000
            # tokens; a truncated reply surfaces later as a JSON parse error.
            "max_tokens": 1000,
        }
        try:
            response = httpx.post(
                self.base_url,
                headers=self.headers,
                json=payload,
                timeout=30.0,
            )
            response.raise_for_status()
            return response.json()['choices'][0]['message']['content']
        except httpx.HTTPStatusError as e:
            # Show the body too: it may carry the server's explanation.
            st.error(f"Error calling Groq API: {e} - {e.response.text}")
        except httpx.HTTPError as e:
            st.error(f"Error calling Groq API: {e}")
        except (KeyError, IndexError, TypeError, ValueError) as e:
            # Non-JSON body or a payload without choices[0].message.content.
            st.error(f"Unexpected response format from Groq API: {e!r}")
        return None

    @staticmethod
    def _parse_conversation(raw: str) -> List[Dict]:
        """Turn the model's raw reply into a validated list of turn dicts.

        Markdown code fences are stripped first. If the remaining text is not
        valid JSON on its own, the outermost ``[...]`` span is tried, which
        covers replies that surround the array with prose. A dict reply is
        accepted when it is a single turn or wraps a list under some key.
        Items that are not dicts with both ``role`` and ``message`` are
        dropped.

        Raises:
            ValueError: if no JSON can be decoded (``json.JSONDecodeError`` is
                a subclass) or no usable turn is found.
        """
        text = raw.replace("```json", "").replace("```", "").strip()
        try:
            data = json.loads(text)
        except json.JSONDecodeError:
            start, end = text.find("["), text.rfind("]")
            if start == -1 or end <= start:
                raise
            data = json.loads(text[start:end + 1])

        if isinstance(data, dict):
            if "role" in data and "message" in data:
                data = [data]
            else:
                # e.g. {"conversation": [...]}: take the first list value.
                data = next(
                    (value for value in data.values() if isinstance(value, list)),
                    None,
                )
        if not isinstance(data, list):
            raise ValueError("expected a JSON list of conversation turns")

        turns = [
            turn for turn in data
            if isinstance(turn, dict) and "role" in turn and "message" in turn
        ]
        if not turns:
            raise ValueError("response contained no turns with 'role' and 'message'")
        return turns

    @staticmethod
    def _turns_to_rows(conversation_id: int, scenario: str, turns: List[Dict]) -> List[Dict]:
        """Flatten one conversation into dataset rows (one dict per turn).

        A missing, ``None`` or empty ``emotion`` is stored as ``'N/A'``.
        """
        return [
            {
                'conversation_id': conversation_id,
                'role': turn['role'],
                'message': turn['message'],
                'emotion': turn.get('emotion') or 'N/A',
                'scenario': scenario,
            }
            for turn in turns
        ]

    def generate_conversation(self, scenario: str) -> Optional[List[Dict]]:
        """Ask the model for one conversation about ``scenario``.

        Returns:
            A non-empty list of turn dicts, each with at least ``role`` and
            ``message``, or ``None`` when the API call or the parsing fails.
        """
        prompt = (
            "\n"
            "Generate a realistic HR-employee conversation about the following scenario:\n"
            f"{scenario}\n"
            "The conversation should:\n"
            "1. Include natural emotional responses from the employee\n"
            "2. Show professional and empathetic responses from HR\n"
            "3. Have a natural flow and progression\n"
            "4. Include 12-15 turns between the employee and HR\n"
            "Return the conversation in JSON format as a list of messages, where each message has:\n"
            "- role (employee/hr)\n"
            "- message (the actual text)\n"
            "- emotion (for employee messages only)\n"
        )
        raw = self._call_groq_api(prompt)
        if not raw:
            return None
        try:
            return self._parse_conversation(raw)
        except ValueError as e:
            st.error(f"Error parsing JSON response: {e}")
            return None

    def generate_dataset(self, scenarios: List[str]) -> Optional[pd.DataFrame]:
        """Generate one conversation per scenario and collect all turns.

        ``conversation_id`` is the 1-based position of the scenario in
        ``scenarios``. Scenarios whose generation fails are skipped.

        Returns:
            A DataFrame with columns ``conversation_id``, ``role``,
            ``message``, ``emotion`` and ``scenario``, or ``None`` when no
            turn could be generated at all.
        """
        all_turns = []
        total = len(scenarios)
        for scenario_idx, scenario in enumerate(scenarios, 1):
            with st.spinner(f'Generating conversation for scenario {scenario_idx}...'):
                conversation = self.generate_conversation(scenario)
            if conversation:
                all_turns.extend(
                    self._turns_to_rows(scenario_idx, scenario, conversation)
                )
            if scenario_idx < total:
                time.sleep(1)  # Small delay between API calls; none after the last

        if not all_turns:
            return None
        return pd.DataFrame(all_turns)
def main():
    """Render the Streamlit page for building an HR conversation dataset.

    The page collects a Groq API key and an editable list of scenarios, runs
    ``GroqHRGenerator`` when "Generate Dataset" is pressed, and shows the
    resulting table with a CSV download button.

    State kept in ``st.session_state``:
        scenarios: current scenario texts, in display order.
        scenario_ids: one stable id per scenario, used to build widget keys so
            a field keeps its own key when another field is removed.
        next_scenario_id: id to hand to the next added scenario.
        generated_df: last generated dataset, kept so it is still shown after
            the rerun that any later widget interaction triggers.
    """
    st.title("HR Conversation Dataset Generator")
    st.write("Generate realistic HR-employee conversations based on different scenarios.")

    # API Key input
    api_key = st.text_input("Enter your Groq API Key:", type="password")

    # Scenario input
    st.subheader("Enter Scenarios")
    st.write("Add scenarios for generating conversations. Each scenario will generate a unique conversation.")

    state = st.session_state

    # Initialize the scenario list and its parallel id list on first run
    # (or re-create the ids if the two lists ever disagree in length).
    if 'scenarios' not in state:
        state.scenarios = [""]
    if len(state.get('scenario_ids', [])) != len(state.scenarios):
        state.scenario_ids = list(range(len(state.scenarios)))
        state.next_scenario_id = len(state.scenarios)

    def sync_scenarios():
        # Callbacks run before the script body, so pull the latest text from
        # the widgets' own state before changing the list.
        state.scenarios = [
            state.get(f"scenario_{scenario_id}", text)
            for scenario_id, text in zip(state.scenario_ids, state.scenarios)
        ]

    # Callback: add a new, empty scenario field
    def add_scenario():
        sync_scenarios()
        state.scenarios.append("")
        state.scenario_ids.append(state.next_scenario_id)
        state.next_scenario_id += 1

    # Callback: remove the scenario field that owns ``scenario_id``
    def remove_scenario(scenario_id):
        sync_scenarios()
        if scenario_id in state.scenario_ids:
            position = state.scenario_ids.index(scenario_id)
            state.scenarios.pop(position)
            state.scenario_ids.pop(position)

    # Display scenario input fields
    new_scenarios = []
    for i, (scenario_id, scenario) in enumerate(zip(state.scenario_ids, state.scenarios)):
        col1, col2 = st.columns([6, 1])
        with col1:
            new_scenarios.append(
                st.text_area(f"Scenario {i+1}", scenario, key=f"scenario_{scenario_id}")
            )
        with col2:
            if i > 0:  # Don't allow removing the first scenario
                st.button(
                    "Remove",
                    key=f"remove_{scenario_id}",
                    on_click=remove_scenario,
                    args=(scenario_id,),
                )
    state.scenarios = new_scenarios

    # on_click callbacks are followed by a rerun, so no explicit st.rerun().
    st.button("Add Another Scenario", on_click=add_scenario)

    # Generate button
    if st.button("Generate Dataset"):
        # Filter out empty scenarios
        scenarios = [s for s in state.scenarios if s.strip()]
        if not api_key:
            st.error("Please enter your Groq API key.")
        elif not scenarios:
            st.error("Please enter at least one scenario.")
        else:
            generator = GroqHRGenerator(api_key)
            df = generator.generate_dataset(scenarios)
            # Persist the result (or clear a stale one on failure).
            state.generated_df = df
            if df is not None:
                st.success("Dataset generated successfully!")
            else:
                st.error("Failed to generate dataset. Please try again.")

    # Rendered outside the button branch: the button is only True for the run
    # in which it was clicked, and the download button itself causes a rerun.
    df = state.get('generated_df')
    if df is not None:
        # Display the dataset
        st.subheader("Generated Dataset")
        st.dataframe(df)

        # Download button
        csv = df.to_csv(index=False)
        st.download_button(
            label="Download CSV",
            data=csv,
            file_name="hr_conversations.csv",
            mime="text/csv",
        )
# Run the app only when this file is executed as the entry script.
if __name__ == "__main__":
    main()