Spaces:

rohangbs
/

Dataset-Creator

Sleeping

App Files Files Community

Dataset-Creator / app.py

rohangbs

Create app.py

30dd9be verified 11 months ago

raw

history blame contribute delete

6.17 kB

	import json
	import os
	import time
	from typing import Dict, List, Optional

	import httpx
	import pandas as pd
	import streamlit as st

	class GroqHRGenerator:
	    """Build HR-employee conversation datasets via Groq's chat-completions API.

	    One conversation is requested per scenario.  The model's JSON reply is
	    validated and flattened into one row per conversation turn.  Failures are
	    reported to the Streamlit page with ``st.error`` and signalled to the
	    caller by returning ``None``; nothing is raised to the caller for API or
	    parsing problems.

	    Args:
	        api_key: Groq API key, sent as a bearer token.
	        model: Identifier of the chat model to query.
	        temperature: Sampling temperature forwarded to the API.
	        max_tokens: Completion token cap forwarded to the API.
	        request_delay: Seconds to wait between two consecutive API calls.
	    """

	    # NOTE(review): hosted model ids get retired over time - confirm this
	    # default is still served before relying on it, or pass ``model=``.
	    DEFAULT_MODEL = "mixtral-8x7b-32768"

	    def __init__(self, api_key: str, model: str = DEFAULT_MODEL,
	                 temperature: float = 0.7, max_tokens: int = 1000,
	                 request_delay: float = 1.0):
	        self.api_key = api_key
	        self.model = model
	        self.temperature = temperature
	        # NOTE(review): 12-15 JSON turns may not fit in 1000 tokens; a cut-off
	        # reply is invalid JSON - consider raising this cap.
	        self.max_tokens = max_tokens
	        self.request_delay = request_delay
	        self.base_url = "https://api.groq.com/openai/v1/chat/completions"
	        self.headers = {
	            "Authorization": f"Bearer {api_key}",
	            "Content-Type": "application/json"
	        }

	    def _call_groq_api(self, prompt: str) -> Optional[str]:
	        """Send ``prompt`` to the chat endpoint and return the reply text.

	        Returns:
	            The content of the first choice, or ``None`` when the request
	            fails or the response does not have the expected structure (the
	            error is shown with ``st.error``).
	        """
	        payload = {
	            "model": self.model,
	            "messages": [
	                {
	                    "role": "system",
	                    "content": """You are a conversation generator for HR-employee interactions.
	Generate realistic conversations with emotional context and natural flow.
	Output should be in JSON format with the following structure for each turn:
	{"role": "employee/hr", "message": "text", "emotion": "emotion_name"}"""
	                },
	                {
	                    "role": "user",
	                    "content": prompt
	                }
	            ],
	            "temperature": self.temperature,
	            "max_tokens": self.max_tokens
	        }

	        try:
	            response = httpx.post(
	                self.base_url,
	                headers=self.headers,
	                json=payload,
	                timeout=30.0
	            )
	            response.raise_for_status()
	            return response.json()['choices'][0]['message']['content']
	        # httpx.HTTPError covers transport failures and non-2xx statuses; the
	        # remaining types cover a body that is not JSON or lacks the fields.
	        except (httpx.HTTPError, KeyError, IndexError, TypeError, ValueError) as e:
	            st.error(f"Error calling Groq API: {e}")
	            return None

	    @staticmethod
	    def _is_valid_turn(turn) -> bool:
	        """Return True when ``turn`` is a dict holding 'role' and 'message'."""
	        return isinstance(turn, dict) and 'role' in turn and 'message' in turn

	    @staticmethod
	    def _parse_conversation(raw: str) -> List[Dict]:
	        """Decode model output into a list of well-formed turn dicts.

	        Markdown code fences are stripped first.  If the remaining text is not
	        pure JSON, decoding is retried from the first ``[`` or ``{`` so that
	        prose around the payload is tolerated.  A single turn object, or an
	        object wrapping the list (e.g. ``{"conversation": [...]}``), is
	        unwrapped.  Entries that are not dicts with both 'role' and 'message'
	        are dropped.

	        Raises:
	            ValueError: if nothing can be decoded (``json.JSONDecodeError`` is
	                a subclass) or no usable turn remains.
	        """
	        text = raw.replace("```json", "").replace("```", "").strip()
	        try:
	            parsed = json.loads(text)
	        except json.JSONDecodeError:
	            starts = [pos for pos in (text.find("["), text.find("{")) if pos != -1]
	            if not starts:
	                raise
	            # raw_decode ignores whatever follows the first complete value.
	            parsed, _ = json.JSONDecoder().raw_decode(text, min(starts))

	        if isinstance(parsed, dict):
	            if GroqHRGenerator._is_valid_turn(parsed):
	                parsed = [parsed]
	            else:
	                parsed = next(
	                    (value for value in parsed.values() if isinstance(value, list)),
	                    None,
	                )
	        if not isinstance(parsed, list):
	            raise ValueError("expected a JSON list of conversation turns")

	        turns = [turn for turn in parsed if GroqHRGenerator._is_valid_turn(turn)]
	        if not turns:
	            raise ValueError("no turn with both 'role' and 'message' was found")
	        return turns

	    @staticmethod
	    def _turns_to_rows(conversation_id: int, scenario: str,
	                       turns: List[Dict]) -> List[Dict]:
	        """Flatten one conversation into dataset rows, skipping malformed turns."""
	        return [
	            {
	                'conversation_id': conversation_id,
	                'role': turn['role'],
	                'message': turn['message'],
	                # ``or`` also maps an explicit null / empty emotion to 'N/A'.
	                'emotion': turn.get('emotion') or 'N/A',
	                'scenario': scenario
	            }
	            for turn in turns
	            if GroqHRGenerator._is_valid_turn(turn)
	        ]

	    def generate_conversation(self, scenario: str) -> Optional[List[Dict]]:
	        """Request one conversation for ``scenario``.

	        Returns:
	            A non-empty list of turn dicts (each has 'role' and 'message',
	            optionally 'emotion'), or ``None`` when the API call fails or the
	            reply cannot be parsed (the error is shown with ``st.error``).
	        """
	        prompt = f"""
	Generate a realistic HR-employee conversation about the following scenario:
	{scenario}

	The conversation should:
	1. Include natural emotional responses from the employee
	2. Show professional and empathetic responses from HR
	3. Have a natural flow and progression
	4. Include 12-15 turns between the employee and HR

	Return the conversation in JSON format as a list of messages, where each message has:
	- role (employee/hr)
	- message (the actual text)
	- emotion (for employee messages only)
	"""

	        response = self._call_groq_api(prompt)
	        if not response:
	            return None
	        try:
	            return self._parse_conversation(response)
	        except ValueError as e:
	            st.error(f"Error parsing JSON response: {e}")
	            return None

	    def generate_dataset(self, scenarios: List[str]) -> Optional[pd.DataFrame]:
	        """Generate one conversation per scenario and collect all turns.

	        Scenarios whose generation fails are skipped.  Conversation ids are
	        the 1-based position of the scenario in ``scenarios``.

	        Returns:
	            A DataFrame with columns conversation_id, role, message, emotion
	            and scenario, or ``None`` when no turn could be generated.
	        """
	        all_turns = []
	        scenario_count = len(scenarios)

	        for scenario_idx, scenario in enumerate(scenarios, 1):
	            with st.spinner(f'Generating conversation for scenario {scenario_idx}...'):
	                conversation = self.generate_conversation(scenario)
	            if conversation:
	                all_turns.extend(
	                    self._turns_to_rows(scenario_idx, scenario, conversation)
	                )
	            # Space out API calls; no need to wait after the final one.
	            if scenario_idx < scenario_count:
	                time.sleep(self.request_delay)

	        if all_turns:
	            return pd.DataFrame(all_turns)
	        return None

	def main():
	    """Render the Streamlit page: collect scenarios, generate, show, download.

	    State kept in ``st.session_state``:
	      * ``scenarios`` - current scenario texts, in display order.
	      * ``scenario_ids`` / ``next_scenario_id`` - a stable id per scenario,
	        used to build widget keys.
	      * ``generated_dataset`` - the last generated DataFrame (or ``None``).
	    """
	    st.title("HR Conversation Dataset Generator")
	    st.write("Generate realistic HR-employee conversations based on different scenarios.")

	    # API Key input
	    api_key = st.text_input("Enter your Groq API Key:", type="password")

	    # Scenario input
	    st.subheader("Enter Scenarios")
	    st.write("Add scenarios for generating conversations. Each scenario will generate a unique conversation.")

	    state = st.session_state

	    # Initialize scenarios list in session state if it doesn't exist
	    if 'scenarios' not in state:
	        state.scenarios = [""]

	    # Widget keys are derived from ids that never change for a scenario.
	    # Position-based keys would let the text typed into a removed slot stay
	    # attached to whichever scenario shifts into that position.
	    if len(state.get('scenario_ids', [])) != len(state.scenarios):
	        state.scenario_ids = list(range(len(state.scenarios)))
	        state.next_scenario_id = len(state.scenarios)

	    # Function to add new scenario field
	    def add_scenario():
	        state.scenarios.append("")
	        state.scenario_ids.append(state.next_scenario_id)
	        state.next_scenario_id += 1

	    # Function to remove scenario field (looked up by id, not position)
	    def remove_scenario(scenario_id):
	        if scenario_id in state.scenario_ids:
	            position = state.scenario_ids.index(scenario_id)
	            state.scenario_ids.pop(position)
	            state.scenarios.pop(position)

	    # Display scenario input fields; iterate over a snapshot because a click
	    # on "Remove" mutates the lists.
	    new_scenarios = []
	    entries = list(zip(state.scenario_ids, state.scenarios))
	    for position, (scenario_id, scenario) in enumerate(entries):
	        col1, col2 = st.columns([6, 1])
	        with col1:
	            new_scenario = st.text_area(
	                f"Scenario {position + 1}", scenario, key=f"scenario_{scenario_id}"
	            )
	            new_scenarios.append(new_scenario)
	        with col2:
	            if position > 0:  # Don't allow removing the first scenario
	                if st.button("Remove", key=f"remove_{scenario_id}"):
	                    remove_scenario(scenario_id)
	                    st.rerun()

	    state.scenarios = new_scenarios

	    if st.button("Add Another Scenario"):
	        add_scenario()
	        st.rerun()

	    # Generate button
	    if st.button("Generate Dataset"):
	        # Filter out empty scenarios
	        scenarios = [s for s in state.scenarios if s.strip()]

	        if not api_key:
	            st.error("Please enter your Groq API key.")
	        elif not scenarios:
	            st.error("Please enter at least one scenario.")
	        else:
	            generator = GroqHRGenerator(api_key)
	            df = generator.generate_dataset(scenarios)
	            # Keep the result across reruns: every widget interaction,
	            # including the download button, re-executes this script and
	            # this branch is then skipped.
	            state.generated_dataset = df

	            if df is not None:
	                st.success("Dataset generated successfully!")
	            else:
	                st.error("Failed to generate dataset. Please try again.")

	    df = state.get('generated_dataset')
	    if df is not None:
	        # Display the dataset
	        st.subheader("Generated Dataset")
	        st.dataframe(df)

	        # Download button
	        csv = df.to_csv(index=False)
	        st.download_button(
	            label="Download CSV",
	            data=csv,
	            file_name="hr_conversations.csv",
	            mime="text/csv"
	        )

	# Build the page only when this file is executed as the script entry point,
	# so importing the module has no UI side effects.
	if __name__ == "__main__":
	    main()