Spaces:

rohangbs
/

Dataset-Creator

Sleeping

App Files Files Community

rohangbs committed on Feb 17, 2025

Commit

30dd9be

verified ·

1 Parent(s): 9ce7dcf

Create app.py

Browse files

Files changed (1) hide show

app.py +176 -0

app.py ADDED Viewed

	@@ -0,0 +1,176 @@

import json
import os
import time
from typing import Dict, List, Optional

import httpx
import pandas as pd
import streamlit as st
class GroqHRGenerator:
    """Generate HR-employee conversation datasets through the Groq chat API.

    Each scenario is sent to the chat-completions endpoint; the model's reply
    is parsed into a list of turn dicts (``role``, ``message`` and optionally
    ``emotion``) and the turns of all scenarios are flattened into a DataFrame.
    Failures are reported to the user with ``st.error`` and signalled to the
    caller by returning ``None`` instead of raising.
    """

    def __init__(self, api_key: str, model: str = "mixtral-8x7b-32768"):
        """Store the credentials and request settings.

        Args:
            api_key: Groq API key, sent as a bearer token.
            model: Chat model identifier placed in every request payload.
                NOTE(review): confirm the default is still offered by Groq;
                hosted model ids are retired from time to time.
        """
        self.api_key = api_key
        self.model = model
        self.base_url = "https://api.groq.com/openai/v1/chat/completions"
        self.headers = {
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json"
        }

    def _call_groq_api(self, prompt: str) -> Optional[str]:
        """Send ``prompt`` to the chat endpoint and return the reply text.

        Returns:
            The content of the first choice, or ``None`` if the request fails
            or the response does not have the expected shape (the error is
            shown with ``st.error``).
        """
        payload = {
            "model": self.model,
            "messages": [
                {
                    "role": "system",
                    "content": """You are a conversation generator for HR-employee interactions.
                    Generate realistic conversations with emotional context and natural flow.
                    Output should be in JSON format with the following structure for each turn:
                    {"role": "employee/hr", "message": "text", "emotion": "emotion_name"}"""
                },
                {
                    "role": "user",
                    "content": prompt
                }
            ],
            "temperature": 0.7,
            # NOTE(review): 12-15 JSON-encoded turns may not fit in 1000
            # tokens; a truncated reply surfaces as a JSON parsing error.
            "max_tokens": 1000
        }
        try:
            response = httpx.post(
                self.base_url,
                headers=self.headers,
                json=payload,
                timeout=30.0
            )
            response.raise_for_status()
            return response.json()['choices'][0]['message']['content']
        except Exception as e:
            # UI boundary: any transport, HTTP-status or response-shape problem
            # is shown to the user rather than crashing the app.
            st.error(f"Error calling Groq API: {e}")
            return None

    @staticmethod
    def _parse_conversation(raw: str) -> List[Dict]:
        """Extract the list of turn dicts from a raw model reply.

        Raises:
            ValueError: if no JSON list of turns carrying ``role`` and
                ``message`` can be recovered (``json.JSONDecodeError`` is a
                ``ValueError`` subclass).
        """
        text = raw.replace("```json", "").replace("```", "").strip()
        try:
            data = json.loads(text)
        except json.JSONDecodeError:
            # The reply may surround the JSON with prose; retry on the
            # outermost [...] span before giving up.
            start, end = text.find("["), text.rfind("]")
            if start == -1 or end <= start:
                raise
            data = json.loads(text[start:end + 1])

        if isinstance(data, dict):
            if 'role' in data and 'message' in data:
                # A single bare turn.
                data = [data]
            else:
                # A wrapper object such as {"conversation": [...]}.
                data = next(
                    (value for value in data.values() if isinstance(value, list)),
                    None,
                )
        if not isinstance(data, list):
            raise ValueError("expected a JSON list of conversation turns")

        # Drop malformed entries so one bad turn cannot abort the dataset.
        turns = [
            turn for turn in data
            if isinstance(turn, dict) and 'role' in turn and 'message' in turn
        ]
        if not turns:
            raise ValueError("no valid conversation turns found in response")
        return turns

    def generate_conversation(self, scenario: str) -> Optional[List[Dict]]:
        """Generate one conversation for ``scenario``.

        Returns:
            A non-empty list of turn dicts, each containing at least ``role``
            and ``message``, or ``None`` if the API call or the parsing failed.
        """
        prompt = f"""
        Generate a realistic HR-employee conversation about the following scenario:
        {scenario}
        The conversation should:
        1. Include natural emotional responses from the employee
        2. Show professional and empathetic responses from HR
        3. Have a natural flow and progression
        4. Include 12-15 turns between the employee and HR
        Return the conversation in JSON format as a list of messages, where each message has:
        - role (employee/hr)
        - message (the actual text)
        - emotion (for employee messages only)
        """
        response = self._call_groq_api(prompt)
        if not response:
            return None
        try:
            return self._parse_conversation(response)
        except ValueError as e:
            st.error(f"Error parsing JSON response: {e}")
            return None

    def generate_dataset(self, scenarios: List[str]) -> Optional[pd.DataFrame]:
        """Generate conversations for all scenarios and flatten them into rows.

        Args:
            scenarios: Scenario descriptions; conversation ids are their
                1-based positions in this list.

        Returns:
            A DataFrame with columns ``conversation_id``, ``role``,
            ``message``, ``emotion`` and ``scenario`` (one row per turn), or
            ``None`` if no conversation could be generated.
        """
        all_turns = []
        total = len(scenarios)
        for scenario_idx, scenario in enumerate(scenarios, 1):
            with st.spinner(f'Generating conversation for scenario {scenario_idx}...'):
                conversation = self.generate_conversation(scenario)
            all_turns.extend(
                {
                    'conversation_id': scenario_idx,
                    'role': turn['role'],
                    'message': turn['message'],
                    # `or` also covers an explicit null/empty emotion, which
                    # .get() with a default would pass through unchanged.
                    'emotion': turn.get('emotion') or 'N/A',
                    'scenario': scenario
                }
                for turn in conversation or []
            )
            if scenario_idx < total:
                time.sleep(1)  # Small delay between API calls
        if all_turns:
            return pd.DataFrame(all_turns)
        return None
def main():
    """Render the Streamlit UI for building an HR conversation dataset.

    The page collects a Groq API key and a dynamic list of scenario texts,
    generates one conversation per non-empty scenario, and shows the resulting
    table with a CSV download button.

    Session state keys used:
        scenarios: list of the scenario texts, in display order.
        scenario_<i>: widget state of the text area in row ``i``.
        generated_df: the last successfully generated DataFrame (or ``None``),
            kept so the table and download button survive later reruns.
    """
    st.title("HR Conversation Dataset Generator")
    st.write("Generate realistic HR-employee conversations based on different scenarios.")
    # API Key input
    api_key = st.text_input("Enter your Groq API Key:", type="password")
    # Scenario input
    st.subheader("Enter Scenarios")
    st.write("Add scenarios for generating conversations. Each scenario will generate a unique conversation.")
    # Initialize scenarios list in session state if it doesn't exist
    if 'scenarios' not in st.session_state:
        st.session_state.scenarios = [""]

    # Both helpers are used as on_click callbacks. Streamlit runs callbacks
    # before the script body of the rerun, which is the point where widget
    # values may still be assigned through st.session_state.
    def add_scenario():
        """Append an empty scenario row."""
        new_index = len(st.session_state.scenarios)
        # Start the new row blank even if state from a removed row lingers.
        st.session_state[f"scenario_{new_index}"] = ""
        st.session_state.scenarios.append("")

    def remove_scenario(index):
        """Drop row ``index`` and shift the texts of later rows up by one."""
        texts = [
            st.session_state.get(f"scenario_{i}", stored)
            for i, stored in enumerate(st.session_state.scenarios)
        ]
        if 0 < index < len(texts):  # Don't allow removing the first scenario
            texts.pop(index)
        # Widget keys are positional, so the surviving texts have to be moved
        # into the keys of the rows they now occupy; otherwise the removed
        # row's text would reappear and the last row's text would be lost.
        for i, text in enumerate(texts):
            st.session_state[f"scenario_{i}"] = text
        st.session_state.scenarios = texts

    # Display scenario input fields
    new_scenarios = []
    for i, stored in enumerate(st.session_state.scenarios):
        key = f"scenario_{i}"
        if key not in st.session_state:
            # Fall back to the stored text if the widget state is missing.
            st.session_state[key] = stored
        col1, col2 = st.columns([6, 1])
        with col1:
            # No `value=` argument: the text lives in the widget's own keyed
            # state, so the widget's parameters stay the same across reruns.
            new_scenarios.append(st.text_area(f"Scenario {i+1}", key=key))
        with col2:
            if i > 0:  # Don't allow removing the first scenario
                st.button(
                    "Remove",
                    key=f"remove_{i}",
                    on_click=remove_scenario,
                    args=(i,),
                )
    st.session_state.scenarios = new_scenarios
    st.button("Add Another Scenario", on_click=add_scenario)

    # Generate button
    if st.button("Generate Dataset"):
        # Filter out empty scenarios
        scenarios = [s for s in st.session_state.scenarios if s.strip()]
        if not api_key:
            st.error("Please enter your Groq API key.")
        elif not scenarios:
            st.error("Please enter at least one scenario.")
        else:
            generator = GroqHRGenerator(api_key)
            df = generator.generate_dataset(scenarios)
            st.session_state["generated_df"] = df
            if df is not None:
                st.success("Dataset generated successfully!")
            else:
                st.error("Failed to generate dataset. Please try again.")

    # Render the results outside the button branch: st.button is True only on
    # the run triggered by its click, so anything drawn inside that branch
    # disappears on the next rerun (for example after clicking Download).
    df = st.session_state.get("generated_df")
    if df is not None:
        # Display the dataset
        st.subheader("Generated Dataset")
        st.dataframe(df)
        # Download button
        csv = df.to_csv(index=False)
        st.download_button(
            label="Download CSV",
            data=csv,
            file_name="hr_conversations.csv",
            mime="text/csv"
        )


if __name__ == "__main__":
    main()