Spaces:
Build error
Build error
Upload 15 files
Browse files
- common/.DS_Store +0 -0
- common/CombineReportsMain.ipynb +1 -0
- common/Config.py +183 -0
- common/DataDictionary.py +134 -0
- common/FastFacts.py +81 -0
- common/Interview.py +75 -0
- common/InterviewUtilities.py +144 -0
- common/LLMConfig.py +155 -0
- common/PersonalityValues.py +154 -0
- common/RespondentAgent.py +153 -0
- common/UserProfile.py +359 -0
- common/Utilities.py +140 -0
- researchsimulation/InteractiveInterviewChatbot.py +124 -0
- researchsimulation/InteractiveInterviewSimulation.py +133 -0
- researchsimulation/InterviewSimulation.py +196 -0
common/.DS_Store
ADDED
|
Binary file (6.15 kB). View file
|
|
|
common/CombineReportsMain.ipynb
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"provenance":[],"authorship_tag":"ABX9TyPaVbmI1rVozLNCS5uIGcEq"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"}},"cells":[{"cell_type":"code","source":["!pip install dotenv\n","!pip install pydantic\n","!pip install XlsxWriter\n","!pip install openpyxl\n","!pip install pandas\n","!pip install boto3"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"_EsZoeD7g2Ap","executionInfo":{"status":"ok","timestamp":1744207805584,"user_tz":-480,"elapsed":37869,"user":{"displayName":"Elaine Ng","userId":"17781798492345444321"}},"outputId":"1cde6ec1-a6c8-4429-ce02-5f32279f7c58","collapsed":true},"execution_count":1,"outputs":[{"output_type":"stream","name":"stdout","text":["Collecting dotenv\n"," Downloading dotenv-0.9.9-py2.py3-none-any.whl.metadata (279 bytes)\n","Collecting python-dotenv (from dotenv)\n"," Downloading python_dotenv-1.1.0-py3-none-any.whl.metadata (24 kB)\n","Downloading dotenv-0.9.9-py2.py3-none-any.whl (1.9 kB)\n","Downloading python_dotenv-1.1.0-py3-none-any.whl (20 kB)\n","Installing collected packages: python-dotenv, dotenv\n","Successfully installed dotenv-0.9.9 python-dotenv-1.1.0\n","Requirement already satisfied: pydantic in /usr/local/lib/python3.11/dist-packages (2.11.2)\n","Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.11/dist-packages (from pydantic) (0.7.0)\n","Requirement already satisfied: pydantic-core==2.33.1 in /usr/local/lib/python3.11/dist-packages (from pydantic) (2.33.1)\n","Requirement already satisfied: typing-extensions>=4.12.2 in /usr/local/lib/python3.11/dist-packages (from pydantic) (4.13.1)\n","Requirement already satisfied: typing-inspection>=0.4.0 in /usr/local/lib/python3.11/dist-packages (from pydantic) (0.4.0)\n","Collecting XlsxWriter\n"," Downloading XlsxWriter-3.2.2-py3-none-any.whl.metadata (2.8 kB)\n","Downloading XlsxWriter-3.2.2-py3-none-any.whl (165 kB)\n","\u001b[2K 
\u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m165.1/165.1 kB\u001b[0m \u001b[31m3.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hInstalling collected packages: XlsxWriter\n","Successfully installed XlsxWriter-3.2.2\n","Requirement already satisfied: openpyxl in /usr/local/lib/python3.11/dist-packages (3.1.5)\n","Requirement already satisfied: et-xmlfile in /usr/local/lib/python3.11/dist-packages (from openpyxl) (2.0.0)\n","Requirement already satisfied: pandas in /usr/local/lib/python3.11/dist-packages (2.2.2)\n","Requirement already satisfied: numpy>=1.23.2 in /usr/local/lib/python3.11/dist-packages (from pandas) (2.0.2)\n","Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas) (2.8.2)\n","Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas) (2025.2)\n","Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas) (2025.2)\n","Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from python-dateutil>=2.8.2->pandas) (1.17.0)\n","Collecting boto3\n"," Downloading boto3-1.37.30-py3-none-any.whl.metadata (6.7 kB)\n","Collecting botocore<1.38.0,>=1.37.30 (from boto3)\n"," Downloading botocore-1.37.30-py3-none-any.whl.metadata (5.7 kB)\n","Collecting jmespath<2.0.0,>=0.7.1 (from boto3)\n"," Downloading jmespath-1.0.1-py3-none-any.whl.metadata (7.6 kB)\n","Collecting s3transfer<0.12.0,>=0.11.0 (from boto3)\n"," Downloading s3transfer-0.11.4-py3-none-any.whl.metadata (1.7 kB)\n","Requirement already satisfied: python-dateutil<3.0.0,>=2.1 in /usr/local/lib/python3.11/dist-packages (from botocore<1.38.0,>=1.37.30->boto3) (2.8.2)\n","Requirement already satisfied: urllib3!=2.2.0,<3,>=1.25.4 in /usr/local/lib/python3.11/dist-packages (from botocore<1.38.0,>=1.37.30->boto3) (2.3.0)\n","Requirement already satisfied: six>=1.5 in 
/usr/local/lib/python3.11/dist-packages (from python-dateutil<3.0.0,>=2.1->botocore<1.38.0,>=1.37.30->boto3) (1.17.0)\n","Downloading boto3-1.37.30-py3-none-any.whl (139 kB)\n","\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m139.6/139.6 kB\u001b[0m \u001b[31m3.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hDownloading botocore-1.37.30-py3-none-any.whl (13.5 MB)\n","\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m13.5/13.5 MB\u001b[0m \u001b[31m96.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hDownloading jmespath-1.0.1-py3-none-any.whl (20 kB)\n","Downloading s3transfer-0.11.4-py3-none-any.whl (84 kB)\n","\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m84.4/84.4 kB\u001b[0m \u001b[31m7.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hInstalling collected packages: jmespath, botocore, s3transfer, boto3\n","Successfully installed boto3-1.37.30 botocore-1.37.30 jmespath-1.0.1 s3transfer-0.11.4\n"]}]},{"cell_type":"code","execution_count":2,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"v7z1LWRyz-bh","outputId":"f235fa7a-6b34-4e2f-b4ea-2f676b96027a","executionInfo":{"status":"ok","timestamp":1744207959003,"user_tz":-480,"elapsed":153406,"user":{"displayName":"Elaine Ng","userId":"17781798492345444321"}}},"outputs":[{"output_type":"stream","name":"stdout","text":["Mounted at /content/drive\n","/content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/common\n","Environment Name: itc_frozenfood.dev1\n","Number of Respondents: 200\n","Number of Focus Groups: 200\n","Base Directory: /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/.\n","Config Directory: /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./config/itc_frozenfood\n","Test Result Directory: /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/tests\n","Input Directory: 
/content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/input\n","Output Directory: /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/output\n","Respondent Summary File: /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./config/itc_frozenfood/RawTranscriptList.xlsx\n","Focus Group Summary File: /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./config/itc_frozenfood/FocusGroupProfiles.xlsx\n","Personality Question File: /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./config/itc_frozenfood/Personality_Assessment.xlsx\n","Respondent Details File: /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./config/itc_frozenfood/UserProfiles.xlsx\n","Data Dictionary File: /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./config/itc_frozenfood/DataDictionary.xlsx\n","Personality Scoring File: /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./config/itc_frozenfood/schwartz_values_scoring.txt\n","Style Tone Question File: /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./config/itc_frozenfood/None\n","Interview Question File: /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./config/itc_frozenfood/interview_questions_full.xlsx\n","Survey Question File: /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./config/itc_frozenfood/survey_questions.xlsx\n","Interview Validation Files: /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./config/itc_frozenfood/None\n","Agent Model: groq/deepseek-r1-distill-llama-70b\n","GROQ API Key: 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\n","Model: gpt-4o\n","Open API Key: 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\n","Open Router API Key: 
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\n","Processing Model Type: None\n","Processing Model Name: None\n","Processing Model API Key: None\n","Directory exists /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/output/interviewresponses_04-08_10-19\n","Processing agent P1_B2\n","Processing file: /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/output/interviewresponses_04-08_10-19/P1_B2_interview_results.xlsx\n","Processing agent P1_B3\n","Processing file: /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/output/interviewresponses_04-08_10-19/P1_B3_interview_results.xlsx\n","Processing agent P1_B4_1\n","Processing file: /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/output/interviewresponses_04-08_10-19/P1_B4_1_interview_results.xlsx\n","Processing agent P1_B4_2\n","Processing file: /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/output/interviewresponses_04-08_10-19/P1_B4_2_interview_results.xlsx\n","Processing agent P1_B4_3\n","Processing file: /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/output/interviewresponses_04-08_10-19/P1_B4_3_interview_results.xlsx\n","Processing agent P1_B4_4\n","Processing file: /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/output/interviewresponses_04-08_10-19/P1_B4_4_interview_results.xlsx\n","Processing agent P1_B4_5\n","Processing file: /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/output/interviewresponses_04-08_10-19/P1_B4_5_interview_results.xlsx\n","Processing agent P1_B5_1\n","Processing file: /content/drive/MyDrive/Colab 
Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/output/interviewresponses_04-08_10-19/P1_B5_1_interview_results.xlsx\n","Processing agent P1_B5_2\n","Processing file: /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/output/interviewresponses_04-08_10-19/P1_B5_2_interview_results.xlsx\n","Processing agent P1_B5_3\n","Processing file: /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/output/interviewresponses_04-08_10-19/P1_B5_3_interview_results.xlsx\n","Processing agent P1_B5_4\n","Processing file: /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/output/interviewresponses_04-08_10-19/P1_B5_4_interview_results.xlsx\n","Processing agent P1_B5_5\n","Processing file: /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/output/interviewresponses_04-08_10-19/P1_B5_5_interview_results.xlsx\n","Processing agent P1_B6_1\n","Processing file: /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/output/interviewresponses_04-08_10-19/P1_B6_1_interview_results.xlsx\n","Processing agent P1_B6_2\n","Processing file: /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/output/interviewresponses_04-08_10-19/P1_B6_2_interview_results.xlsx\n","Processing agent P1_B6_3\n","Processing file: /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/output/interviewresponses_04-08_10-19/P1_B6_3_interview_results.xlsx\n","Processing agent P1_B6_4\n","Processing file: /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/output/interviewresponses_04-08_10-19/P1_B6_4_interview_results.xlsx\n","Processing agent P1_B6_5\n","Processing file: /content/drive/MyDrive/Colab 
Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/output/interviewresponses_04-08_10-19/P1_B6_5_interview_results.xlsx\n","Processing agent P1_D1\n","Processing file: /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/output/interviewresponses_04-08_10-19/P1_D1_interview_results.xlsx\n","Processing agent P1_D2\n","Processing file: /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/output/interviewresponses_04-08_10-19/P1_D2_interview_results.xlsx\n","Processing agent P1_D3\n","Processing file: /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/output/interviewresponses_04-08_10-19/P1_D3_interview_results.xlsx\n","Processing agent P1_D4_1\n","Processing file: /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/output/interviewresponses_04-08_10-19/P1_D4_1_interview_results.xlsx\n","Processing agent P1_D4_2\n","Processing file: /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/output/interviewresponses_04-08_10-19/P1_D4_2_interview_results.xlsx\n","Processing agent P1_D4_3\n","Processing file: /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/output/interviewresponses_04-08_10-19/P1_D4_3_interview_results.xlsx\n","Processing agent P1_D4_4\n","Processing file: /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/output/interviewresponses_04-08_10-19/P1_D4_4_interview_results.xlsx\n","Processing agent P1_D4_5\n","Processing file: /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/output/interviewresponses_04-08_10-19/P1_D4_5_interview_results.xlsx\n","Processing agent P1_D5_1\n","Processing file: /content/drive/MyDrive/Colab 
Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/output/interviewresponses_04-08_10-19/P1_D5_1_interview_results.xlsx\n","Processing agent P1_D5_2\n","Processing file: /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/output/interviewresponses_04-08_10-19/P1_D5_2_interview_results.xlsx\n","Processing agent P1_D5_3\n","Processing file: /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/output/interviewresponses_04-08_10-19/P1_D5_3_interview_results.xlsx\n","Processing agent P1_D5_4\n","Processing file: /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/output/interviewresponses_04-08_10-19/P1_D5_4_interview_results.xlsx\n","Processing agent P1_D5_5\n","Processing file: /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/output/interviewresponses_04-08_10-19/P1_D5_5_interview_results.xlsx\n","Processing agent P1_D6_1\n","Processing file: /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/output/interviewresponses_04-08_10-19/P1_D6_1_interview_results.xlsx\n","Processing agent P1_D6_2\n","Processing file: /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/output/interviewresponses_04-08_10-19/P1_D6_2_interview_results.xlsx\n","Processing agent P1_D6_3\n","Processing file: /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/output/interviewresponses_04-08_10-19/P1_D6_3_interview_results.xlsx\n","Processing agent P1_D6_4\n","Processing file: /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/output/interviewresponses_04-08_10-19/P1_D6_4_interview_results.xlsx\n","Processing agent P1_D6_5\n","Processing file: /content/drive/MyDrive/Colab 
Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/output/interviewresponses_04-08_10-19/P1_D6_5_interview_results.xlsx\n","Processing agent P2_B1_1\n","Processing file: /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/output/interviewresponses_04-08_10-19/P2_B1_1_interview_results.xlsx\n","Processing agent P2_B1_2\n","Processing file: /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/output/interviewresponses_04-08_10-19/P2_B1_2_interview_results.xlsx\n","Processing agent P2_B1_3\n","Processing file: /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/output/interviewresponses_04-08_10-19/P2_B1_3_interview_results.xlsx\n","Processing agent P2_B1_4\n","Processing file: /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/output/interviewresponses_04-08_10-19/P2_B1_4_interview_results.xlsx\n","Processing agent P2_B1_5\n","Processing file: /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/output/interviewresponses_04-08_10-19/P2_B1_5_interview_results.xlsx\n","Processing agent P2_B2_1\n","Processing file: /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/output/interviewresponses_04-08_10-19/P2_B2_1_interview_results.xlsx\n","Processing agent P2_B2_2\n","Processing file: /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/output/interviewresponses_04-08_10-19/P2_B2_2_interview_results.xlsx\n","Processing agent P2_B2_3\n","Processing file: /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/output/interviewresponses_04-08_10-19/P2_B2_3_interview_results.xlsx\n","Processing agent P2_B2_4\n","Processing file: /content/drive/MyDrive/Colab 
Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/output/interviewresponses_04-08_10-19/P2_B2_4_interview_results.xlsx\n","Processing agent P2_B4_1\n","Processing file: /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/output/interviewresponses_04-08_10-19/P2_B4_1_interview_results.xlsx\n","Processing agent P2_B4_2\n","Processing file: /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/output/interviewresponses_04-08_10-19/P2_B4_2_interview_results.xlsx\n","Processing agent P2_B4_3\n","Processing file: /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/output/interviewresponses_04-08_10-19/P2_B4_3_interview_results.xlsx\n","Processing agent P2_B4_4\n","Processing file: /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/output/interviewresponses_04-08_10-19/P2_B4_4_interview_results.xlsx\n","Processing agent P2_B4_5\n","Processing file: /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/output/interviewresponses_04-08_10-19/P2_B4_5_interview_results.xlsx\n","Processing agent P2_B5_1\n","Processing file: /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/output/interviewresponses_04-08_10-19/P2_B5_1_interview_results.xlsx\n","Processing agent P2_B5_2\n","Processing file: /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/output/interviewresponses_04-08_10-19/P2_B5_2_interview_results.xlsx\n","Processing agent P2_B5_3\n","Processing file: /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/output/interviewresponses_04-08_10-19/P2_B5_3_interview_results.xlsx\n","Processing agent P2_B5_4\n","Processing file: /content/drive/MyDrive/Colab 
Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/output/interviewresponses_04-08_10-19/P2_B5_4_interview_results.xlsx\n","Processing agent P2_B5_5\n","Processing file: /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/output/interviewresponses_04-08_10-19/P2_B5_5_interview_results.xlsx\n","Processing agent P2_B6_1\n","Processing file: /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/output/interviewresponses_04-08_10-19/P2_B6_1_interview_results.xlsx\n","Processing agent P2_B6_2\n","Processing file: /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/output/interviewresponses_04-08_10-19/P2_B6_2_interview_results.xlsx\n","Processing agent P2_B6_3\n","Processing file: /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/output/interviewresponses_04-08_10-19/P2_B6_3_interview_results.xlsx\n","Processing agent P2_B6_4\n","Processing file: /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/output/interviewresponses_04-08_10-19/P2_B6_4_interview_results.xlsx\n","Processing agent P2_B6_5\n","Processing file: /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/output/interviewresponses_04-08_10-19/P2_B6_5_interview_results.xlsx\n","Processing agent P2_D1_1\n","Processing file: /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/output/interviewresponses_04-08_10-19/P2_D1_1_interview_results.xlsx\n","Processing agent P2_D1_2\n","Processing file: /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/output/interviewresponses_04-08_10-19/P2_D1_2_interview_results.xlsx\n","Processing agent P2_D1_3\n","Processing file: /content/drive/MyDrive/Colab 
Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/output/interviewresponses_04-08_10-19/P2_D1_3_interview_results.xlsx\n","Processing agent P2_D1_4\n","Processing file: /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/output/interviewresponses_04-08_10-19/P2_D1_4_interview_results.xlsx\n","Processing agent P2_D1_5\n","Processing file: /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/output/interviewresponses_04-08_10-19/P2_D1_5_interview_results.xlsx\n","Processing agent P1_B1\n","Processing file: /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/output/interviewresponses_04-08_10-19/P1_B1_interview_results.xlsx\n","Processing agent P2_D2_1\n","Processing file: /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/output/interviewresponses_04-08_10-19/P2_D2_1_interview_results.xlsx\n","Processing agent P2_D2_2\n","Processing file: /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/output/interviewresponses_04-08_10-19/P2_D2_2_interview_results.xlsx\n","Processing agent P2_D2_3\n","Processing file: /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/output/interviewresponses_04-08_10-19/P2_D2_3_interview_results.xlsx\n","Processing agent P2_D2_4\n","Processing file: /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/output/interviewresponses_04-08_10-19/P2_D2_4_interview_results.xlsx\n","Processing agent P2_D2_5\n","Processing file: /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/output/interviewresponses_04-08_10-19/P2_D2_5_interview_results.xlsx\n","Processing agent P2_D3_1\n","Processing file: /content/drive/MyDrive/Colab 
Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/output/interviewresponses_04-08_10-19/P2_D3_1_interview_results.xlsx\n","Processing agent P2_D3_2\n","Processing file: /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/output/interviewresponses_04-08_10-19/P2_D3_2_interview_results.xlsx\n","Processing agent P2_D3_3\n","Processing file: /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/output/interviewresponses_04-08_10-19/P2_D3_3_interview_results.xlsx\n","Processing agent P2_D3_4\n","Processing file: /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/output/interviewresponses_04-08_10-19/P2_D3_4_interview_results.xlsx\n","Processing agent P2_D3_5\n","Processing file: /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/output/interviewresponses_04-08_10-19/P2_D3_5_interview_results.xlsx\n","Processing agent P2_D4_1\n","Processing file: /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/output/interviewresponses_04-08_10-19/P2_D4_1_interview_results.xlsx\n","Processing agent P2_D4_2\n","Processing file: /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/output/interviewresponses_04-08_10-19/P2_D4_2_interview_results.xlsx\n","Processing agent P2_D4_3\n","Processing file: /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/output/interviewresponses_04-08_10-19/P2_D4_3_interview_results.xlsx\n","Processing agent P2_D4_4\n","Processing file: /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/output/interviewresponses_04-08_10-19/P2_D4_4_interview_results.xlsx\n","Processing agent P2_D4_5\n","Processing file: /content/drive/MyDrive/Colab 
Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/output/interviewresponses_04-08_10-19/P2_D4_5_interview_results.xlsx\n","Processing agent P2_D5_1\n","Processing file: /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/output/interviewresponses_04-08_10-19/P2_D5_1_interview_results.xlsx\n","Processing agent P2_D5_2\n","Processing file: /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/output/interviewresponses_04-08_10-19/P2_D5_2_interview_results.xlsx\n","Processing agent P2_D5_3\n","Processing file: /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/output/interviewresponses_04-08_10-19/P2_D5_3_interview_results.xlsx\n","Processing agent P2_D5_4\n","Processing file: /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/output/interviewresponses_04-08_10-19/P2_D5_4_interview_results.xlsx\n","Processing agent P2_D5_5\n","Processing file: /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/output/interviewresponses_04-08_10-19/P2_D5_5_interview_results.xlsx\n","Processing agent P2_D6_2\n","Processing file: /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/output/interviewresponses_04-08_10-19/P2_D6_2_interview_results.xlsx\n","Processing agent P2_D6_1\n","Processing file: /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/output/interviewresponses_04-08_10-19/P2_D6_1_interview_results.xlsx\n","Processing agent P2_D6_3\n","Processing file: /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/output/interviewresponses_04-08_10-19/P2_D6_3_interview_results.xlsx\n","Processing agent P2_D6_4\n","Processing file: /content/drive/MyDrive/Colab 
Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/output/interviewresponses_04-08_10-19/P2_D6_4_interview_results.xlsx\n","Processing agent P2_D6_5\n","Processing file: /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/output/interviewresponses_04-08_10-19/P2_D6_5_interview_results.xlsx\n","Processing agent P2_D7_1\n","Processing file: /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/output/interviewresponses_04-08_10-19/P2_D7_1_interview_results.xlsx\n","Processing agent P2_D7_2\n","Processing file: /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/output/interviewresponses_04-08_10-19/P2_D7_2_interview_results.xlsx\n","Processing agent P2_D7_3\n","Processing file: /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/output/interviewresponses_04-08_10-19/P2_D7_3_interview_results.xlsx\n","Processing agent P2_D7_4\n","Processing file: /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/output/interviewresponses_04-08_10-19/P2_D7_4_interview_results.xlsx\n","Processing agent P2_D7_5\n","Processing file: /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/output/interviewresponses_04-08_10-19/P2_D7_5_interview_results.xlsx\n","β
All reports written to /content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/./data/itc_frozenfood/dev1/output/interviewresponses_04-08_10-19/interview_results_FULL_REPORT.xlsx\n"]}],"source":["from google.colab import drive\n","import sys\n","import os\n","import time\n","\n","drive.mount('/content/drive')\n","\n","base_dir = '/content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/.'\n","common_dir = f'{base_dir}/common'\n","run_dir = f'{base_dir}/common'\n","\n","sys.path.append(common_dir)\n","%cd {run_dir}\n","\n","from Config import Config\n","from Utilities import *\n","from UserProfile import *\n","from Interview import *\n","from PersonalityValues import *\n","from itertools import islice\n","\n","now = datetime.datetime.now()\n","timestamp = now.strftime(\"%m-%d_%H-%M\")\n","\n","# MAIN\n","Config.load_environment(base_dir, \"itc_frozenfood.dev1\")\n","Config.print_environment()\n","\n","# Specify report directory\n","report_type = \"interview_results\"\n","report_dir = f\"{Config.output_dir}/interviewresponses_04-08_10-19\"\n","full_report_file = f\"{report_dir}/{report_type}_FULL_REPORT.xlsx\"\n","\n","# Initialize an empty DataFrame to store the combined interview responses\n","full_report_df = None\n","\n","if os.path.exists(report_dir):\n"," print(f\"Directory exists {report_dir}\")\n","else:\n"," print(f\"Directory does not exist {report_dir}\")\n"," sys.exit()\n","\n","report_files = [\n"," filename for filename in os.listdir(report_dir)\n"," if f\"{report_type}.xlsx\" in filename\n"," ]\n","\n","if not report_files:\n"," print(f\"No report files named *{report_type}* were found in {report_dir}\")\n"," sys.exit()\n","\n","for report_file in report_files:\n"," respondent_agent_id = report_file.split(f\"_{report_type}\")[0]\n"," print(f\"Processing agent {respondent_agent_id}\")\n"," interview_output_file = f'{report_dir}/{respondent_agent_id}_interview_results.xlsx'\n","\n"," if os.path.exists(interview_output_file):\n"," 
print(f\"Processing file: {interview_output_file}\")\n"," interview_response_df = pd.read_excel(interview_output_file)\n"," # Insert the \"Respondent Agent\" column at position 0 before appending, so it is the first column\n"," interview_response_df.insert(0, \"Respondent Agent\", respondent_agent_id)\n","\n"," # Append to the full DataFrame\n"," full_report_df = (\n"," interview_response_df if full_report_df is None\n"," else pd.concat([full_report_df, interview_response_df], ignore_index=True)\n"," )\n"," else:\n"," print(f\"File '{interview_output_file}' does not exist.\")\n","\n","\n","if full_report_df is not None and not full_report_df.empty:\n"," full_report_df.to_excel(full_report_file, index=False)\n"," print(f\"✅ All reports written to {full_report_file}\")\n","else:\n"," print(f\"⚠️ No reports were processed. {full_report_file} was not created.\")"]},{"cell_type":"markdown","source":[],"metadata":{"id":"8eaYo-b24Wvg"}}]}
|
common/Config.py
ADDED
|
@@ -0,0 +1,183 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from dotenv import load_dotenv
|
| 2 |
+
import os
|
| 3 |
+
|
| 4 |
+
class Config:
    """Process-wide settings populated from a per-environment ``.env`` file.

    Every setting is a class attribute that starts as ``None`` and is filled
    in by :meth:`load_environment`. Nothing is instantiated; callers read
    ``Config.<attribute>`` directly.
    """

    # === General Environment Info ===
    env_name = None
    num_respondents = None
    num_focus_groups = None

    # === Directories and Files ===
    base_dir = None
    config_dir = None
    test_result_dir = None
    input_dir = None
    output_dir = None
    respondent_summary_file = None
    focus_group_summary_file = None
    respondent_details_file = None
    data_dictionary_file = None
    personality_question_file = None
    personality_scoring_file = None
    style_tone_question_file = None
    interview_question_file = None
    survey_question_file = None
    interview_validation_files = None

    # === Respondent Agent Configs ===
    respondent_agent_host = None
    respondent_agent_model = None
    respondent_agent_api_key = None
    respondent_agent_url = None
    respondent_agent_temperature = None
    respondent_agent_top_p = None
    respondent_agent_frequency_penalty = None
    respondent_agent_presence_penalty = None

    # === Processing Agent Configs ===
    processing_agent_host = None
    processing_agent_model = None
    processing_agent_api_key = None
    processing_agent_url = None
    processing_agent_temperature = None
    processing_agent_top_p = None
    processing_agent_frequency_penalty = None
    processing_agent_presence_penalty = None

    # === Processor Configs ===
    processor_host = None
    processor_model = None
    processor_api_key = None
    processor_url = None
    processor_temperature = None
    processor_top_p = None
    processor_frequency_penalty = None
    processor_presence_penalty = None

    @staticmethod
    def _mask_secret(value):
        """Return a printable placeholder so secrets never reach stdout/logs."""
        return "*****" if value else "MISSING"

    @classmethod
    def _load_llm_settings(cls, attr_prefix, host_pointer, model_pointer):
        """Populate the ``<attr_prefix>_*`` attributes for one LLM role.

        ``host_pointer`` and ``model_pointer`` are environment variables whose
        *values* are the names of the variables holding the real host/model
        (two-level lookup). The host variable name, with ``_AGENT_HOST``
        removed, is the prefix used for the API key, URL and sampling
        parameters.

        A missing pointer variable leaves the dependent string settings as
        ``None`` (and the numeric ones at ``0.0``) instead of raising.
        """
        host_var = os.getenv(host_pointer)
        model_var = os.getenv(model_pointer)

        # os.getenv(None) raises TypeError, so only dereference set pointers.
        setattr(cls, f"{attr_prefix}_host", os.getenv(host_var) if host_var else None)
        setattr(cls, f"{attr_prefix}_model", os.getenv(model_var) if model_var else None)

        env_prefix = host_var.replace('_AGENT_HOST', '') if host_var else None

        def lookup(suffix, default=None):
            if env_prefix is None:
                return default
            return os.getenv(f"{env_prefix}_{suffix}", default)

        setattr(cls, f"{attr_prefix}_api_key", lookup("API_KEY"))
        setattr(cls, f"{attr_prefix}_url", lookup("URL"))

        for knob in ("temperature", "top_p", "frequency_penalty", "presence_penalty"):
            setattr(cls, f"{attr_prefix}_{knob}", float(lookup(knob.upper(), 0.0)))

    # Function to load the environment variables based on the given environment name
    @classmethod
    def load_environment(cls, base_dir, my_env_name):
        """Load ``<base_dir>/config/<my_env_name>.env`` and fill the class attributes.

        Args:
            base_dir: Root directory that contains the ``config`` folder and
                under which all sub-directories are resolved.
            my_env_name: Environment name; selects the ``.env`` file to read.
        """
        # Determine the path to the .env file based on the environment name
        env_file = f'{base_dir}/config/{my_env_name}.env'  # Update the base path as needed

        # Load the environment variables from the specified .env file
        load_dotenv(dotenv_path=env_file)

        cls.base_dir = base_dir
        cls.env_name = my_env_name
        cls.num_respondents = int(os.getenv('NUM_RESPONDENTS', 0))
        cls.num_focus_groups = int(os.getenv('NUM_FOCUS_GROUPS', 0))

        # Construct paths based on BASE_DIR and subdirectories/filenames
        cls.config_dir = f"{base_dir}/{os.getenv('CONFIG_SUBDIR')}"
        cls.test_result_dir = f"{base_dir}/{os.getenv('TEST_SUBDIR')}"
        cls.input_dir = f"{base_dir}/{os.getenv('INPUT_SUBDIR')}"
        cls.output_dir = f"{base_dir}/{os.getenv('OUTPUT_SUBDIR')}"
        cls.respondent_summary_file = f"{cls.config_dir}/{os.getenv('RESPONDENT_SUMMARY_FILE')}"
        cls.focus_group_summary_file = f"{cls.config_dir}/{os.getenv('FOCUS_GROUP_SUMMARY_FILE')}"
        cls.respondent_details_file = f"{cls.config_dir}/{os.getenv('RESPONDENT_DETAILS_FILE')}"
        cls.data_dictionary_file = f"{cls.config_dir}/{os.getenv('DATA_DICTIONARY_FILE')}"
        cls.personality_question_file = f"{cls.config_dir}/{os.getenv('PERSONALITY_QUESTION_FILE')}"
        cls.personality_scoring_file = f"{cls.config_dir}/{os.getenv('PERSONALITY_SCORING_FILE')}"
        cls.style_tone_question_file = f"{cls.config_dir}/{os.getenv('STYLE_TONE_QUESTION_FILE')}"
        cls.interview_question_file = f"{cls.config_dir}/{os.getenv('INTERVIEW_QUESTION_FILE')}"
        cls.survey_question_file = f"{cls.config_dir}/{os.getenv('SURVEY_QUESTION_FILE')}"
        cls.interview_validation_files = f"{cls.config_dir}/{os.getenv('INTERVIEW_VALIDATION_FILES')}"

        # Respondent Agent Model: Load the environment variables, API keys, and parameters
        cls._load_llm_settings("respondent_agent", "RESPONDENT_AGENT_HOST", "RESPONDENT_AGENT_MODEL")

        # Processing Agent Model: Load the environment variables, API keys, and parameters
        cls._load_llm_settings("processing_agent", "PROCESSING_AGENT_HOST", "PROCESSING_AGENT_MODEL")

        # Processor Model: Load the environment variables, API keys, and parameters
        # NOTE(review): the prefix is derived by stripping '_AGENT_HOST' from the
        # PROCESSOR_HOST value, same as the agents above - confirm that the
        # .env files point PROCESSOR_HOST at a '*_AGENT_HOST' variable.
        cls._load_llm_settings("processor", "PROCESSOR_HOST", "PROCESSOR_MODEL")

    @classmethod
    def print_environment(cls):
        """Print the loaded configuration to stdout, with API keys masked."""

        print("ENVIRONMENT CONFIGURATION")
        print(f"Environment Name: {cls.env_name}")
        print(f"Number of Respondents: {cls.num_respondents}")
        print(f"Number of Focus Groups: {cls.num_focus_groups}")

        print("\nDIRECTORIES:")
        print(f"Base Directory: {cls.base_dir}")
        print(f"Config Directory: {cls.config_dir}")
        print(f"Test Result Directory: {cls.test_result_dir}")
        print(f"Input Directory: {cls.input_dir}")
        print(f"Output Directory: {cls.output_dir}")

        print("\nFILES:")
        print(f"Respondent Summary File: {cls.respondent_summary_file}")
        print(f"Focus Group Summary File: {cls.focus_group_summary_file}")
        print(f"Personality Question File: {cls.personality_question_file}")
        print(f"Respondent Details File: {cls.respondent_details_file}")
        print(f"Data Dictionary File: {cls.data_dictionary_file}")
        print(f"Personality Scoring File: {cls.personality_scoring_file}")
        print(f"Style Tone Question File: {cls.style_tone_question_file}")
        print(f"Interview Question File: {cls.interview_question_file}")
        print(f"Survey Question File: {cls.survey_question_file}")
        print(f"Interview Validation Files: {cls.interview_validation_files}")

        print("\nRESPONDENT AGENT CONFIGS")
        print(f"Respondent Agent Host: {cls.respondent_agent_host}")
        print(f"Respondent Agent Model: {cls.respondent_agent_model}")
        # Secrets are masked: this output routinely ends up in notebooks and logs.
        print(f"Respondent Agent API Key: {cls._mask_secret(cls.respondent_agent_api_key)}")
        print(f"Respondent Agent URL: {cls.respondent_agent_url}")
        print(f"Respondent Agent Temperature: {cls.respondent_agent_temperature}")
        print(f"Respondent Agent Top P: {cls.respondent_agent_top_p}")
        print(f"Respondent Agent Frequency Penalty: {cls.respondent_agent_frequency_penalty}")
        print(f"Respondent Agent Presence Penalty: {cls.respondent_agent_presence_penalty}")

        print("\nPROCESSING AGENT CONFIGS")
        print(f"Processing Agent Host: {cls.processing_agent_host}")
        print(f"Processing Agent Name: {cls.processing_agent_model}")
        print(f"Processing Agent API Key: {cls._mask_secret(cls.processing_agent_api_key)}")
        print(f"Processing Agent URL: {cls.processing_agent_url}")
        print(f"Processing Agent Temperature: {cls.processing_agent_temperature}")
        print(f"Processing Agent Top P: {cls.processing_agent_top_p}")
        print(f"Processing Agent Frequency Penalty: {cls.processing_agent_frequency_penalty}")
        print(f"Processing Agent Presence Penalty: {cls.processing_agent_presence_penalty}")

        print("\nPROCESSOR CONFIGS")
        print(f"Processor Host: {cls.processor_host}")
        print(f"Processor Name: {cls.processor_model}")
        print(f"Processor API Key: {cls._mask_secret(cls.processor_api_key)}")
        print(f"Processor URL: {cls.processor_url}")
        print(f"Processor Temperature: {cls.processor_temperature}")
        print(f"Processor Top P: {cls.processor_top_p}")
        print(f"Processor Frequency Penalty: {cls.processor_frequency_penalty}")
        print(f"Processor Presence Penalty: {cls.processor_presence_penalty}")
|
common/DataDictionary.py
ADDED
|
@@ -0,0 +1,134 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import re
|
| 2 |
+
import datetime
|
| 3 |
+
import textwrap
|
| 4 |
+
|
| 5 |
+
from Config import Config
|
| 6 |
+
|
| 7 |
+
import pandas as pd
|
| 8 |
+
import numpy as np
|
| 9 |
+
|
| 10 |
+
class DataDictionary:
    """Ordered collection of data-dictionary entries.

    Each entry is a plain dict with at least the keys ``Type``, ``Parameter``
    and ``Description``; optional keys (``Source``, ``ValidValues``,
    ``InferredLogic``) may be absent because :meth:`add_entry` drops keys
    whose value is ``None`` or ``""``.
    """

    def __init__(self):
        """
        Initialize the DataDictionary instance with an empty list of entries.
        """
        self.entries = []

    def add_entry(self, entry):
        """
        Add an entry to the data dictionary. Entry should be a dict with expected keys.
        Filters out None or empty-string values, and ensures required keys are present.

        Raises:
            ValueError: If any of ``Type``, ``Parameter`` or ``Description``
                is not a key of ``entry``.
        """
        required_keys = {"Type", "Parameter", "Description"}
        missing = required_keys - entry.keys()
        if missing:
            raise ValueError(f"Missing required fields in entry: {missing}")

        # Optionally filter or transform the entry
        clean_entry = {k: v for k, v in entry.items() if v is not None and v != ""}

        self.entries.append(clean_entry)

    def get_types(self):
        """
        Extract all types defined for the data dictionary, preserving insertion order.

        Returns:
            list: A list of all unique types in the dictionary, preserving order.
        """
        seen = set()
        ordered_types = []
        for entry in self.entries:
            entry_type = entry.get("Type")
            if entry_type is not None and entry_type not in seen:
                seen.add(entry_type)
                ordered_types.append(entry_type)
        return ordered_types

    def get_parameters(self, type="All"):
        """
        Extract parameters of a particular type from the data dictionary, preserving insertion order.

        Args:
            type (str): Type of entries to return (defaults to "All").

        Returns:
            list: A list of all unique parameters matching the specified type, preserving order.
        """
        seen = set()
        ordered_parameters = []

        for entry in self.entries:
            # .get(): add_entry may have dropped a key whose value was None/"".
            if type != "All" and entry.get("Type") != type:
                continue
            parameter = entry.get("Parameter")
            if parameter is not None and parameter not in seen:
                seen.add(parameter)
                ordered_parameters.append(parameter)

        return ordered_parameters

    def get_columns(self):
        """
        Generate a list of column names in the format type_parameter.

        Returns:
            list: A list of column names preserving order.
        """
        columns = []
        for entry in self.entries:
            entry_type = entry.get("Type")
            parameter = entry.get("Parameter")
            if entry_type and parameter:  # Ensure both Type and Parameter exist
                columns.append(f"{entry_type}_{parameter}")
        return columns

    def filter_entries(self, Source=None, Type=None, Parameter=None):
        """
        Filter entries based on Source, Type, or Parameter.

        An entry that lacks a key being filtered on simply does not match
        (it no longer raises ``KeyError``).

        Args:
            Source (str, optional): The source to filter by.
            Type (str, optional): The type to filter by.
            Parameter (str, optional): The parameter to filter by.

        Returns:
            list: A list of entries matching the filter criteria.
        """
        return [
            entry for entry in self.entries
            if (Source is None or entry.get("Source") == Source) and
               (Type is None or entry.get("Type") == Type) and
               (Parameter is None or entry.get("Parameter") == Parameter)
        ]

    @staticmethod
    def generate_dictionary(data_dictionary_file):
        """
        Static method to generate a DataDictionary instance from an Excel (.xlsx) file.

        The sheet must have ``Type``, ``Parameter`` and ``Description``
        columns; ``Source``, ``Scoring_Method`` (stored as ``ValidValues``)
        and ``Inferred_Logic`` (stored as ``InferredLogic``) are optional.

        Args:
            data_dictionary_file (str): The path to the Excel file containing data dictionary entries.

        Returns:
            DataDictionary: A populated DataDictionary instance.
        """
        import pandas as pd  # Ensure pandas is imported
        df = pd.read_excel(data_dictionary_file)

        data_dictionary = DataDictionary()

        for _, row in df.iterrows():
            data_dictionary.add_entry({
                "Type": row["Type"],
                "Parameter": row["Parameter"],
                "Description": row["Description"],
                "Source": row.get("Source"),
                "ValidValues": row.get("Scoring_Method"),
                "InferredLogic": row.get("Inferred_Logic"),
            })

        return data_dictionary

    def __repr__(self):
        return f"DataDictionary({len(self.entries)} entries)"
|
common/FastFacts.py
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import re
|
| 2 |
+
import datetime
|
| 3 |
+
import textwrap
|
| 4 |
+
|
| 5 |
+
from Config import Config
|
| 6 |
+
from DataDictionary import *
|
| 7 |
+
|
| 8 |
+
import pandas as pd
|
| 9 |
+
import numpy as np
|
| 10 |
+
|
| 11 |
+
class FastFacts:
    """Container for a list of short textual facts.

    ``facts`` stays ``None`` until the first successful add, after which it
    is a list of strings.
    """

    def __init__(self):
        self.facts = None  # Lazily initialised

    def add_fact(self, fact):
        """
        Append one fact; anything that is not a string is reported and ignored.
        """
        if isinstance(fact, str):
            # Create the backing list on first use
            if self.facts is None:
                self.facts = []
            self.facts.append(fact)
        else:
            print("Only strings are allowed as facts.")

    def add_facts(self, facts):
        """
        Append several facts from a set or list; non-string items are skipped
        with a message, and any other container type is rejected outright.
        """
        if not isinstance(facts, (set, list)):
            print("Facts must be provided as a set or list.")
            return

        # Create the backing list on first use (even when `facts` is empty)
        if self.facts is None:
            self.facts = []

        for candidate in facts:
            if not isinstance(candidate, str):
                print(f"Skipping non-string fact: {candidate}")
                continue
            self.facts.append(candidate)

    def __repr__(self):
        label = self.__class__.__name__
        if self.facts:
            return f"{label}: " + ", ".join(f"<{item}>" for item in self.facts)
        return f"{label}: No facts available"

    def to_dict(self):
        """
        Return ``{"facts": [...]}``; the list is empty when nothing was added.
        """
        return {"facts": self.facts or []}

    @staticmethod
    def read_from_excel(fact_file):
        """
        Build a FastFacts object from the ``FastFacts`` column of an Excel file.

        Args:
            fact_file (str): Path to the Excel file.

        Returns:
            FastFacts: The populated object, or None if reading failed
            (the error is printed).
        """
        try:
            sheet = pd.read_excel(fact_file)
            # Facts are expected in a column named 'FastFacts'; empty cells are dropped
            loaded = sheet["FastFacts"].dropna().tolist()

            result = FastFacts()
            result.add_facts(loaded)
            return result
        except Exception as e:
            print(f"An error occurred while reading from the Excel file: {e}")
            return None
|
common/Interview.py
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import List, Optional
|
| 2 |
+
from pydantic import BaseModel
|
| 3 |
+
import pandas as pd
|
| 4 |
+
from itertools import groupby
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
class QAEntry(BaseModel):
    """A single interview question together with its (optional) answer data."""

    # Question number, section heading and question text.
    Num: int
    Section: str
    Question: str
    # The three fields below accept None.
    # NOTE(review): none of them declares a default, so under pydantic v2 they
    # would still have to be passed explicitly - confirm the pydantic version
    # in use before constructing entries without them.
    Expected_Output: Optional[str]
    Respondent: Optional[str]
    Answer: Optional[str]
|
| 14 |
+
|
| 15 |
+
class InterviewReport(BaseModel):
    """An ordered list of QAEntry rows, plus helpers to load them from Excel."""

    Entries: List[QAEntry]

    def __repr__(self):
        # Consecutive entries sharing a Section are printed under one heading
        # (groupby only merges adjacent runs; the entry order is not changed).
        pieces = []
        for section, section_entries in groupby(self.Entries, key=lambda item: item.Section):
            pieces.append(f"{section}:\n")
            for item in section_entries:
                pieces.append(f"Q {item.Num}: {item.Question}\n")
                pieces.append(f"Expected Output: {item.Expected_Output or 'No Expected Output'}\n")
                pieces.append(f"Respondent: {item.Respondent or 'No Respondent'}\n")
                pieces.append(f"A: {item.Answer or 'No Answer'}\n")
        return "".join(pieces)

    def get_respondent_responses(self, respondent):
        """Return the entries whose Respondent matches ``respondent``, ignoring case."""
        wanted = respondent.lower()
        matches = []
        for item in self.Entries:
            if item.Respondent and item.Respondent.lower() == wanted:
                matches.append(item)
        return matches

    @staticmethod
    def generate_interview_script(interview_file):
        """Load the questions from an Excel file, leaving Respondent and Answer unset."""
        records = pd.read_excel(interview_file).to_dict('records')

        qa_entries = []
        for idx, row in enumerate(records):
            print(f"Processing row {idx}: {row}")  # Debug: show the full row being processed

            expected = row.get('Expected_Output')
            if not pd.notna(expected):
                expected = None

            qa_entries.append(
                QAEntry(
                    Num=row['Num'],
                    Section=row['Section'],
                    Question=row['Question'],
                    Expected_Output=expected,
                    Respondent=None,
                    Answer=None,
                )
            )

        return InterviewReport(Entries=qa_entries)

    @staticmethod
    def generate_interview_report(interview_file):
        """Load a completed interview from Excel, substituting placeholders for empty cells."""
        records = pd.read_excel(interview_file).to_dict('records')

        def cell_or(row, column, placeholder):
            # Missing columns and empty (NaN) cells both fall back to the placeholder.
            value = row.get(column)
            return value if pd.notna(value) else placeholder

        qa_entries = []
        for row in records:
            qa_entries.append(
                QAEntry(
                    Num=row['Num'],
                    Section=row['Section'],
                    Question=row['Question'],
                    Expected_Output=cell_or(row, 'Expected_Output', "No Expected Output Provided"),
                    Respondent=cell_or(row, 'Respondent', "No Respondent Provided"),
                    Answer=cell_or(row, 'Answer', "No Answer Provided"),
                )
            )

        return InterviewReport(Entries=qa_entries)
|
| 75 |
+
|
common/InterviewUtilities.py
ADDED
|
@@ -0,0 +1,144 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import re
|
| 2 |
+
import json
|
| 3 |
+
|
| 4 |
+
def parse_expected_output_fields(expected_output_text):
    """
    Turn a "key: description" listing (one pair per line) into a list of
    (key, description) tuples. Lines without a colon are ignored; only the
    first colon on a line separates key from description.
    """
    split_lines = (
        line.partition(':')
        for line in expected_output_text.strip().splitlines()
    )
    return [
        (key.strip(), description.strip())
        for key, colon, description in split_lines
        if colon
    ]
|
| 16 |
+
|
| 17 |
+
def extract_fields_from_expected_output(expected_output_text):
    """
    List only the field names (the part before the first colon of each line)
    found in expected_output_text, in order of appearance.
    """
    return [pair[0] for pair in parse_expected_output_fields(expected_output_text)]
|
| 23 |
+
|
| 24 |
+
def split_json_string(text):
|
| 25 |
+
"""
|
| 26 |
+
Best of both worlds:
|
| 27 |
+
- Splits text into 'thought' and 'JSON' parts
|
| 28 |
+
- Scans for all possible { positions
|
| 29 |
+
- Cleans unescaped newlines inside quotes
|
| 30 |
+
- Strips junk between </think> and JSON if JSON exists
|
| 31 |
+
- Preserves full text after </think> if no JSON
|
| 32 |
+
"""
|
| 33 |
+
|
| 34 |
+
# Step 1: Split at </think> if exists
|
| 35 |
+
if '</think>' in text:
|
| 36 |
+
thought_part, possible_json_part = text.split('</think>', 1)
|
| 37 |
+
thought_part = thought_part.strip()
|
| 38 |
+
possible_json_part = possible_json_part.strip()
|
| 39 |
+
else:
|
| 40 |
+
thought_part = None
|
| 41 |
+
possible_json_part = text.strip()
|
| 42 |
+
|
| 43 |
+
# Step 2: Find all { positions
|
| 44 |
+
brace_positions = [m.start() for m in re.finditer(r'{', possible_json_part)]
|
| 45 |
+
|
| 46 |
+
# Clean function: fix newlines inside quoted strings
|
| 47 |
+
def clean_json_formatting(text):
|
| 48 |
+
def fix_inside_quotes(match):
|
| 49 |
+
content = match.group(1)
|
| 50 |
+
fixed = content.replace('\n', '\\n').replace('\r', '\\n')
|
| 51 |
+
return f'"{fixed}"'
|
| 52 |
+
return re.sub(r'"(.*?)"', fix_inside_quotes, text, flags=re.DOTALL)
|
| 53 |
+
|
| 54 |
+
for pos in brace_positions:
|
| 55 |
+
candidate = possible_json_part[pos:].strip()
|
| 56 |
+
|
| 57 |
+
# Pre-clean
|
| 58 |
+
candidate = clean_json_formatting(candidate)
|
| 59 |
+
|
| 60 |
+
# Fix double braces if necessary
|
| 61 |
+
if candidate.startswith("{{") and "}}" in candidate:
|
| 62 |
+
candidate = candidate.replace("{{", "{", 1).replace("}}", "}", 1)
|
| 63 |
+
|
| 64 |
+
# Must start with {" or {'
|
| 65 |
+
if not re.match(r'^\{\s*["\']', candidate):
|
| 66 |
+
continue # not real JSON, skip
|
| 67 |
+
|
| 68 |
+
try:
|
| 69 |
+
json.loads(candidate)
|
| 70 |
+
# β
Successful parse
|
| 71 |
+
return thought_part, candidate
|
| 72 |
+
except json.JSONDecodeError:
|
| 73 |
+
continue # try next
|
| 74 |
+
|
| 75 |
+
# π No valid JSON found β return thought and full original remainder (no chopping)
|
| 76 |
+
return thought_part, possible_json_part
|
| 77 |
+
|
| 78 |
+
def extract_and_parse_json(result_text):
    """
    Extracts and parses JSON output, handling cases where JSON is enclosed in triple backticks
    (```json ... ```) or already correctly formatted `{}`.

    The whole text is tried first (with raw line breaks replaced by spaces).
    If that fails, the first ```json fenced block is tried, both as-is and
    with the same line-break cleaning, so a fenced payload gets the same
    tolerance as an unfenced one.

    Args:
        result_text (str): The raw text output containing JSON data.

    Returns:
        dict or None: Parsed JSON value if successful, None otherwise.
    """
    if not result_text:
        print("🚨 No result text data received.")
        return None

    # Clean unescaped line breaks that often break LLM JSON output
    def clean_json_formatting(text):
        # Replace unescaped newlines with a space
        return re.sub(r'(?<!\\)\n', ' ', text)

    # Try parsing directly after cleaning line breaks
    try:
        return json.loads(clean_json_formatting(result_text))
    except json.JSONDecodeError:
        print("Unable to parse cleaned direct JSON.")

    # Try extracting JSON from triple backticks. Whitespace around the
    # payload is optional so CRLF output or a fence on the same line matches.
    match = re.search(r'```json\s*({[\s\S]+?})\s*```', result_text, re.DOTALL)
    if match:
        fenced = match.group(1).strip()
        # Raw payload first (keeps intentional formatting); the cleaned
        # variant rescues payloads with line breaks inside string values.
        for candidate in (fenced, clean_json_formatting(fenced)):
            try:
                return json.loads(candidate)
            except json.JSONDecodeError:
                continue

    print("🚨 No valid JSON found.")
    return None  # No valid JSON detected
|
| 116 |
+
|
| 117 |
+
def generate_json_expected_output(expected_output_text):
    """
    Build the prompt fragment that tells the model which JSON shape to return,
    with one `"key": description` line per field parsed from expected_output_text.
    """
    # Keys are lower-cased (underscores are left as they are).
    rendered = [
        f' "{key.lower()}": {description}'
        for key, description in parse_expected_output_fields(expected_output_text)
    ]

    # The final line carries no trailing comma(s).
    if rendered:
        rendered[-1] = rendered[-1].rstrip(',')

    # Every line except the last ends with a comma.
    json_body = ",\n".join(rendered)

    header = (
        "You must return your answer strictly in the following JSON format. "
        "Do not include any markdown, commentary, or extra text. The response must be valid JSON:\n\n"
    )
    return header + "{\n" + f"{json_body}\n" + "}"
|
common/LLMConfig.py
ADDED
|
@@ -0,0 +1,155 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from langchain_openai import ChatOpenAI
|
| 2 |
+
from langchain_groq import ChatGroq
|
| 3 |
+
from langchain_together import ChatTogether
|
| 4 |
+
from crewai import LLM
|
| 5 |
+
from Config import Config
|
| 6 |
+
import os
|
| 7 |
+
|
| 8 |
+
# ========== PUBLIC INTERFACE ==========
|
| 9 |
+
|
| 10 |
+
def _env_float(name, default):
    """Read environment variable *name* as a float.

    Args:
        name (str): Environment variable name.
        default (float): Value returned when the variable is unset or blank.

    Returns:
        float: The parsed value, or *default*.

    Raises:
        ValueError: If the variable is set to something that is not a number.
    """
    raw = os.getenv(name)
    if raw is None or not raw.strip():
        return default
    try:
        return float(raw)
    except ValueError as err:
        # Name the offending variable; a bare float() error does not say which one failed.
        raise ValueError(f"Environment variable {name} must be a number, got {raw!r}.") from err


def _load_llm_settings(model_type, config_prefix, label):
    """Resolve, validate and log the settings shared by the public LLM getters.

    Args:
        model_type (str | None): Provider name. When falsy, every setting is read
            from ``Config.<config_prefix>_*``; otherwise settings are read from
            environment variables prefixed with ``model_type.upper()``.
        config_prefix (str): Prefix of the Config attributes to read
            (e.g. ``"respondent_agent"``).
        label (str): Human-readable label used in the log line.

    Returns:
        tuple: ``(model_type, model, api_key, url, temperature, top_p,
        frequency_penalty, presence_penalty)``.

    Raises:
        ValueError: If the API key or model is missing, or a numeric environment
            variable cannot be parsed.
    """
    if not model_type:
        suffixes = (
            "host", "model", "api_key", "url",
            "temperature", "top_p", "frequency_penalty", "presence_penalty",
        )
        settings = tuple(getattr(Config, f"{config_prefix}_{suffix}") for suffix in suffixes)
    else:
        # e.g. model_type="groq" -> GROQ_AGENT_MODEL, GROQ_API_KEY, GROQ_URL, ...
        prefix = model_type.upper()
        settings = (
            model_type,
            os.getenv(f"{prefix}_AGENT_MODEL"),
            os.getenv(f"{prefix}_API_KEY"),
            os.getenv(f"{prefix}_URL"),
            _env_float(f"{prefix}_TEMPERATURE", 0.7),
            _env_float(f"{prefix}_TOP_P", 1.0),
            _env_float(f"{prefix}_FREQUENCY_PENALTY", 0.0),
            _env_float(f"{prefix}_PRESENCE_PENALTY", 0.0),
        )

    model_type, model, api_key, url, temperature, top_p, frequency_penalty, presence_penalty = settings

    if not api_key:
        raise ValueError(f"API key not found for model_type={model_type}.")
    if not model:
        raise ValueError(f"Model not found for model_type={model_type}.")

    # The key itself is never printed, only whether it is present.
    print(f"{label}: model_type={model_type}, model={model}, api_key={'*****' if api_key else 'MISSING'}, url={url}")
    print(f"Params: temperature={temperature}, top_p={top_p}, frequency_penalty={frequency_penalty}, presence_penalty={presence_penalty}")
    return settings


def get_respondent_agent_llm_instance(model_type=None):
    """Return the LLM used by respondent agents.

    Args:
        model_type (str | None): Provider name; defaults to the
            ``Config.respondent_agent_*`` settings when omitted.

    Returns:
        The object produced by ``get_crewai_instance`` for the resolved settings.

    Raises:
        ValueError: If the API key or model cannot be resolved.
    """
    settings = _load_llm_settings(model_type, "respondent_agent", "Respondent Agent LLM")
    return get_crewai_instance(*settings)


def get_processing_agent_llm_instance(model_type=None):
    """Return the LLM used by processing agents.

    Args:
        model_type (str | None): Provider name; defaults to the
            ``Config.processing_agent_*`` settings when omitted.

    Returns:
        The object produced by ``get_crewai_instance`` for the resolved settings.

    Raises:
        ValueError: If the API key or model cannot be resolved.
    """
    settings = _load_llm_settings(model_type, "processing_agent", "Processing Agent LLM")
    return get_crewai_instance(*settings)


def get_processor_llm_instance(model_type=None):
    """Return the LLM used by the processor.

    Args:
        model_type (str | None): Provider name; defaults to the
            ``Config.processor_*`` settings when omitted.

    Returns:
        The object produced by ``get_langchain_instance`` for the resolved settings.

    Raises:
        ValueError: If the API key or model cannot be resolved.
    """
    settings = _load_llm_settings(model_type, "processor", "Processor LLM")
    return get_langchain_instance(*settings)
|
| 108 |
+
|
| 109 |
+
# ========== INTERNAL HELPERS ==========
|
| 110 |
+
|
| 111 |
+
def get_crewai_instance(model_type, model, api_key, url, temperature, top_p, frequency_penalty, presence_penalty):
    """Build the LLM client for the CrewAI-side callers.

    The provider name is lower-cased before matching. For ``groq``,
    ``openrouter`` and ``together_ai`` the model is passed as
    ``"<provider>/<model>"``; for ``openai`` it is passed unchanged.

    Args:
        model_type (str): Provider name (``groq``, ``openai``, ``openrouter``, ``together_ai``).
        model (str): Model identifier.
        api_key (str): API key for the provider.
        url (str | None): Base URL; only used for ``openrouter`` and ``together_ai``.
        temperature, top_p, frequency_penalty, presence_penalty: Sampling
            parameters; ``None`` values are omitted. The ``groq`` branch only
            forwards ``temperature``.

    Returns:
        A ``ChatGroq``, ``ChatOpenAI`` or ``LLM`` instance.

    Raises:
        ValueError: If the provider is not one of the supported names.
    """
    provider = model_type.lower()
    qualified_model = f"{provider}/{model}"

    if provider == 'groq':
        return ChatGroq(groq_api_key=api_key, model_name=qualified_model, temperature=temperature, model_kwargs={})

    # Keep only the sampling parameters that were actually supplied.
    candidates = (
        ("temperature", temperature),
        ("top_p", top_p),
        ("frequency_penalty", frequency_penalty),
        ("presence_penalty", presence_penalty),
    )
    sampling = {name: value for name, value in candidates if value is not None}

    if provider == 'openai':
        return ChatOpenAI(model=model, api_key=api_key, **sampling)
    if provider == 'openrouter':
        return ChatOpenAI(base_url=url, model=qualified_model, api_key=api_key, **sampling)
    if provider == 'together_ai':
        return LLM(model=qualified_model, api_key=api_key, api_base=url, **sampling)

    raise ValueError(f"Unsupported model type for CrewAI: {provider}")
|
| 133 |
+
|
| 134 |
+
def get_langchain_instance(model_type, model, api_key, url, temperature, top_p, frequency_penalty, presence_penalty):
    """Build the chat-model client for the LangChain-side caller.

    The provider name is lower-cased before matching; the model identifier is
    always passed through unchanged.

    Args:
        model_type (str): Provider name (``groq``, ``openai``, ``openrouter``, ``together_ai``).
        model (str): Model identifier.
        api_key (str): API key for the provider.
        url (str | None): Base URL; only used for ``openrouter``.
        temperature, top_p, frequency_penalty, presence_penalty: Sampling
            parameters; ``None`` values are omitted. The ``groq`` branch only
            forwards ``temperature``.

    Returns:
        A ``ChatGroq``, ``ChatOpenAI`` or ``ChatTogether`` instance.

    Raises:
        ValueError: If the provider is not one of the supported names.
    """
    provider = model_type.lower()

    if provider == 'groq':
        return ChatGroq(groq_api_key=api_key, model_name=model, temperature=temperature, model_kwargs={})

    # Keep only the sampling parameters that were actually supplied.
    candidates = (
        ("temperature", temperature),
        ("top_p", top_p),
        ("frequency_penalty", frequency_penalty),
        ("presence_penalty", presence_penalty),
    )
    sampling = {name: value for name, value in candidates if value is not None}

    if provider == 'openai':
        return ChatOpenAI(model=model, api_key=api_key, **sampling)
    if provider == 'openrouter':
        return ChatOpenAI(base_url=url, model=model, api_key=api_key, **sampling)
    if provider == 'together_ai':
        return ChatTogether(model=model, together_api_key=api_key, **sampling)

    raise ValueError(f"Unsupported model type for LangChain: {provider}")
|
common/PersonalityValues.py
ADDED
|
@@ -0,0 +1,154 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pydantic import BaseModel
|
| 2 |
+
from typing import List, Dict, Optional
|
| 3 |
+
from collections import defaultdict
|
| 4 |
+
|
| 5 |
+
import datetime
|
| 6 |
+
import json
|
| 7 |
+
import os
|
| 8 |
+
import pandas as pd
|
| 9 |
+
import re
|
| 10 |
+
import numpy as np
|
| 11 |
+
import pprint
|
| 12 |
+
import math
|
| 13 |
+
|
| 14 |
+
from UserProfile import *
|
| 15 |
+
|
| 16 |
+
class PVEntry(BaseModel):
    """One personality-value assessment item.

    The optional fields default to ``None`` so an entry can be created before
    it has been answered or scored. Without explicit defaults, pydantic v2
    treats ``Optional[...]`` fields as required.
    """

    # Populated from the "Num", "Value" and "Assessment_Question" spreadsheet columns.
    Num: int
    Value: str
    Question: str
    # Populated from the "Assessment_Criteria" and "Rating_Definition" columns.
    Criteria: Optional[str] = None
    # Read as ';'-separated "range: description" items by get_score_definition.
    Rating_Definition: Optional[str] = None
    # Parsed from comma-separated spreadsheet cells.
    Adjacent_Values: Optional[List[str]] = None
    Opposite_Values: Optional[List[str]] = None
    # Left as None when the entry is first created from the spreadsheet.
    Answer: Optional[str] = None
    Score: Optional[int] = None
    Assessment: Optional[str] = None
|
| 27 |
+
|
| 28 |
+
def parse_values(value_str: Optional[str], delimiter: str = ",") -> List[str]:
    """
    Parses a delimited string into a list of trimmed, non-empty strings.

    Anything that is not a string (None, NaN, numbers, lists, arrays) yields an
    empty list. The type check replaces the earlier ``pd.isna`` test, which
    raised an ambiguous-truth ValueError for list/array input; a ``str`` can
    never be NaN, so no missing-value check is needed.

    Args:
        value_str (Optional[str]): The input string to parse.
        delimiter (str): The delimiter to use for splitting. Defaults to ','.

    Returns:
        List[str]: The trimmed items, with blank items (e.g. from a trailing
        delimiter or an empty string) dropped.
    """
    if not isinstance(value_str, str):
        return []
    trimmed = (item.strip() for item in value_str.split(delimiter))
    return [item for item in trimmed if item]
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
def extract_values_from_assessment_file(assessment_file):
    """
    Extracts and aggregates Value and Score pairs from an Excel file by summing scores.

    Rows with a missing Value, or with a Score that is missing or not numeric,
    are discarded before aggregation. Scores are coerced to numbers *before*
    the missing-value filter, so a non-numeric score can no longer survive as
    NaN and show up as a total of 0.

    Args:
        assessment_file (str): Path to the Excel file.

    Returns:
        list: A list of dictionaries with Value and total Score.

    Raises:
        ValueError: If the file lacks a 'Value' or 'Score' column.
    """
    df = pd.read_excel(assessment_file)

    # Ensure required columns are present
    if "Value" not in df.columns or "Score" not in df.columns:
        raise ValueError("The file must contain 'Value' and 'Score' columns.")

    # Work on an explicit copy so the column assignment below never targets a
    # view of the original frame (avoids pandas' SettingWithCopy warning).
    scores = df[["Value", "Score"]].copy()
    scores["Score"] = pd.to_numeric(scores["Score"], errors="coerce")
    scores = scores.dropna(subset=["Value", "Score"])

    # Group by Value and sum the scores
    aggregated = scores.groupby("Value", as_index=False)["Score"].sum()

    # Convert to list of dictionaries
    return aggregated.to_dict(orient="records")
|
| 71 |
+
|
| 72 |
+
class PVAssessment(BaseModel):
    """Personality-value assessment: PVEntry items grouped by their Value name."""

    Entries: dict[str, list[PVEntry]]

    @staticmethod
    def generate_personality_assessment(personality_file):
        """
        Builds a PVAssessment from an Excel file of assessment questions.

        Reads the columns Num, Value, Assessment_Question, Assessment_Criteria,
        Rating_Definition, Adjacent_Values and Opposite_Values. Answer, Score
        and Assessment are left as None.

        Args:
            personality_file (str): Path to the Excel file.

        Returns:
            PVAssessment: Entries grouped by the 'Value' column, in row order.
        """
        df = pd.read_excel(personality_file)

        def text_or_none(cell):
            # Blank Excel cells arrive as float NaN, which fails Optional[str]
            # validation; map them to None and stringify anything else.
            return None if pd.isna(cell) else str(cell)

        # Use defaultdict to allow appending multiple PVEntries per value
        entries = defaultdict(list)

        for _, row in df.iterrows():
            pv_entry = PVEntry(
                Num=row["Num"],
                Value=row["Value"],
                Question=row["Assessment_Question"],
                Criteria=text_or_none(row["Assessment_Criteria"]),
                Rating_Definition=text_or_none(row["Rating_Definition"]),
                Adjacent_Values=parse_values(row["Adjacent_Values"]),
                Opposite_Values=parse_values(row["Opposite_Values"]),
                Answer=None,
                Score=None,
                Assessment=None
            )
            entries[row["Value"]].append(pv_entry)

        return PVAssessment(Entries=dict(entries))

    @staticmethod
    def get_score_definition(value, score, pv_assessment):
        """
        Converts a numerical score (1-50) into a corresponding rating definition.

        The rating definition of the first entry for *value* is read as
        ';'-separated items of the form "lo-hi: description" or "n: description".
        Problems are reported with print() and yield an empty string; nothing is raised.

        Args:
            value (str): The personality value key.
            score (int): A numerical score between 1 and 50. NumPy integers
                (as produced when scores are read through pandas) are accepted too.
            pv_assessment (PVAssessment): The personality assessment object.

        Returns:
            str: The corresponding rating definition, or an empty string if not found.
        """
        if not isinstance(pv_assessment, PVAssessment):
            print("Error: Expected a PVAssessment object.")
            return ""

        if not isinstance(score, (int, np.integer)) or score < 1 or score > 50:
            print(f"Error: Invalid score '{score}' for '{value}'. Expected a number between 1 and 50.")
            return ""
        score = int(score)

        entry_list = pv_assessment.Entries.get(value)
        if not entry_list or not isinstance(entry_list, list):
            print(f"Error: No entries found for value '{value}'.")
            return ""

        # Use the first PVEntry in the list
        rating_definition = entry_list[0].Rating_Definition
        if not isinstance(rating_definition, str) or not rating_definition:
            print(f"Error: No valid rating definition found for '{value}'.")
            return ""

        # Find the corresponding description based on the score range
        for definition in parse_values(rating_definition, delimiter=";"):
            try:
                range_part, description = definition.split(":", 1)
                range_part = range_part.strip()

                if "-" in range_part:
                    range_lower, range_upper = map(int, range_part.split("-"))
                else:
                    range_lower = range_upper = int(range_part)
            except ValueError:
                # Covers a missing ':', a non-numeric bound, or more than one '-'.
                print(f"Error: Invalid rating definition format for '{value}': {definition}")
                continue

            if range_lower <= score <= range_upper:
                return description.strip()

        print(f"Error: No matching rating definition found for score {score} in '{value}'.")
        return ""
|
common/RespondentAgent.py
ADDED
|
@@ -0,0 +1,153 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from crewai import Agent,Task,Process,Crew
|
| 2 |
+
from crewai_tools import FileReadTool, TXTSearchTool
|
| 3 |
+
from crewai.tasks import OutputFormat
|
| 4 |
+
from pydantic import BaseModel
|
| 5 |
+
from typing import List, Dict
|
| 6 |
+
|
| 7 |
+
import datetime
|
| 8 |
+
import json
|
| 9 |
+
import os
|
| 10 |
+
import pandas as pd
|
| 11 |
+
import pprint
|
| 12 |
+
|
| 13 |
+
from UserProfile import *
|
| 14 |
+
|
| 15 |
+
class RespondentAgent:
    """Pairs a user profile with the agent configured to answer as that respondent."""

    def __init__(self, user_profile, agent):
        self.user_profile = user_profile
        self.agent = agent

    def set_user_profile(self, user_profile):
        """Replace the stored user profile."""
        self.user_profile = user_profile

    def set_agent(self, agent):
        """Replace the stored agent."""
        self.agent = agent

    def __repr__(self):
        return f"RespondentAgent(user_profile={self.user_profile}, agent={self.agent})"

    @staticmethod
    def create(user_profile, agent_detail_file, llm, respondent_type="INDIVIDUAL USER"):
        """
        Static method to create a respondent agent using user or group data and other details.

        The role embeds ``repr(user_profile)``. The backstory embeds the fast
        facts read from ``agent_detail_file`` when that file exists and yields
        facts; otherwise a default backstory is used. The role and backstory
        are printed as they are built.

        :param user_profile: The backstory and user information (individual or group).
        :param agent_detail_file: File that contains fast facts or additional context (may be None).
        :param llm: The language model to be used by the agent.
        :param respondent_type: Specifies what type of respondent to simulate - individual, focus group or individual user from a segment
        :return: A RespondentAgent wrapping the profile and the configured Agent.
        """
        # Wording inserted into the goal depends on who is being simulated.
        if respondent_type == "FOCUS GROUP":
            diversity_note = "your focus group"
        elif respondent_type == "INDIVIDUAL USER REPRESENTING A SEGMENT":
            diversity_note = "your segment"
        else:
            diversity_note = "your unique voice"

        user_profile_str = repr(user_profile)

        myRole = f"""
        You are simulating a {respondent_type} respondent based on the following {respondent_type} profile details:

        {user_profile_str}

        You must draw on your {respondent_type} profile in each and every one of your tasks.
        """

        print(f"Role: {myRole}")

        # NOTE: the dashes, apostrophes and emoji below were mis-encoded
        # ("β", "π§") in this text and have been restored to the intended
        # Unicode characters so the prompt reads cleanly.
        myGoal = f"""
        This is your VERY IMPORTANT GOAL:

        You represent a **{respondent_type}** with a specific **demographic background, cultural context, and lived experience**.
        Your responses must reflect the **{diversity_note}**, while staying grounded in the boundaries of **your {respondent_type}**.

        You MAY BE guided by real-life behavioral drivers, grounded in the **Jobs To Be Done (JTBD) framework**.
        If the JTBD is provided, then use the following internal map to shape your responses and decision-making:

        ### 🧠 JTBD-Driven Behavioral Profile
        - **Context of Use**: When and where the product fits into your life (e.g., "mid-morning at work", "after a long day").
        - **Trigger**: What causes you to consider using it (e.g., stress, thirst, social situation).
        - **Motivation**: What you are hoping to achieve (e.g., energy, comfort, focus).
        - **Need of the Moment**: What you immediately need resolved or improved.
        - **Perceived Solution**: How you think the product helps you.
        - **Barrier**: What might stop you from using it (e.g., cost, health concern, skepticism).
        - **Top 3 JTBDs**: The most important use cases you believe the product addresses.
        - **Solution**: The overall benefit or outcome you believe the product offers.

        Use these internal drivers to shape your thoughts, behaviors, tone, and choices—**even when not directly stated**.

        ---

        ### **General Guidelines:**
        - Keep responses natural and authentic—rooted in **realistic thought patterns, language, and perspectives**.
        - Stay consistent with the values, tone, and lived experience of **your {respondent_type}**.
        - Avoid sounding generic—**express culturally and contextually grounded opinions**.

        ---

        ### **For Interviews:**
        - Speak as if in a real conversation—**relatable, honest, and situational**.
        - Use your JTBD profile to explain *why* you’d use a product, hesitate, or find value in something.
        - Refer to triggers, motivations, or barriers in your own words (e.g., "I usually reach for this when...", "I’m not sure because...").

        ---

        ### **For Surveys:**
        - Choose answers that match **your JTBD profile** and **personal logic**.
        - Give justifications that reflect your real internal reasoning (e.g., "because it helps me focus when I’m stressed", "I don’t trust drinks with too many ingredients").
        - Stay internally consistent—responses should always match **your JTBD-based identity**.

        ---

        ### **Cultural & Personalisation Considerations:**
        - Maintain a clear sense of **{respondent_type} identity** throughout—how they think, speak, and decide.
        - Avoid artificial or overly polished language—speak with **personal texture and social realism**.
        - Match the **tone and voice** to what feels natural for **your {respondent_type}**, whether formal, casual, reserved, or expressive.

        By following these guidelines and grounding your behavior in the JTBD framework, your responses will remain **authentic, contextually appropriate, and true to your {respondent_type.lower()} profile**.
        """

        # Initialize myBackstory with a default value
        myBackstory = f"No backstory available. Focus on your {respondent_type} profile and VERY IMPORTANT GOAL instead."

        if agent_detail_file is not None and os.path.isfile(agent_detail_file):
            print(f"Reading fast facts from {agent_detail_file}")

            fast_facts = FastFacts.read_from_excel(agent_detail_file)

            if fast_facts:
                fast_facts_str = repr(fast_facts)
                myBackstory = f"""
                Your BACKSTORY has been enriched with a set of FAST FACTS about the {respondent_type} whose responses you are simulating.

                You must draw on your BACKSTORY FAST FACTS details in each and every one of your tasks.

                Your BACKSTORY FAST FACTS details are as follows:

                {fast_facts_str}
                """

            print(f"Backstory: {myBackstory}")
        else:
            print(f"No fast facts file found: {agent_detail_file}")

        # Create agent object
        agent = Agent(
            role=myRole,
            goal=myGoal,
            backstory=myBackstory,
            llm=llm,
            verbose=True,
            max_retry_limit=5,
            allow_delegation=False,
            memory=True
        )

        return RespondentAgent(user_profile, agent)
|
common/UserProfile.py
ADDED
|
@@ -0,0 +1,359 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import re
|
| 2 |
+
import datetime
|
| 3 |
+
import textwrap
|
| 4 |
+
|
| 5 |
+
from Config import Config
|
| 6 |
+
from DataDictionary import *
|
| 7 |
+
from FastFacts import *
|
| 8 |
+
from PersonalityValues import *
|
| 9 |
+
|
| 10 |
+
import pandas as pd
|
| 11 |
+
import numpy as np
|
| 12 |
+
|
| 13 |
+
class AttributeGroup:
    """
    A named group of profile fields.
    Every field passed at construction starts out as None; further fields may be added later.
    """
    def __init__(self, group_name, fields):
        self.group_name = group_name
        self.fields = dict.fromkeys(fields)

    def set_field(self, field_name, value):
        """
        Store a value under the given field name.
        An unknown field triggers a printed warning and is then added.
        """
        is_known = field_name in self.fields
        if not is_known:
            print(f"Warning: Field '{field_name}' not found in '{self.group_name}'. Adding dynamically.")

        self.fields[field_name] = value

    def get_field(self, field_name):
        """
        Return the value stored under the given field name.
        An unknown field prints a message and yields None.
        """
        try:
            return self.fields[field_name]
        except KeyError:
            print(f"Field '{field_name}' does not exist in the '{self.group_name}' attribute group.")
            return None

    def to_dict(self):
        """
        Return a new dictionary holding only the fields whose value is not None.
        """
        populated = {}
        for name, stored in self.fields.items():
            if stored is not None:
                populated[name] = stored
        return populated

    def __repr__(self):
        """
        Render as 'group_name(field=value, ...)', listing only non-None fields.
        """
        parts = [f"{name}={stored}" for name, stored in self.fields.items() if stored is not None]
        return f"{self.group_name}({', '.join(parts)})"
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
class UserProfile:
|
| 57 |
+
"""
|
| 58 |
+
Represents a user profile, dynamically initialised with attribute groups based on the DataDictionary.
|
| 59 |
+
Includes a lazily initialised FastFacts section for storing additional facts about the user.
|
| 60 |
+
"""
|
| 61 |
+
def __init__(self, data_dictionary):
|
| 62 |
+
self.data_dictionary = data_dictionary # Store the data dictionary for dynamic group creation
|
| 63 |
+
self.attribute_groups = {} # Dictionary to hold created attribute groups
|
| 64 |
+
self.ID = None # Unique identifier for the user profile
|
| 65 |
+
self.fast_facts = None # Lazily initialised FastFacts attribute
|
| 66 |
+
|
| 67 |
+
def set_ID(self, ID):
|
| 68 |
+
"""
|
| 69 |
+
Set the ID for the user profile.
|
| 70 |
+
"""
|
| 71 |
+
self.ID = ID
|
| 72 |
+
|
| 73 |
+
def set_field(self, group_name, field_name, value):
|
| 74 |
+
"""
|
| 75 |
+
Set a value for a field in a specific attribute group.
|
| 76 |
+
If the group does not already exist, it will be created dynamically.
|
| 77 |
+
"""
|
| 78 |
+
if group_name not in self.attribute_groups:
|
| 79 |
+
# Create the AttributeGroup only when needed
|
| 80 |
+
if group_name in self.data_dictionary.get_types():
|
| 81 |
+
self.attribute_groups[group_name] = AttributeGroup(
|
| 82 |
+
group_name,
|
| 83 |
+
self.data_dictionary.get_parameters(type=group_name)
|
| 84 |
+
)
|
| 85 |
+
else:
|
| 86 |
+
print(f"Attribute group '{group_name}' is not defined in the DataDictionary.")
|
| 87 |
+
return
|
| 88 |
+
self.attribute_groups[group_name].set_field(field_name, value)
|
| 89 |
+
|
| 90 |
+
def get_field(self, group_name, field_name):
|
| 91 |
+
"""
|
| 92 |
+
Get a value for a field in a specific attribute group.
|
| 93 |
+
"""
|
| 94 |
+
if group_name not in self.attribute_groups:
|
| 95 |
+
print(f"Attribute group '{group_name}' is not found.")
|
| 96 |
+
return None
|
| 97 |
+
|
| 98 |
+
return self.attribute_groups[group_name].get_field(field_name)
|
| 99 |
+
|
| 100 |
+
def set_fields_from_list(self, attribute_type, fields, field_key="field_name", value_key="value"):
|
| 101 |
+
"""
|
| 102 |
+
Sets fields in a UserProfile from a list of field-value pairs.
|
| 103 |
+
|
| 104 |
+
Args:
|
| 105 |
+
attribute_type (str): The name of the attribute group (e.g., "Values").
|
| 106 |
+
fields (list): A list of dictionaries with field names and values to set.
|
| 107 |
+
field_key (str): The key in the dictionary that corresponds to the field name.
|
| 108 |
+
value_key (str): The key in the dictionary that corresponds to the value.
|
| 109 |
+
"""
|
| 110 |
+
if not isinstance(fields, list) or not all(isinstance(field, dict) for field in fields):
|
| 111 |
+
print("Fields must be a list of dictionaries.")
|
| 112 |
+
return
|
| 113 |
+
|
| 114 |
+
for field in fields:
|
| 115 |
+
field_name = field.get(field_key) # Use the specified key for field names
|
| 116 |
+
value = field.get(value_key) # Use the specified key for values
|
| 117 |
+
|
| 118 |
+
if field_name is not None and value is not None:
|
| 119 |
+
self.set_field(attribute_type, field_name, value)
|
| 120 |
+
else:
|
| 121 |
+
print(f"Skipping invalid field: {field}")
|
| 122 |
+
|
| 123 |
+
|
| 124 |
+
def get_attributes(self, attribute_type=None):
|
| 125 |
+
"""
|
| 126 |
+
Retrieve attributes for a specific attribute type or all attributes if no type is specified.
|
| 127 |
+
|
| 128 |
+
Args:
|
| 129 |
+
attribute_type (str, optional): The name of the attribute group to retrieve.
|
| 130 |
+
If None, retrieves all attributes.
|
| 131 |
+
|
| 132 |
+
Returns:
|
| 133 |
+
dict: A dictionary of non-null attributes for the specified type or all types.
|
| 134 |
+
"""
|
| 135 |
+
if attribute_type:
|
| 136 |
+
if attribute_type in self.attribute_groups:
|
| 137 |
+
return self.attribute_groups[attribute_type].to_dict()
|
| 138 |
+
else:
|
| 139 |
+
print(f"Attribute type '{attribute_type}' does not exist in this user profile.")
|
| 140 |
+
return {}
|
| 141 |
+
else:
|
| 142 |
+
# Combine all attributes if no specific type is specified
|
| 143 |
+
all_attributes = {}
|
| 144 |
+
for group_name, group in self.attribute_groups.items():
|
| 145 |
+
all_attributes.update({f"{group_name}_{k}": v for k, v in group.to_dict().items()})
|
| 146 |
+
return all_attributes
|
| 147 |
+
|
| 148 |
+
|
| 149 |
+
def add_fast_facts(self, facts):
    """
    Add facts to this profile's FastFacts store, creating the store on first use.

    Args:
        facts (set | list): Facts to add. Any other type is rejected with a
            printed message and nothing is stored.
    """
    if isinstance(facts, (set, list)):
        # Create the container lazily, the first time facts are added
        if self.fast_facts is None:
            self.fast_facts = FastFacts()
        self.fast_facts.add_facts(facts)
    else:
        print("Facts must be provided as a set or list.")
|
| 164 |
+
|
| 165 |
+
def to_dict(self, data_dictionary):
    """
    Flatten the profile into a single dict keyed by "<type>_<field>".

    Walks every type and parameter listed by ``data_dictionary`` and copies
    the matching value from this profile. Fields whose value is None (or
    absent) are left out and a warning is printed for each.

    Args:
        data_dictionary: Object providing ``get_types()`` and
            ``get_parameters(type=...)``.

    Returns:
        dict: {'ID': <profile ID>, "<type>_<field>": value, ...}
    """
    flattened = {'ID': self.ID}

    for attribute_type in data_dictionary.get_types():
        stored = self.get_attributes(attribute_type)
        for field_name in data_dictionary.get_parameters(type=attribute_type):
            value = stored.get(field_name)
            if value is None:
                print(f"Warning: {field_name} not found in type {attribute_type}")
                continue
            flattened[f"{attribute_type}_{field_name}"] = value

    return flattened
|
| 187 |
+
|
| 188 |
+
def __repr__(self):
    """
    Build a debug string: the profile ID followed by str() of each attribute group.
    """
    rendered_groups = [str(group) for group in self.attribute_groups.values()]
    return "UserProfile(ID={}, {})".format(self.ID, ", ".join(rendered_groups))
|
| 194 |
+
|
| 195 |
+
|
| 196 |
+
@staticmethod
def write_user_profiles_to_excel(user_profiles, filename, data_dictionary):
    """
    Write UserProfile objects to an Excel sheet, one row per profile.

    Columns are 'ID' followed by ``data_dictionary.get_columns()``, in that
    order. A column with no data in any profile is reported on stdout and
    written empty. Progress is printed for every profile.

    Args:
        user_profiles (list): UserProfile objects to export. If empty, a
            message is printed and no file is written.
        filename (str): Destination path of the Excel file.
        data_dictionary (object): Supplies the column order and is passed to
            each profile's ``to_dict``.
    """
    if not user_profiles:
        print("No user profiles to write.")
        return

    # Flatten each profile, echoing progress to stdout as we go
    rows = []
    for position, profile in enumerate(user_profiles, start=1):
        print(f"Processing profile {position}: {profile}")
        flattened = profile.to_dict(data_dictionary)
        print(f"Dict output: {flattened}")
        rows.append(flattened)

    ordered_columns = ['ID'] + data_dictionary.get_columns()

    table = pd.DataFrame(rows)

    # Any expected column that no profile supplied is added as an empty column
    for name in ordered_columns:
        if name in table.columns:
            continue
        print(f"Column {name} is missing")
        table[name] = None

    # Select the columns in data-dictionary order and write them out
    table[ordered_columns].to_excel(filename, index=False)

    print(f"User profiles successfully written to {filename}")
|
| 239 |
+
|
| 240 |
+
|
| 241 |
+
@staticmethod
def read_user_profiles_from_excel(respondent_details_file, data_dictionary, pv_criteria):
    """
    Build one UserProfile per row of an Excel sheet.

    For every type/field pair in the data dictionary the column named
    "<type>_<field>" is read. Empty cells are skipped. For the type named
    "values" (case-insensitive) the cell is converted to an int score and the
    text returned by ``PVAssessment.get_score_definition`` is stored under
    the field name "<type>_<field>_Description"; the numeric score itself is
    not stored. For every other type the raw cell value is stored as-is.

    Args:
        respondent_details_file (str): Path to the Excel file.
        data_dictionary (DataDictionary): Supplies the types and their fields.
        pv_criteria (PVAssessment): Passed through to
            ``PVAssessment.get_score_definition``.

    Returns:
        list: The UserProfile objects, in row order.
    """
    sheet = pd.read_excel(respondent_details_file)
    loaded_profiles = []

    for _, record in sheet.iterrows():
        profile = UserProfile(data_dictionary)

        # The ID is optional; only set it when the cell is populated
        respondent_id = record.get('ID')
        if pd.notna(respondent_id):
            profile.set_ID(respondent_id)

        for attribute_type in data_dictionary.get_types():
            for field in data_dictionary.get_parameters(type=attribute_type):
                full_field_name = f"{attribute_type}_{field}"
                raw_value = record.get(full_field_name)

                if not pd.notna(raw_value):
                    continue

                if attribute_type.lower() != "values":
                    # Generic field setting for non-Values types
                    profile.set_field(attribute_type, field, raw_value)
                    continue

                # Values type: translate the numeric score into its description
                try:
                    score = int(raw_value)
                    description = PVAssessment.get_score_definition(field, score, pv_criteria)
                    description_field_name = f"{attribute_type}_{field}_Description"
                    profile.set_field(attribute_type, description_field_name, description)
                except ValueError:
                    print(f"Warning: Could not convert '{raw_value}' to an integer for field '{full_field_name}'.")

        loaded_profiles.append(profile)

    print(f"User profiles successfully read from {respondent_details_file}")
    return loaded_profiles
|
| 298 |
+
|
| 299 |
+
|
| 300 |
+
class UserProfileDetail:
    """
    One key/value entry of a user profile together with its QA review result.

    Attributes set by ``__init__``: ``key``, ``original_value``, ``qa_check``
    and ``value`` (the revised value).
    """

    def __init__(self, key, original_value, qa_check, value):
        """
        Initialize a UserProfileDetail entry.

        Args:
            key: Name of the profile entry.
            original_value: Value as originally recorded.
            qa_check: QA check status for the entry.
            value: Revised value for the entry.
        """
        self.key = key
        self.original_value = original_value
        self.qa_check = qa_check
        self.value = value

    @staticmethod
    def _is_hidden(candidate):
        """Return True for values __repr__ leaves out: NaN, falsy, or "Unable to map"."""
        # Empty Excel cells arrive as float NaN, which is truthy; NaN is the
        # only float that is not equal to itself.
        if isinstance(candidate, float) and candidate != candidate:
            return True
        return not candidate or candidate == "Unable to map"

    def __repr__(self):
        """
        Render as ``UserProfileDetail(name='value', ...)``, omitting hidden fields.

        Fields that are falsy, NaN, or equal to "Unable to map" are omitted.
        """
        shown = [
            f"{name}='{val}'"
            for name, val in self.__dict__.items()
            if not self._is_hidden(val)
        ]
        # Balanced parentheses: the earlier format produced "Name: a, b)"
        return f"{self.__class__.__name__}(" + ", ".join(shown) + ")"

    @staticmethod
    def filter_profiles(profiles, key=None, qa_check=None, value=None):
        """
        Static method to filter user profiles by key, QA check status, or value.

        A criterion left as None is ignored; all supplied criteria must match
        exactly (logical AND).

        Args:
            profiles (list): List of UserProfileDetail objects.
            key (str, optional): The key to filter by.
            qa_check (str, optional): The QA check status to filter by.
            value (str, optional): The value to filter by.

        Returns:
            list: A list of UserProfileDetail entries that match the criteria.
        """
        return [
            profile for profile in profiles
            if (key is None or profile.key == key) and
            (qa_check is None or profile.qa_check == qa_check) and
            (value is None or profile.value == value)
        ]

    @staticmethod
    def generate_user_profiles(file_path):
        """
        Static method to generate a list of UserProfileDetail entries from an Excel (.xlsx) file.

        The sheet must contain the columns 'Key', 'Value', 'QA Check' and
        'Revised Value'; a missing column raises KeyError.

        Args:
            file_path (str): The path to the Excel file containing user profile entries.

        Returns:
            list: A list of UserProfileDetail objects generated from the file, in row order.
        """
        df = pd.read_excel(file_path)

        return [
            UserProfileDetail(
                key=row['Key'],
                original_value=row['Value'],
                qa_check=row['QA Check'],
                value=row['Revised Value'],
            )
            for _, row in df.iterrows()
        ]
|
common/Utilities.py
ADDED
|
@@ -0,0 +1,140 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
from collections import OrderedDict
|
| 3 |
+
from datetime import datetime
|
| 4 |
+
import pandas as pd
|
| 5 |
+
import os
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
def read_text_file(file_path):
    """Return the entire contents of the text file at ``file_path`` as one string."""
    with open(file_path, 'r') as source:
        return source.read()
|
| 12 |
+
|
| 13 |
+
# FILTER FUNCTION
|
| 14 |
+
def filter_profiles_by_input(profiles, data_dictionary):
    """Interactively narrow ``profiles`` with column/value filters typed on stdin.

    The user is repeatedly asked for a column (from the data dictionary) and a
    value. A profile is kept when the value occurs, case-insensitively, as a
    substring of ``str(profile.get_attributes()[column])``. Each column can be
    used once. A filter that would leave no profiles is discarded and the
    previous selection is kept.

    Args:
        profiles (list): Objects exposing ``get_attributes()`` returning a dict.
        data_dictionary: Object exposing ``get_columns()``.

    Returns:
        list: The profiles remaining after all accepted filters.

    Raises:
        RuntimeError: If the columns cannot be retrieved or none are returned.
    """
    print("\n=== FILTER SETTINGS ===")

    # Get column names from the data dictionary
    try:
        dd_columns = data_dictionary.get_columns()
        if not dd_columns:
            raise RuntimeError("Data dictionary returned no columns.")
    except Exception as e:
        # Chain the original error so its traceback is preserved as the cause
        raise RuntimeError(f"Failed to retrieve columns from data dictionary: {str(e)}") from e

    print("Available columns:")
    for col in sorted(dd_columns):
        print(f" • {col}")

    remaining_columns = set(dd_columns)
    filtered_profiles = profiles

    while True:
        if not remaining_columns:
            print("\nNo more columns available for filtering.")
            break

        print("\nColumns available to filter on:")
        for col in sorted(remaining_columns):
            print(f" • {col}")

        column = input("\nEnter column name to filter (press Enter to finish): ").strip()

        if not column:
            break  # Stop filtering when user presses Enter

        if column not in remaining_columns:
            print(f"\nError: Column '{column}' not found or already used for filtering.")
            continue

        value = input(f"Enter value to filter for '{column}' (press Enter to skip): ").strip()

        if not value:
            print("\nNo value entered. Skipping this filter.")
            continue

        needle = value.lower()
        new_filtered_profiles = [
            profile for profile in filtered_profiles
            if needle in str(profile.get_attributes().get(column, "")).lower()
        ]

        if not new_filtered_profiles:
            # Keep the previous selection; the column stays available for another try
            print(f"\nNo matches for '{column}' containing '{value}'. Returning to previous state.")
            continue

        filtered_profiles = new_filtered_profiles
        remaining_columns.remove(column)

        print(f"\nFound {len(filtered_profiles)} matching profiles")
        print(f"Profiles filtered out: {len(profiles) - len(filtered_profiles)}")

        confirm = input("\nProceed with another filter? (Yes/No): ").strip().lower()
        while confirm not in ['yes', 'no']:
            confirm = input("Invalid input. Please enter 'Yes' or 'No': ").strip().lower()

        if confirm == 'no':
            break

    return filtered_profiles
|
| 80 |
+
|
| 81 |
+
|
| 82 |
+
|
| 83 |
+
def generate_file_excerpt(file_path, pattern, max_chars=5000):
    """Return the tail of the text formed by the file's lines that start with ``pattern``.

    A line is kept when it starts with ``pattern`` and has at least six
    whitespace-separated tokens. Every occurrence of ``pattern`` is removed
    from a kept line (not only the leading one) and the line is stripped.
    Kept lines are joined with newlines and the LAST ``max_chars`` characters
    of that text are returned.

    Args:
        file_path (str): Path of the text file to read.
        pattern (str): Prefix that selects lines.
        max_chars (int): Maximum length of the returned excerpt. Zero or a
            negative number yields an empty string.

    Returns:
        str: At most ``max_chars`` characters taken from the end of the text.
    """
    # Step 1: Read the file and keep only the matching lines
    with open(file_path, 'r') as file:
        extracted_lines = [
            line.replace(pattern, '').strip()
            for line in file
            if line.startswith(pattern) and len(line.split()) >= 6
        ]

    # Step 2: Join all extracted lines into a single string
    full_text = '\n'.join(extracted_lines)

    # Step 3: Return the last max_chars characters. The guard is needed
    # because full_text[-0:] is the whole string, and a negative max_chars
    # would slice from the front.
    if max_chars <= 0:
        return ""
    return full_text[-max_chars:]
|
| 96 |
+
|
| 97 |
+
|
| 98 |
+
def generate_dict_from_file(file_name, column_name1, column_name2):
    """Map the values of one Excel column to the values of another, in row order.

    Args:
        file_name (str): Path of the Excel workbook (read with the openpyxl engine).
        column_name1 (str): Column whose cells become the keys.
        column_name2 (str): Column whose cells become the values.

    Returns:
        OrderedDict: key -> value pairs; a repeated key keeps its first
        position but takes the value of its last row.
    """
    frame = pd.read_excel(file_name, usecols=[column_name1, column_name2], engine='openpyxl')

    mapping = OrderedDict()
    for left, right in zip(frame[column_name1], frame[column_name2]):
        mapping[left] = right

    return mapping
|
| 105 |
+
|
| 106 |
+
def find_latest_timestamped_file(directory, filename_pattern):
    """Return the most recently modified file in ``directory`` whose name ends with ``filename_pattern``.

    "Latest" is decided by the file's modification time, not by any timestamp
    embedded in its name.

    Args:
        directory: The directory to search for files.
        filename_pattern: Suffix the file name must end with
            (e.g., "interview_results.xlsx").

    Returns:
        The path to the newest matching file, or None if no matching files
        were found (a message is printed in that case).
    """
    candidates = [name for name in os.listdir(directory) if name.endswith(filename_pattern)]

    if candidates:
        def modified_at(name):
            return os.path.getmtime(os.path.join(directory, name))

        newest = max(candidates, key=modified_at)
        return os.path.join(directory, newest)

    print(f"Unable to find file with {filename_pattern} in {directory}")
    return None
|
| 124 |
+
|
| 125 |
+
|
| 126 |
+
def generate_pivot_table(original_table, index, columns, values):
    """Pivot the entries of a list of reports into one wide summary table.

    Every entry of every report (``report.Entries``) is converted with
    ``entry.dict()`` and flattened into a DataFrame, which is then pivoted.
    Pivoted columns keep the order in which their labels first appeared in
    the input, and missing cells are filled with "No Response".

    Args:
        original_table: Iterable of reports, each exposing ``Entries``.
        index: Field used as the row identifier.
        columns: Field whose distinct values become the output columns.
        values: Field that fills the cells.

    Returns:
        pandas.DataFrame: One row per ``index`` value, with ``index`` as a
        regular column followed by one column per distinct ``columns`` value.
    """
    # Flatten all entries into a single DataFrame
    records = (entry.dict() for report in original_table for entry in report.Entries)
    flat = pd.json_normalize(records)

    # Remember the first-seen order of the labels; pivot() would sort them
    first_seen_order = flat[columns].drop_duplicates().tolist()

    pivoted = flat.pivot(index=index, columns=columns, values=values)
    pivoted = pivoted.reindex(columns=first_seen_order)
    pivoted = pivoted.reset_index()

    return pivoted.fillna("No Response")
|
researchsimulation/InteractiveInterviewChatbot.py
ADDED
|
@@ -0,0 +1,124 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#pip install groq
|
| 2 |
+
#pip install langchain_groq
|
| 3 |
+
#pip install crewai
|
| 4 |
+
#pip install crewai_tools
|
| 5 |
+
#pip install pydantic
|
| 6 |
+
#pip install XlsxWriter
|
| 7 |
+
#pip install openpyxl
|
| 8 |
+
#pip install pandas
|
| 9 |
+
#pip install streamlit
|
| 10 |
+
|
| 11 |
+
import gradio as gr
|
| 12 |
+
from RespondentAgent import *
|
| 13 |
+
from InterviewSimulation import *
|
| 14 |
+
from langchain_groq import ChatGroq
|
| 15 |
+
|
| 16 |
+
def ask_interview_question(respondent_agent_full, question):
    """
    Put one interview question to the respondent agent and return its answer.

    A single Task is built from the question plus the respondent profile's
    style, tone and values, and is run in a one-agent sequential Crew.

    Args:
        respondent_agent_full: Object exposing ``agent`` and ``user_profile``
            (the profile must expose ``style``, ``tone`` and ``values``).
        question (str): The interview question, used verbatim.

    Returns:
        The task's raw output when it is non-empty. Otherwise None: either the
        output was empty, or an exception was caught and its details printed.
    """
    agent = respondent_agent_full.agent
    profile = respondent_agent_full.user_profile
    style = profile.style
    tone = profile.tone
    values_repr = repr(profile.values)

    description = f"""
Interview Question: {question}\n
The Market Research Respondent must answer this exact question in alignment with their values {values_repr}
This question may consist of multiple parts, but it should not be split apart or modified in any way.
The answer must be based solely on the Market Research Respondent's knowledge and backstory.
The Market Research Respondent should not use any external sources or tools.
The Market Research Respondent should refer to the provided search text if needed.
Under no circumstances should an answer be selected that contradicts or is inconsistent with the respondent's profile.
"""

    expected_output = f"""
<answer>, the Market Research Respondent's answer to the exact question: '{question}'
No changes to the question are allowed. None of the response(s) selected should contradict or be inconsistent with what is expected from the respondent.
Answer should be expressed using the Market Respondent's style: {style}
and in the Market Respondent's tone: {tone}
"""

    task = Task(
        description=description,
        expected_output=expected_output,
        agent=agent
    )

    # One agent, one task, run sequentially
    crew = Crew(
        agents=[agent],
        tasks=[task],
        process=Process.sequential
    )

    try:
        crew.kickoff()

        raw_answer = task.output.raw
        if not raw_answer:
            print("No raw task output data")
        else:
            return raw_answer
    except Exception:
        # Best effort: report the failure on stdout and fall through to return None
        exc_type, exc_value, exc_traceback = sys.exc_info()
        print("Exception type:", exc_type)
        print("Exception message:", exc_value)
        print("Traceback details:")
        traceback.print_tb(exc_traceback)
|
| 68 |
+
|
| 69 |
+
# MAIN
# Load configuration first: every Config attribute read below depends on it.
# NOTE(review): the meaning of the arguments ("..", "chatbot") is defined by
# Config.load_environment, which is not visible here — confirm.
Config.load_environment("..", "chatbot")

# SET UP LLAMA
# Groq-hosted chat model shared by the respondent agent.
fact_based_llm = ChatGroq(
    groq_api_key=Config.groq_api_key,
    model_name=Config.agent_model,
    temperature=0.1,  # Low temperature for deterministic output
)

# generate respondent summary data from file
# NOTE(review): the UserProfile.read_user_profiles_from_excel definition shown
# in this upload takes three required parameters (respondent_details_file,
# data_dictionary, pv_criteria); this call passes only one and would raise
# TypeError against that definition — confirm which version is in use.
respondent_agent_user_profiles = UserProfile.read_user_profiles_from_excel(Config.respondent_summary_file)

# Only the first profile in the file is used for the chat session.
user_profile = respondent_agent_user_profiles[0]
respondent_agent_detail_file = f"{Config.config_dir}/{user_profile.ID}_fast_facts.xlsx"
respondent_agent = RespondentAgent.create(user_profile, respondent_agent_detail_file, fact_based_llm)

# Display name for the chat: the profile's name when set, else the agent's ID.
if user_profile.name is not None:
    respondent_agent_name = user_profile.name
else:
    respondent_agent_name = respondent_agent.ID
|
| 90 |
+
|
| 91 |
+
# Example chatbot function
|
| 92 |
+
def chatbot_interface(message, history=None):
    """
    Gradio submit handler: ask the respondent one question and extend the chat log.

    Args:
        message (str): The text the user submitted.
        history (list, optional): Existing chat messages as
            {"role": ..., "content": ...} dicts. The list is extended in
            place. When omitted, a new empty list is used.

    Returns:
        tuple: (updated history, "") — the empty string clears the input box.
    """
    # A literal [] default would be created once and shared by every call
    # that omits ``history``, so earlier conversations would leak into later ones.
    if history is None:
        history = []

    response = ask_interview_question(respondent_agent, message)

    # Append user message in the correct format
    history.append({"role": "user", "content": f"You: {message}"})
    # Append respondent's message in the correct format
    history.append({"role": "assistant", "content": f"{respondent_agent_name}: {response}"})

    # Return updated history and clear the input field
    return history, ""
|
| 102 |
+
|
| 103 |
+
# Create Gradio Interface
|
| 104 |
+
# Page layout: header, chat window, input box, footer. The submit event is
# registered inside the Blocks context it belongs to.
with gr.Blocks() as demo:
    # Header Section
    with gr.Row():
        gr.Markdown(f"## Welcome to PreData.AI's Market Research Panel - you are speaking with {respondent_agent_name}")

    # Chatbot Section
    chatbot = gr.Chatbot(type="messages", label=None, height=400)

    # Input Section
    with gr.Row():
        msg = gr.Textbox(placeholder="Ask your question here...")

    # Footer Section
    with gr.Row():
        gr.Markdown("© 2024 PreData.AI - All rights reserved.")

    # Chatbot Interaction: on Enter, send (text, history) to the handler and
    # write its results back to the chat window and the (cleared) textbox
    msg.submit(chatbot_interface, [msg, chatbot], [chatbot, msg])

# Run the Gradio app
demo.launch(share=True)
|
researchsimulation/InteractiveInterviewSimulation.py
ADDED
|
@@ -0,0 +1,133 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#pip install groq
|
| 2 |
+
#pip install langchain_groq
|
| 3 |
+
#pip install crewai
|
| 4 |
+
#pip install crewai_tools
|
| 5 |
+
#pip install pydantic
|
| 6 |
+
#pip install XlsxWriter
|
| 7 |
+
#pip install openpyxl
|
| 8 |
+
#pip install pandas
|
| 9 |
+
#pip install streamlit
|
| 10 |
+
|
| 11 |
+
import streamlit as st
|
| 12 |
+
from Config import Config
|
| 13 |
+
from Utilities import *
|
| 14 |
+
from UserProfile import *
|
| 15 |
+
from RespondentAgent import *
|
| 16 |
+
from Interview import *
|
| 17 |
+
from InterviewSimulation import *
|
| 18 |
+
from itertools import islice
|
| 19 |
+
from groq import Groq
|
| 20 |
+
from langchain_groq import ChatGroq
|
| 21 |
+
|
| 22 |
+
def ask_interview_question(respondent_agent_full, question):
    """
    Run a single interview question through the respondent agent.

    The question, together with the respondent profile's values, style and
    tone, is wrapped in one Task and executed by a one-agent sequential Crew.

    Args:
        respondent_agent_full: Object exposing ``agent`` and ``user_profile``
            (the profile must expose ``style``, ``tone`` and ``values``).
        question (str): The interview question, used verbatim.

    Returns:
        The task's raw output when it is non-empty. Otherwise None: either the
        output was empty, or an exception was caught and its details printed.
    """
    agent = respondent_agent_full.agent
    profile = respondent_agent_full.user_profile
    answer_style = profile.style
    answer_tone = profile.tone
    respondent_values = repr(profile.values)

    task_description = f"""
Interview Question: {question}\n
The Market Research Respondent must answer this exact question in alignment with their values {respondent_values}
This question may consist of multiple parts, but it should not be split apart or modified in any way.
The answer must be based solely on the Market Research Respondent's knowledge and backstory.
The Market Research Respondent should not use any external sources or tools.
The Market Research Respondent should refer to the provided search text if needed.
Under no circumstances should an answer be selected that contradicts or is inconsistent with the respondent's profile.
"""

    task_expected_output = f"""
<answer>, the Market Research Respondent's answer to the exact question: '{question}'
No changes to the question are allowed. None of the response(s) selected should contradict or be inconsistent with what is expected from the respondent.
Answer should be expressed using the Market Respondent's style: {answer_style}
and in the Market Respondent's tone: {answer_tone}
"""

    interview_task = Task(
        description=task_description,
        expected_output=task_expected_output,
        agent=agent
    )

    # One agent, one task, run sequentially
    interview_crew = Crew(
        agents=[agent],
        tasks=[interview_task],
        process=Process.sequential
    )

    try:
        interview_crew.kickoff()

        raw_answer = interview_task.output.raw
        if not raw_answer:
            print("No raw task output data")
        else:
            return raw_answer
    except Exception:
        # Best effort: report the failure on stdout and fall through to return None
        exc_type, exc_value, exc_traceback = sys.exc_info()
        print("Exception type:", exc_type)
        print("Exception message:", exc_value)
        print("Traceback details:")
        traceback.print_tb(exc_traceback)
|
| 74 |
+
|
| 75 |
+
# MAIN
# Load configuration first: every Config attribute read below depends on it.
# NOTE(review): the meaning of the arguments ("..", "dev1") is defined by
# Config.load_environment, which is not visible here — confirm.
Config.load_environment("..", "dev1")

# SET UP LLAMA
# Low-temperature model; this is the one handed to the respondent agent below.
fact_based_llm = ChatGroq(
    groq_api_key=Config.groq_api_key,
    model_name=Config.agent_model,
    temperature=0.1,  # Low temperature for deterministic output
    # max_tokens=500, # Enough tokens to complete factual sentences
    # stop_sequences=["\n", "<|endoftext|>"] # Stops at logical sentence boundaries
)

# High-temperature model.
# NOTE(review): exploratory_llm is not referenced again in the visible part
# of this script — confirm whether it is still needed.
exploratory_llm = ChatGroq(
    groq_api_key=Config.groq_api_key,
    model_name=Config.agent_model,
    temperature=0.9,  # Higher temperature for more creative output
    max_tokens=2000,  # Allows for more extended, imaginative responses
    stop_sequences=["\n", "<|endoftext|>"]  # Standard stop sequences for controlling output length
)

# generate respondent summary data from file
# NOTE(review): the UserProfile.read_user_profiles_from_excel definition shown
# in this upload takes three required parameters (respondent_details_file,
# data_dictionary, pv_criteria); this call passes only one and would raise
# TypeError against that definition — confirm which version is in use.
respondent_agent_user_profiles = UserProfile.read_user_profiles_from_excel(Config.respondent_summary_file)

# Only the first profile in the file is used for the session.
user_profile = respondent_agent_user_profiles[0]
respondent_agent_detail_file = f"{Config.config_dir}/{user_profile.ID}_fast_facts.xlsx"
respondent_agent = RespondentAgent.create(user_profile, respondent_agent_detail_file, fact_based_llm)

# Streamlit UI
st.title("Interactive Interview Simulation")
st.write(f"Using Respondent Profile: {user_profile.name} (ID: {user_profile.ID})")

# Question input and response loop
st.write("Type your interview questions below. To exit, type 'exit'.")

# The question/answer history is kept in st.session_state and created on
# first use; the two lists are kept in step (one response per question).
if "questions" not in st.session_state:
    st.session_state.questions = []
if "responses" not in st.session_state:
    st.session_state.responses = []

question = st.text_input("Enter your interview question:")

if st.button("Ask Question"):
    if question.strip().lower() == "exit":
        # "exit" only shows a farewell message; nothing is asked or stored
        st.write("**Session ended. Thank you for using the simulation!**")
    elif question.strip():
        response = ask_interview_question(respondent_agent, question)
        st.session_state.questions.append(question)
        st.session_state.responses.append(response)
        st.write(f"**Q:** {question}")
        st.write(f"**A:** {response}")
    else:
        # Empty or whitespace-only input
        st.error("Please enter a valid question.")

# Display previous questions and answers
if st.session_state.questions:
    st.write("### Previous Questions and Responses:")
    for q, a in zip(st.session_state.questions, st.session_state.responses):
        st.write(f"- **Q:** {q}")
        st.write(f" **A:** {a}")
|
researchsimulation/InterviewSimulation.py
ADDED
|
@@ -0,0 +1,196 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
from crewai import Agent,Task,Process,Crew
|
| 3 |
+
from crewai_tools import FileReadTool, TXTSearchTool
|
| 4 |
+
from crewai.tasks import OutputFormat
|
| 5 |
+
from pydantic import BaseModel
|
| 6 |
+
from typing import List, Dict, Optional
|
| 7 |
+
|
| 8 |
+
import datetime
|
| 9 |
+
import json
|
| 10 |
+
import os
|
| 11 |
+
import pandas as pd
|
| 12 |
+
import sys
|
| 13 |
+
|
| 14 |
+
from Interview import *
|
| 15 |
+
|
| 16 |
+
#utils
|
| 17 |
+
import re
|
| 18 |
+
import sys
|
| 19 |
+
import traceback
|
| 20 |
+
import json
|
| 21 |
+
import pandas as pd
|
| 22 |
+
from InterviewUtilities import *
|
| 23 |
+
|
| 24 |
+
def select_profiles_by_criteria(profiles, selection_criteria, data_dictionary):
    """
    Select the profiles that satisfy every given selection criterion.

    Each criterion is a string formatted as 'column:value'. Criteria are
    applied one after another, so a profile is kept only if it matches ALL
    of them (logical AND). Matching compares the criterion value with the
    string form of the profile attribute, ignoring case and surrounding
    whitespace. A profile that lacks the attribute is compared as "".

    Args:
        profiles (list): Profile objects exposing ``get_attributes()``, which
            must return a mapping of column name to value.
        selection_criteria (list): Strings such as
            ['column1:value1', 'column2:value2', ...]. If empty or None, the
            ``profiles`` list is returned unchanged (same object).
        data_dictionary: Object exposing ``get_columns()``; used to validate
            the column named by each criterion.

    Returns:
        list: The profiles matching all criteria (possibly empty).

    Raises:
        ValueError: If a criterion is not a 'column:value' string, or names a
            column that is not present in the data dictionary.
    """
    if not selection_criteria:
        print("No selection criteria provided. Returning all profiles.")
        return profiles

    dd_columns = data_dictionary.get_columns()

    # Apply each criterion sequentially, narrowing the candidate list each time.
    selected_profiles = profiles
    for criterion in selection_criteria:
        try:
            # Split on the first colon only, so values may themselves contain ':'.
            column, value = [part.strip() for part in criterion.split(":", 1)]
        except (ValueError, AttributeError):
            # ValueError: no colon present; AttributeError: criterion is not a string.
            raise ValueError(
                f"Selection criterion '{criterion}' must be formatted as 'column:value'."
            ) from None

        if column not in dd_columns:
            raise ValueError(f"Column '{column}' not found in data dictionary.")

        # Normalise the wanted value once instead of once per profile.
        wanted = value.lower()
        selected_profiles = [
            profile for profile in selected_profiles
            if str(profile.get_attributes().get(column, "")).strip().lower() == wanted
        ]

        print(f"Applied criterion '{column}:{value}' → {len(selected_profiles)} profile(s) selected.")

        # Early exit: further criteria cannot add profiles back.
        if not selected_profiles:
            print("No profiles match the combined criteria.")
            break

    return selected_profiles
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
def run_interview(respondent_agent_full, interview_script, output_file_name, llm, is_focus_group=False):
    """
    Put every question of an interview script to a respondent agent and save the answers.

    For each script entry a single-task CrewAI crew is built and executed.
    The raw answer is split into free-text thoughts and a JSON payload; the
    fields named in the entry's expected output are copied from the parsed
    JSON into the report row. All rows are written to an Excel workbook.

    Args:
        respondent_agent_full: Wrapper object whose ``agent`` attribute is the
            CrewAI agent that answers the questions.
        interview_script: Object whose ``Entries`` iterable yields entries with
            ``Num``, ``Section``, ``Question`` and ``Expected_Output`` attributes.
        output_file_name: Path of the Excel workbook to write.
        llm: Not used by this function; kept so existing callers keep working.
        is_focus_group (bool): When True the prompt asks for a collective
            "Focus Group" voice, otherwise for an "Individual User" voice.

    Returns:
        pandas.DataFrame: One row per successfully answered question.
        None: If a task produces empty raw output. In that case the function
        returns immediately and no workbook is written.
    """
    interview_report_data = []
    respondent_agent = respondent_agent_full.agent

    # Verbiage for the respondent type; identical for every question, so set once.
    if is_focus_group:
        respondent_type = "Focus Group"
        diversity_note = "collective voices of the focus group"
    else:
        respondent_type = "Individual User"
        diversity_note = "your unique voice of the individual"

    # Loop through each section and question to create individual crews
    for entry in interview_script.Entries:
        num = entry.Num
        section = entry.Section
        question = entry.Question

        print(f"STARTING process for Section: {section}, Question {num}: {question}")

        question_task_description = f"""
        Interview Section: {section}, Question {num}: {question}

        ### **Your Role & Expectations:**
        You are a {respondent_type} participant with a **specific demographic profile, cultural background, values, lifestyle, and habits**.

        Your responses must reflect the **{diversity_note}**, and remain grounded in realistic thought patterns, communication styles, and decision-making behavior.

        ---

        ### **How to Answer:**
        - Use a tone appropriate to your role as a {respondent_type}:
            - 🗣️ If you are part of a FOCUS GROUP, speak as a collective group (e.g., “we prefer...”, “most of us think...”).
            - 🧍 If you are an INDIVIDUAL USER, speak from your personal point of view (e.g., “I prefer...”, “in my experience...”).
        - Reference your **real-life JTBD motivations**, including your triggers, immediate needs, and usage barriers.
        - Express yourself in **natural language**—avoid being generic or overly Westernised.
        - Ensure your justification sounds **culturally and contextually grounded** in your background and behavior.
        - If the question is about **your profile, preferences, or past behavior**, only answer using details grounded in your background and JTBD context.
        - If the question is speculative, reflective, or opinion-based, use thoughtful reasoning that aligns with your lifestyle, values, and cultural behavior.

        ---

        ### **Mandatory Personalisation (Choose at least one):**
        Your response MUST include a behavioral or contextual anchor:
        - 🎓 **Education & Career Goals**
        - 🛍️ **Shopping Behavior**
        - 📱 **Media Habits**
        - 🌍 **Cultural Identity**


        ### **Unacceptable Responses:**
        ❌ Using the wrong voice (e.g., “I” in a group, “we” for an individual)
        ❌ Generic, vague, or contradictory answers
        ❌ Contradictions to your persona’s traits, preferences, or profile
        """

        expected_output_text = entry.Expected_Output
        if not expected_output_text:
            # Must be an f-string: kickoff() is called without inputs, so a
            # literal "{question}" placeholder would reach the model unfilled.
            expected_output_text = f"response: The Market Research Respondent’s answer to the exact question: '{question}'. You must not alter or rephrase the question in any way."

        question_task_expected_output = generate_json_expected_output(expected_output_text)

        print(f"Expected output is:\n{question_task_expected_output}\n")

        question_task = Task(
            description=question_task_description,
            expected_output=question_task_expected_output,
            agent=respondent_agent
        )

        # Create and execute the crew for this question and report
        crew = Crew(
            agents=[respondent_agent],
            tasks=[question_task],
            process=Process.sequential
        )

        try:
            # The result is read from the task below, so kickoff's return value is not kept.
            crew.kickoff()
            print(f"Crew usage metrics: {crew.usage_metrics}")

            response_text = question_task.output.raw

            if not response_text:
                print("🚨 No raw task output data")
                # NOTE(review): this aborts the whole interview and discards the rows
                # collected so far, whereas exceptions below only skip the question.
                # Confirm with callers whether `continue` would be the intended behavior.
                return None

            print(f"RAW OUTPUT START:\n {response_text} \nEND OF RAW OUTPUT")

            detailed_thoughts, json_str = split_json_string(response_text)
            parsed_response = extract_and_parse_json(json_str)

            # Initialize the base row data
            row_data = {
                'Num': num,
                'Section': section,
                'Question': question,
                'Answer': response_text,
                'Detailed Thoughts': detailed_thoughts
            }

            if parsed_response:
                print("\n✅ Successfully Parsed JSON:\n", json.dumps(parsed_response, indent=2, ensure_ascii=False))
                # Copy each expected field into its own report column (lower-cased key).
                for field in extract_fields_from_expected_output(expected_output_text):
                    key = field.lower()
                    row_data[key] = parsed_response.get(key)
            else:
                print("\n🚨 No valid JSON extracted - saving raw answer")

            interview_report_data.append(row_data)

        except Exception as e:
            # Best effort: report the failure and move on to the next question.
            print("Exception type:", type(e))
            print("Exception message:", e)
            print("Traceback details:")
            traceback.print_tb(e.__traceback__)
            continue

    # Convert the list of dictionaries into a DataFrame and save it to Excel
    df = pd.DataFrame(interview_report_data)
    with pd.ExcelWriter(output_file_name, engine='xlsxwriter') as writer:
        df.to_excel(writer, index=False)

    return df
|