elaineaishophouse committed on
Commit
441d880
·
verified ·
1 Parent(s): 97b46ba

Upload 15 files

Browse files
common/.DS_Store ADDED
Binary file (6.15 kB). View file
 
common/CombineReportsMain.ipynb ADDED
@@ -0,0 +1 @@
 
 
1
{
 "nbformat": 4,
 "nbformat_minor": 0,
 "metadata": {
  "colab": {"provenance": []},
  "kernelspec": {"name": "python3", "display_name": "Python 3"},
  "language_info": {"name": "python"}
 },
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Combine Interview Reports\n",
    "\n",
    "Merges every per-respondent `*_interview_results.xlsx` workbook found in a report\n",
    "directory into a single `interview_results_FULL_REPORT.xlsx` workbook, tagging each\n",
    "row with the respondent agent id.\n",
    "\n",
    "**SECURITY NOTE (review):** the previously committed version of this notebook had saved\n",
    "outputs that leaked live GROQ / OpenAI / OpenRouter API keys (printed by\n",
    "`Config.print_environment()`). Those keys must be rotated immediately; outputs are now\n",
    "cleared and should be cleared before every commit."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"id": "_EsZoeD7g2Ap"},
   "outputs": [],
   "source": [
    "# Install dependencies into this kernel's environment (%pip targets the running kernel,\n",
    "# unlike !pip). Note: the PyPI package 'dotenv' is an unmaintained shim; the real\n",
    "# package providing `from dotenv import load_dotenv` is python-dotenv.\n",
    "%pip install -q python-dotenv pydantic XlsxWriter openpyxl pandas boto3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"id": "v7z1LWRyz-bh"},
   "outputs": [],
   "source": [
    "from google.colab import drive\n",
    "import datetime\n",
    "import os\n",
    "import sys\n",
    "\n",
    "# Explicit import: the original relied on a wildcard import to supply `pd`.\n",
    "import pandas as pd\n",
    "\n",
    "drive.mount('/content/drive')\n",
    "\n",
    "base_dir = '/content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/.'\n",
    "common_dir = f'{base_dir}/common'\n",
    "run_dir = f'{base_dir}/common'\n",
    "\n",
    "sys.path.append(common_dir)\n",
    "%cd {run_dir}\n",
    "\n",
    "# Project-local modules (only importable after the sys.path/%cd setup above).\n",
    "from Config import Config\n",
    "from Utilities import *\n",
    "from UserProfile import *\n",
    "from Interview import *\n",
    "from PersonalityValues import *\n",
    "\n",
    "timestamp = datetime.datetime.now().strftime(\"%m-%d_%H-%M\")\n",
    "\n",
    "# MAIN\n",
    "Config.load_environment(base_dir, \"itc_frozenfood.dev1\")\n",
    "# WARNING: print_environment() echoes API keys — clear outputs before committing.\n",
    "Config.print_environment()\n",
    "\n",
    "# Report directory to combine (hard-coded to one timestamped run).\n",
    "report_type = \"interview_results\"\n",
    "report_dir = f\"{Config.output_dir}/interviewresponses_04-08_10-19\"\n",
    "full_report_file = f\"{report_dir}/{report_type}_FULL_REPORT.xlsx\"\n",
    "\n",
    "if os.path.exists(report_dir):\n",
    "    print(f\"Directory exists {report_dir}\")\n",
    "else:\n",
    "    print(f\"Directory does not exist {report_dir}\")\n",
    "    sys.exit()\n",
    "\n",
    "report_files = [\n",
    "    filename for filename in os.listdir(report_dir)\n",
    "    if f\"{report_type}.xlsx\" in filename\n",
    "]\n",
    "\n",
    "if not report_files:\n",
    "    print(f\"No report files named *{report_type}* were found in {report_dir}\")\n",
    "    sys.exit()\n",
    "\n",
    "# Collect one DataFrame per respondent, then concatenate once at the end\n",
    "# (repeated pd.concat inside the loop is quadratic).\n",
    "frames = []\n",
    "for report_file in report_files:\n",
    "    respondent_agent_id = report_file.split(f\"_{report_type}\")[0]\n",
    "    print(f\"Processing agent {respondent_agent_id}\")\n",
    "    interview_output_file = f'{report_dir}/{respondent_agent_id}_interview_results.xlsx'\n",
    "\n",
    "    if os.path.exists(interview_output_file):\n",
    "        print(f\"Processing file: {interview_output_file}\")\n",
    "        interview_response_df = pd.read_excel(interview_output_file)\n",
    "        # Tag each row with its source agent; insert(0, ...) places the column first.\n",
    "        interview_response_df.insert(0, \"Respondent Agent\", respondent_agent_id)\n",
    "        frames.append(interview_response_df)\n",
    "    else:\n",
    "        print(f\"File '{interview_output_file}' does not exist.\")\n",
    "\n",
    "if frames:\n",
    "    full_report_df = pd.concat(frames, ignore_index=True)\n",
    "    full_report_df.to_excel(full_report_file, index=False)\n",
    "    print(f\"\u2705 All reports written to {full_report_file}\")\n",
    "else:\n",
    "    full_report_df = None\n",
    "    print(f\"\u26a0\ufe0f No reports were processed. {full_report_file} was not created.\")"
   ]
  }
 ]
}
common/Config.py ADDED
@@ -0,0 +1,183 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dotenv import load_dotenv
2
+ import os
3
+
4
class Config:
    """Process-wide configuration loaded from a `.env` file.

    All settings live as class attributes so any module can read them after a
    single call to `Config.load_environment(base_dir, env_name)`.
    """

    # === General Environment Info ===
    env_name = None
    num_respondents = None
    num_focus_groups = None

    # === Directories and Files ===
    base_dir = None
    config_dir = None
    test_result_dir = None
    input_dir = None
    output_dir = None
    respondent_summary_file = None
    focus_group_summary_file = None
    respondent_details_file = None
    data_dictionary_file = None
    personality_question_file = None
    personality_scoring_file = None
    style_tone_question_file = None
    interview_question_file = None
    survey_question_file = None
    interview_validation_files = None

    # === Respondent Agent Configs ===
    respondent_agent_host = None
    respondent_agent_model = None
    respondent_agent_api_key = None
    respondent_agent_url = None
    respondent_agent_temperature = None
    respondent_agent_top_p = None
    respondent_agent_frequency_penalty = None
    respondent_agent_presence_penalty = None

    # === Processing Agent Configs ===
    processing_agent_host = None
    processing_agent_model = None
    processing_agent_api_key = None
    processing_agent_url = None
    processing_agent_temperature = None
    processing_agent_top_p = None
    processing_agent_frequency_penalty = None
    processing_agent_presence_penalty = None

    # === Processor Configs ===
    processor_host = None
    processor_model = None
    processor_api_key = None
    processor_url = None
    processor_temperature = None
    processor_top_p = None
    processor_frequency_penalty = None
    processor_presence_penalty = None

    @staticmethod
    def _load_model_settings(host_var, model_var):
        """Resolve host/model plus provider credentials and sampling params
        for one model section.

        `host_var` / `model_var` name env vars whose *values* are the names of
        further env vars (one level of indirection), e.g.
        RESPONDENT_AGENT_HOST=GROQ_AGENT_HOST. The provider prefix ("GROQ")
        is derived by stripping '_AGENT_HOST' from that indirect name.

        NOTE(review): the processor section previously also stripped
        '_AGENT_HOST'; if PROCESSOR_HOST's value does not contain
        '_AGENT_HOST' the replace is a no-op and the whole value becomes the
        prefix — confirm the .env files follow this naming scheme.

        Returns:
            tuple: (host, model, api_key, url, temperature, top_p,
                    frequency_penalty, presence_penalty)
        """
        host = os.getenv(os.getenv(host_var))
        model = os.getenv(os.getenv(model_var))
        prefix = os.getenv(host_var).replace('_AGENT_HOST', '')
        return (
            host,
            model,
            os.getenv(f"{prefix}_API_KEY"),
            os.getenv(f"{prefix}_URL"),
            float(os.getenv(f"{prefix}_TEMPERATURE", 0.0)),
            float(os.getenv(f"{prefix}_TOP_P", 0.0)),
            float(os.getenv(f"{prefix}_FREQUENCY_PENALTY", 0.0)),
            float(os.getenv(f"{prefix}_PRESENCE_PENALTY", 0.0)),
        )

    @staticmethod
    def _mask(secret):
        """Redact a secret for log output, keeping only the last 4 characters.

        Falsy values (None, '') are returned unchanged so callers can still
        see that the setting is missing.
        """
        if not secret:
            return secret
        return f"****{secret[-4:]}"

    @classmethod
    def load_environment(cls, base_dir, my_env_name):
        """Load settings for `my_env_name` from
        `<base_dir>/config/<my_env_name>.env` and populate all class attributes.
        """
        # Determine the path to the .env file based on the environment name.
        env_file = f'{base_dir}/config/{my_env_name}.env'
        load_dotenv(dotenv_path=env_file)

        cls.base_dir = base_dir
        cls.env_name = my_env_name
        cls.num_respondents = int(os.getenv('NUM_RESPONDENTS', 0))
        cls.num_focus_groups = int(os.getenv('NUM_FOCUS_GROUPS', 0))

        # Construct paths based on base_dir and subdirectories/filenames.
        cls.config_dir = f"{base_dir}/{os.getenv('CONFIG_SUBDIR')}"
        cls.test_result_dir = f"{base_dir}/{os.getenv('TEST_SUBDIR')}"
        cls.input_dir = f"{base_dir}/{os.getenv('INPUT_SUBDIR')}"
        cls.output_dir = f"{base_dir}/{os.getenv('OUTPUT_SUBDIR')}"
        cls.respondent_summary_file = f"{cls.config_dir}/{os.getenv('RESPONDENT_SUMMARY_FILE')}"
        cls.focus_group_summary_file = f"{cls.config_dir}/{os.getenv('FOCUS_GROUP_SUMMARY_FILE')}"
        cls.respondent_details_file = f"{cls.config_dir}/{os.getenv('RESPONDENT_DETAILS_FILE')}"
        cls.data_dictionary_file = f"{cls.config_dir}/{os.getenv('DATA_DICTIONARY_FILE')}"
        cls.personality_question_file = f"{cls.config_dir}/{os.getenv('PERSONALITY_QUESTION_FILE')}"
        cls.personality_scoring_file = f"{cls.config_dir}/{os.getenv('PERSONALITY_SCORING_FILE')}"
        cls.style_tone_question_file = f"{cls.config_dir}/{os.getenv('STYLE_TONE_QUESTION_FILE')}"
        cls.interview_question_file = f"{cls.config_dir}/{os.getenv('INTERVIEW_QUESTION_FILE')}"
        cls.survey_question_file = f"{cls.config_dir}/{os.getenv('SURVEY_QUESTION_FILE')}"
        cls.interview_validation_files = f"{cls.config_dir}/{os.getenv('INTERVIEW_VALIDATION_FILES')}"

        # The three model sections share identical lookup logic; the previous
        # copy-pasted triplication (with no-op lambda wrappers) is folded into
        # _load_model_settings.
        (cls.respondent_agent_host, cls.respondent_agent_model,
         cls.respondent_agent_api_key, cls.respondent_agent_url,
         cls.respondent_agent_temperature, cls.respondent_agent_top_p,
         cls.respondent_agent_frequency_penalty,
         cls.respondent_agent_presence_penalty) = cls._load_model_settings(
            "RESPONDENT_AGENT_HOST", "RESPONDENT_AGENT_MODEL")

        (cls.processing_agent_host, cls.processing_agent_model,
         cls.processing_agent_api_key, cls.processing_agent_url,
         cls.processing_agent_temperature, cls.processing_agent_top_p,
         cls.processing_agent_frequency_penalty,
         cls.processing_agent_presence_penalty) = cls._load_model_settings(
            "PROCESSING_AGENT_HOST", "PROCESSING_AGENT_MODEL")

        (cls.processor_host, cls.processor_model,
         cls.processor_api_key, cls.processor_url,
         cls.processor_temperature, cls.processor_top_p,
         cls.processor_frequency_penalty,
         cls.processor_presence_penalty) = cls._load_model_settings(
            "PROCESSOR_HOST", "PROCESSOR_MODEL")

    @classmethod
    def print_environment(cls):
        """Print the loaded configuration for debugging.

        Security fix: API keys are masked via _mask() instead of being
        printed in full to stdout/logs.
        """
        print("ENVIRONMENT CONFIGURATION")
        print(f"Environment Name: {cls.env_name}")
        print(f"Number of Respondents: {cls.num_respondents}")
        print(f"Number of Focus Groups: {cls.num_focus_groups}")

        print("\nDIRECTORIES:")
        print(f"Base Directory: {cls.base_dir}")
        print(f"Config Directory: {cls.config_dir}")
        print(f"Test Result Directory: {cls.test_result_dir}")
        print(f"Input Directory: {cls.input_dir}")
        print(f"Output Directory: {cls.output_dir}")

        print("\nFILES:")
        print(f"Respondent Summary File: {cls.respondent_summary_file}")
        print(f"Focus Group Summary File: {cls.focus_group_summary_file}")
        print(f"Personality Question File: {cls.personality_question_file}")
        print(f"Respondent Details File: {cls.respondent_details_file}")
        print(f"Data Dictionary File: {cls.data_dictionary_file}")
        print(f"Personality Scoring File: {cls.personality_scoring_file}")
        print(f"Style Tone Question File: {cls.style_tone_question_file}")
        print(f"Interview Question File: {cls.interview_question_file}")
        print(f"Survey Question File: {cls.survey_question_file}")
        print(f"Interview Validation Files: {cls.interview_validation_files}")

        print("\nRESPONDENT AGENT CONFIGS")
        print(f"Respondent Agent Host: {cls.respondent_agent_host}")
        print(f"Respondent Agent Model: {cls.respondent_agent_model}")
        print(f"Respondent Agent API Key: {cls._mask(cls.respondent_agent_api_key)}")
        print(f"Respondent Agent URL: {cls.respondent_agent_url}")
        print(f"Respondent Agent Temperature: {cls.respondent_agent_temperature}")
        print(f"Respondent Agent Top P: {cls.respondent_agent_top_p}")
        print(f"Respondent Agent Frequency Penalty: {cls.respondent_agent_frequency_penalty}")
        print(f"Respondent Agent Presence Penalty: {cls.respondent_agent_presence_penalty}")

        print("\nPROCESSING AGENT CONFIGS")
        print(f"Processing Agent Host: {cls.processing_agent_host}")
        print(f"Processing Agent Name: {cls.processing_agent_model}")
        print(f"Processing Agent API Key: {cls._mask(cls.processing_agent_api_key)}")
        print(f"Processing Agent URL: {cls.processing_agent_url}")
        print(f"Processing Agent Temperature: {cls.processing_agent_temperature}")
        print(f"Processing Agent Top P: {cls.processing_agent_top_p}")
        print(f"Processing Agent Frequency Penalty: {cls.processing_agent_frequency_penalty}")
        print(f"Processing Agent Presence Penalty: {cls.processing_agent_presence_penalty}")

        print("\nPROCESSOR CONFIGS")
        print(f"Processor Host: {cls.processor_host}")
        print(f"Processor Name: {cls.processor_model}")
        print(f"Processor API Key: {cls._mask(cls.processor_api_key)}")
        print(f"Processor URL: {cls.processor_url}")
        print(f"Processor Temperature: {cls.processor_temperature}")
        print(f"Processor Top P: {cls.processor_top_p}")
        print(f"Processor Frequency Penalty: {cls.processor_frequency_penalty}")
        print(f"Processor Presence Penalty: {cls.processor_presence_penalty}")
common/DataDictionary.py ADDED
@@ -0,0 +1,134 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ import datetime
3
+ import textwrap
4
+
5
+ from Config import Config
6
+
7
+ import pandas as pd
8
+ import numpy as np
9
+
10
class DataDictionary:
    """In-memory registry of data-dictionary entries.

    Each entry is a dict with at least "Type", "Parameter" and "Description";
    optional fields ("Source", "ValidValues", ...) may be absent because
    add_entry() strips None/empty values.
    """

    def __init__(self):
        """Initialize the DataDictionary instance with an empty list of entries."""
        self.entries = []

    def add_entry(self, entry):
        """
        Add an entry to the data dictionary. Entry should be a dict with expected keys.
        Filters out None or empty-string values, and ensures required keys are present.

        Raises:
            ValueError: if any of "Type", "Parameter", "Description" is missing.

        NOTE(review): NaN values coming from pandas rows are neither None nor
        "" and therefore survive this filter — confirm whether they should
        also be dropped.
        """
        required_keys = {"Type", "Parameter", "Description"}
        missing = required_keys - entry.keys()
        if missing:
            raise ValueError(f"Missing required fields in entry: {missing}")

        # Drop empty values so downstream consumers only see populated fields.
        clean_entry = {k: v for k, v in entry.items() if v is not None and v != ""}

        self.entries.append(clean_entry)

    def get_types(self):
        """
        Extract all types defined for the data dictionary, preserving insertion order.

        Returns:
            list: A list of all unique types in the dictionary, preserving order.
        """
        seen = set()
        ordered_types = []
        for entry in self.entries:
            entry_type = entry.get("Type")
            if entry_type is not None and entry_type not in seen:
                seen.add(entry_type)
                ordered_types.append(entry_type)
        return ordered_types

    def get_parameters(self, type="All"):
        """
        Extract parameters of a particular type from the data dictionary,
        preserving insertion order.

        Args:
            type (str): Type of entries to return (defaults to "All").

        Returns:
            list: A list of all unique parameters matching the specified type,
            preserving order.
        """
        seen = set()
        ordered_parameters = []

        for entry in self.entries:
            # .get() instead of [] — entries whose "Type" value was stripped by
            # add_entry() previously raised KeyError here (bug fix).
            if type == "All" or entry.get("Type") == type:
                parameter = entry.get("Parameter")
                if parameter not in seen:
                    seen.add(parameter)
                    ordered_parameters.append(parameter)

        return ordered_parameters

    def get_columns(self):
        """
        Generate a list of column names in the format type_parameter.

        Returns:
            list: A list of column names preserving order.
        """
        columns = []
        for entry in self.entries:
            entry_type = entry.get("Type")
            parameter = entry.get("Parameter")
            if entry_type and parameter:  # Ensure both Type and Parameter exist
                columns.append(f"{entry_type}_{parameter}")
        return columns

    def filter_entries(self, Source=None, Type=None, Parameter=None):
        """
        Filter entries based on Source, Type, or Parameter.

        Args:
            Source (str, optional): The source to filter by.
            Type (str, optional): The type to filter by.
            Parameter (str, optional): The parameter to filter by.

        Returns:
            list: A list of entries matching the filter criteria.

        Bug fix: uses .get() so entries whose optional fields were stripped by
        add_entry() no longer raise KeyError.
        """
        return [
            entry for entry in self.entries
            if (Source is None or entry.get("Source") == Source)
            and (Type is None or entry.get("Type") == Type)
            and (Parameter is None or entry.get("Parameter") == Parameter)
        ]

    @staticmethod
    def generate_dictionary(data_dictionary_file):
        """
        Static method to generate a DataDictionary instance from an Excel (.xlsx) file.

        Args:
            data_dictionary_file (str): The path to the Excel file containing
                data dictionary entries.

        Returns:
            DataDictionary: A populated DataDictionary instance.
        """
        df = pd.read_excel(data_dictionary_file)

        data_dictionary = DataDictionary()

        for _, row in df.iterrows():
            data_dictionary.add_entry({
                "Type": row["Type"],
                "Parameter": row["Parameter"],
                "Description": row["Description"],
                "Source": row.get("Source"),
                "ValidValues": row.get("Scoring_Method"),
                "InferredLogic": row.get("Inferred_Logic"),
            })

        return data_dictionary

    def __repr__(self):
        return f"DataDictionary({len(self.entries)} entries)"
common/FastFacts.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ import datetime
3
+ import textwrap
4
+
5
+ from Config import Config
6
+ from DataDictionary import *
7
+
8
+ import pandas as pd
9
+ import numpy as np
10
+
11
class FastFacts:
    """Container for short free-text facts.

    The backing list is created lazily on first use, so an unused instance
    keeps `facts` as None.
    """

    def __init__(self):
        self.facts = None  # Lazily initialised on first add

    def _ensure_initialised(self):
        """Create the backing list on first use."""
        if self.facts is None:
            self.facts = []

    def add_fact(self, fact):
        """Append a single string fact; non-strings are rejected with a message."""
        if isinstance(fact, str):
            self._ensure_initialised()
            self.facts.append(fact)
        else:
            print("Only strings are allowed as facts.")

    def add_facts(self, facts):
        """Append every string in `facts` (a set or list); non-strings are skipped."""
        if not isinstance(facts, (set, list)):
            print("Facts must be provided as a set or list.")
            return

        self._ensure_initialised()
        for item in facts:
            if isinstance(item, str):
                self.facts.append(item)
            else:
                print(f"Skipping non-string fact: {item}")

    def __repr__(self):
        if not self.facts:
            return f"{self.__class__.__name__}: No facts available"
        rendered = ", ".join(f"<{item}>" for item in self.facts)
        return f"{self.__class__.__name__}: {rendered}"

    def to_dict(self):
        """Return {'facts': [...]}; an empty list when nothing has been added."""
        return {"facts": self.facts if self.facts else []}

    @staticmethod
    def read_from_excel(fact_file):
        """Load facts from the 'FastFacts' column of an Excel file.

        Args:
            fact_file (str): Path to the Excel file.

        Returns:
            FastFacts: A populated FastFacts object, or None if reading failed.
        """
        try:
            frame = pd.read_excel(fact_file)
            # Facts are expected in a column named 'FastFacts'; blanks dropped.
            loaded = FastFacts()
            loaded.add_facts(frame["FastFacts"].dropna().tolist())
            return loaded
        except Exception as e:
            print(f"An error occurred while reading from the Excel file: {e}")
            return None
common/Interview.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List, Optional
2
+ from pydantic import BaseModel
3
+ import pandas as pd
4
+ from itertools import groupby
5
+
6
+
7
class QAEntry(BaseModel):
    """A single interview question/answer record.

    NOTE(review): fields typed Optional[...] carry no default value; under
    pydantic v2 they are therefore still *required* (a value, possibly None,
    must be supplied at construction) — confirm that is intended.
    """
    Num: int                        # question number within the interview
    Section: str                    # interview section this question belongs to
    Question: str                   # question text posed to the respondent
    Expected_Output: Optional[str]  # expected-answer description, if any
    Respondent: Optional[str]       # respondent identifier, if recorded
    Answer: Optional[str]           # respondent's answer, if recorded
+
15
class InterviewReport(BaseModel):
    """An ordered collection of QAEntry rows making up one interview."""

    Entries: List[QAEntry]

    def __repr__(self):
        """Render the entries grouped by section, one Q/A block per entry.

        NOTE(review): itertools.groupby only merges *adjacent* items, so
        Entries must already be ordered by Section for the grouping to be
        complete — confirm callers guarantee this.
        """
        output = ""
        for section, entries in groupby(self.Entries, key=lambda entry: entry.Section):
            output += f"{section}:\n"
            for entry in entries:
                output += f"Q {entry.Num}: {entry.Question}\n"
                output += f"Expected Output: {entry.Expected_Output if entry.Expected_Output else 'No Expected Output'}\n"
                output += f"Respondent: {entry.Respondent if entry.Respondent else 'No Respondent'}\n"
                output += f"A: {entry.Answer if entry.Answer else 'No Answer'}\n"
        return output

    def get_respondent_responses(self, respondent):
        """Return all entries whose Respondent matches `respondent`
        (case-insensitive); entries with no respondent are skipped."""
        return [
            entry for entry in self.Entries
            if entry.Respondent and entry.Respondent.lower() == respondent.lower()
        ]

    @staticmethod
    def generate_interview_script(interview_file):
        """Build a blank interview script (Respondent/Answer unset) from an
        Excel file with Num/Section/Question[/Expected_Output] columns.

        Fix: removed the leftover debug print that dumped every raw row to
        stdout on each call.
        """
        df = pd.read_excel(interview_file)

        qa_entries = [
            QAEntry(
                Num=row['Num'],
                Section=row['Section'],
                Question=row['Question'],
                Expected_Output=row.get('Expected_Output') if pd.notna(row.get('Expected_Output')) else None,
                Respondent=None,
                Answer=None,
            )
            for row in df.to_dict('records')
        ]

        return InterviewReport(Entries=qa_entries)

    @staticmethod
    def generate_interview_report(interview_file):
        """Build a completed interview report from an Excel file, substituting
        placeholder strings for any missing optional cells."""
        df = pd.read_excel(interview_file)

        qa_entries = [
            QAEntry(
                Num=row['Num'],
                Section=row['Section'],
                Question=row['Question'],
                Expected_Output=row.get('Expected_Output') if pd.notna(row.get('Expected_Output')) else "No Expected Output Provided",
                Respondent=row.get('Respondent') if pd.notna(row.get('Respondent')) else "No Respondent Provided",
                Answer=row.get('Answer') if pd.notna(row.get('Answer')) else "No Answer Provided",
            )
            for row in df.to_dict('records')
        ]

        return InterviewReport(Entries=qa_entries)
75
+
common/InterviewUtilities.py ADDED
@@ -0,0 +1,144 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ import json
3
+
4
def parse_expected_output_fields(expected_output_text):
    """
    Parse expected-output text into a list of (key, description) tuples.

    Each line of the form "Key: description" yields one tuple (both sides
    whitespace-trimmed); lines without a colon are ignored.
    """
    parsed = []
    for raw_line in expected_output_text.strip().splitlines():
        head, sep, tail = raw_line.partition(':')
        if sep:
            parsed.append((head.strip(), tail.strip()))
    return parsed
16
+
17
def extract_fields_from_expected_output(expected_output_text):
    """
    Return only the field names (keys) parsed from expected_output_text.
    """
    return [name for name, _desc in parse_expected_output_fields(expected_output_text)]
23
+
24
def split_json_string(text):
    """
    Split raw LLM output into a 'thought' part and a JSON part.

    Best of both worlds:
    - Splits text into 'thought' and 'JSON' parts at the first </think> tag
    - Scans for all possible { positions as candidate JSON starts
    - Cleans unescaped newlines inside quotes before attempting to parse
    - Strips junk between </think> and JSON if valid JSON exists
    - Preserves full text after </think> if no JSON is found

    Returns:
        tuple: (thought_part or None, json_string_or_remaining_text)
    """

    # Step 1: Split at </think> if exists
    if '</think>' in text:
        thought_part, possible_json_part = text.split('</think>', 1)
        thought_part = thought_part.strip()
        possible_json_part = possible_json_part.strip()
    else:
        thought_part = None
        possible_json_part = text.strip()

    # Step 2: Find all { positions — each is a potential start of the payload
    brace_positions = [m.start() for m in re.finditer(r'{', possible_json_part)]

    # Clean function: fix newlines inside quoted strings.
    # NOTE(review): the r'"(.*?)"' pattern pairs quotes lazily across the
    # whole candidate and does not understand escaped quotes, so it can
    # mis-pair on strings containing \" — confirm acceptable for this input.
    def clean_json_formatting(text):
        def fix_inside_quotes(match):
            content = match.group(1)
            fixed = content.replace('\n', '\\n').replace('\r', '\\n')
            return f'"{fixed}"'
        return re.sub(r'"(.*?)"', fix_inside_quotes, text, flags=re.DOTALL)

    # Try each candidate in order; the first one that parses as JSON wins.
    for pos in brace_positions:
        candidate = possible_json_part[pos:].strip()

        # Pre-clean: escape raw newlines inside quoted strings
        candidate = clean_json_formatting(candidate)

        # Fix double braces if necessary (e.g. template-escaped "{{...}}");
        # only the first occurrence of each is replaced.
        if candidate.startswith("{{") and "}}" in candidate:
            candidate = candidate.replace("{{", "{", 1).replace("}}", "}", 1)

        # Must start with {" or {' to look like a JSON object
        if not re.match(r'^\{\s*["\']', candidate):
            continue  # not real JSON, skip

        try:
            json.loads(candidate)
            # Successful parse — return the cleaned candidate, not the parsed dict
            return thought_part, candidate
        except json.JSONDecodeError:
            continue  # try next brace position

    # No valid JSON found — return thought and full original remainder (no chopping)
    return thought_part, possible_json_part
77
+
78
def extract_and_parse_json(result_text):
    """
    Extract and parse JSON from raw LLM output.

    Handles output that is plain JSON (possibly with unescaped newlines) or
    JSON wrapped in a triple-backtick ```json ... ``` fenced block.

    Args:
        result_text (str): The raw text output containing JSON data.

    Returns:
        dict or None: Parsed JSON object if successful, None otherwise.
    """
    if not result_text:
        print("🚨 No result text data received.")
        return None

    # Unescaped newlines frequently break LLM-produced JSON, so collapse them
    # to spaces before the first parse attempt.
    flattened = re.sub(r'(?<!\\)\n', ' ', result_text)
    try:
        return json.loads(flattened)
    except json.JSONDecodeError:
        print("Unable to parse cleaned direct JSON.")

    # Fall back to JSON inside a ```json fenced block, taken from the
    # original (un-flattened) text.
    fenced = re.search(r'```json\s*\n({[\s\S]+?})\n```', result_text, re.DOTALL)
    if fenced:
        try:
            return json.loads(fenced.group(1).strip())
        except json.JSONDecodeError:
            pass  # still invalid; fall through to failure

    print("🚨 No valid JSON found.")
    return None
116
+
117
def generate_json_expected_output(expected_output_text):
    """
    Build an instruction string demanding a strict-JSON answer whose keys are
    the lower-cased field names parsed from expected_output_text.
    """
    field_lines = [
        f' "{name.lower()}": {description},'
        for name, description in parse_expected_output_fields(expected_output_text)
    ]

    # The last field must not carry a trailing comma or the JSON template
    # would be invalid.
    if field_lines:
        field_lines[-1] = field_lines[-1].rstrip(',')

    return (
        "You must return your answer strictly in the following JSON format. "
        "Do not include any markdown, commentary, or extra text. The response must be valid JSON:\n\n"
        "{\n"
        + "\n".join(field_lines) +
        "\n}"
    )
common/LLMConfig.py ADDED
@@ -0,0 +1,155 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_openai import ChatOpenAI
2
+ from langchain_groq import ChatGroq
3
+ from langchain_together import ChatTogether
4
+ from crewai import LLM
5
+ from Config import Config
6
+ import os
7
+
8
+ # ========== PUBLIC INTERFACE ==========
9
+
10
def get_respondent_agent_llm_instance(model_type=None):
    """
    Build the CrewAI-compatible LLM used by respondent agents.

    With no model_type, settings come from Config. Otherwise they are read
    from environment variables named after the upper-cased model_type
    (e.g. model_type="groq" -> GROQ_AGENT_MODEL, GROQ_API_KEY, GROQ_URL, ...).

    Raises:
        ValueError: if the API key or model cannot be resolved.
    """
    if model_type:
        # Explicit override: pull everything from env vars for this provider.
        prefix = model_type.upper()
        model = os.getenv(f"{prefix}_AGENT_MODEL")
        api_key = os.getenv(f"{prefix}_API_KEY")
        url = os.getenv(f"{prefix}_URL")
        temperature = float(os.getenv(f"{prefix}_TEMPERATURE", 0.7))
        top_p = float(os.getenv(f"{prefix}_TOP_P", 1.0))
        frequency_penalty = float(os.getenv(f"{prefix}_FREQUENCY_PENALTY", 0.0))
        presence_penalty = float(os.getenv(f"{prefix}_PRESENCE_PENALTY", 0.0))
    else:
        # Default: use whatever Config.load_environment resolved.
        model_type = Config.respondent_agent_host
        model = Config.respondent_agent_model
        api_key = Config.respondent_agent_api_key
        url = Config.respondent_agent_url
        temperature = Config.respondent_agent_temperature
        top_p = Config.respondent_agent_top_p
        frequency_penalty = Config.respondent_agent_frequency_penalty
        presence_penalty = Config.respondent_agent_presence_penalty

    if not api_key:
        raise ValueError(f"API key not found for model_type={model_type}.")
    if not model:
        raise ValueError(f"Model not found for model_type={model_type}.")

    print(f"Respondent Agent LLM: model_type={model_type}, model={model}, api_key={'*****' if api_key else 'MISSING'}, url={url}")
    print(f"Params: temperature={temperature}, top_p={top_p}, frequency_penalty={frequency_penalty}, presence_penalty={presence_penalty}")
    return get_crewai_instance(model_type, model, api_key, url, temperature, top_p, frequency_penalty, presence_penalty)
42
+
43
def get_processing_agent_llm_instance(model_type=None):
    """
    Build the CrewAI-compatible LLM used by processing agents.

    With no model_type, settings come from Config. Otherwise they are read
    from environment variables named after the upper-cased model_type
    (e.g. model_type="groq" -> GROQ_AGENT_MODEL, GROQ_API_KEY, GROQ_URL, ...).

    Raises:
        ValueError: if the API key or model cannot be resolved.
    """
    if model_type:
        # Explicit override: pull everything from env vars for this provider.
        prefix = model_type.upper()
        model = os.getenv(f"{prefix}_AGENT_MODEL")
        api_key = os.getenv(f"{prefix}_API_KEY")
        url = os.getenv(f"{prefix}_URL")
        temperature = float(os.getenv(f"{prefix}_TEMPERATURE", 0.7))
        top_p = float(os.getenv(f"{prefix}_TOP_P", 1.0))
        frequency_penalty = float(os.getenv(f"{prefix}_FREQUENCY_PENALTY", 0.0))
        presence_penalty = float(os.getenv(f"{prefix}_PRESENCE_PENALTY", 0.0))
    else:
        # Default: use whatever Config.load_environment resolved.
        model_type = Config.processing_agent_host
        model = Config.processing_agent_model
        api_key = Config.processing_agent_api_key
        url = Config.processing_agent_url
        temperature = Config.processing_agent_temperature
        top_p = Config.processing_agent_top_p
        frequency_penalty = Config.processing_agent_frequency_penalty
        presence_penalty = Config.processing_agent_presence_penalty

    if not api_key:
        raise ValueError(f"API key not found for model_type={model_type}.")
    if not model:
        raise ValueError(f"Model not found for model_type={model_type}.")

    print(f"Processing Agent LLM: model_type={model_type}, model={model}, api_key={'*****' if api_key else 'MISSING'}, url={url}")
    print(f"Params: temperature={temperature}, top_p={top_p}, frequency_penalty={frequency_penalty}, presence_penalty={presence_penalty}")
    return get_crewai_instance(model_type, model, api_key, url, temperature, top_p, frequency_penalty, presence_penalty)
75
+
76
def get_processor_llm_instance(model_type=None):
    """
    Build the LangChain LLM used by the processor.

    With no model_type, settings come from Config. Otherwise they are read
    from environment variables named after the upper-cased model_type.

    NOTE(review): the override path reads `{prefix}_AGENT_MODEL`, the same
    variable the agent getters use — confirm the processor is meant to share
    the agent model variable rather than a `{prefix}_MODEL` of its own.

    Raises:
        ValueError: if the API key or model cannot be resolved.
    """
    if model_type:
        # Explicit override: pull everything from env vars for this provider.
        prefix = model_type.upper()
        model = os.getenv(f"{prefix}_AGENT_MODEL")
        api_key = os.getenv(f"{prefix}_API_KEY")
        url = os.getenv(f"{prefix}_URL")
        temperature = float(os.getenv(f"{prefix}_TEMPERATURE", 0.7))
        top_p = float(os.getenv(f"{prefix}_TOP_P", 1.0))
        frequency_penalty = float(os.getenv(f"{prefix}_FREQUENCY_PENALTY", 0.0))
        presence_penalty = float(os.getenv(f"{prefix}_PRESENCE_PENALTY", 0.0))
    else:
        # Default: use whatever Config.load_environment resolved.
        model_type = Config.processor_host
        model = Config.processor_model
        api_key = Config.processor_api_key
        url = Config.processor_url
        temperature = Config.processor_temperature
        top_p = Config.processor_top_p
        frequency_penalty = Config.processor_frequency_penalty
        presence_penalty = Config.processor_presence_penalty

    if not api_key:
        raise ValueError(f"API key not found for model_type={model_type}.")
    if not model:
        raise ValueError(f"Model not found for model_type={model_type}.")

    print(f"Processor LLM: model_type={model_type}, model={model}, api_key={'*****' if api_key else 'MISSING'}, url={url}")
    print(f"Params: temperature={temperature}, top_p={top_p}, frequency_penalty={frequency_penalty}, presence_penalty={presence_penalty}")
    return get_langchain_instance(model_type, model, api_key, url, temperature, top_p, frequency_penalty, presence_penalty)
108
+
109
+ # ========== INTERNAL HELPERS ==========
110
+
111
def get_crewai_instance(model_type, model, api_key, url, temperature, top_p, frequency_penalty, presence_penalty):
    """
    Instantiate an LLM client for CrewAI usage based on model_type.

    NOTE(review): for 'groq', 'openrouter' and 'together_ai' the model name is
    prefixed with the provider ("<provider>/<model>"), unlike
    get_langchain_instance which passes the bare model name — confirm intended.

    Raises:
        ValueError: for unsupported model types.
    """
    kind = model_type.lower()

    if kind == 'groq':
        return ChatGroq(groq_api_key=api_key, model_name=f"{kind}/{model}", temperature=temperature, model_kwargs={})

    # Shared sampling arguments; drop any that are unset so provider defaults apply.
    sampling = {
        "temperature": temperature,
        "top_p": top_p,
        "frequency_penalty": frequency_penalty,
        "presence_penalty": presence_penalty,
    }
    sampling = {key: val for key, val in sampling.items() if val is not None}

    if kind == 'openai':
        return ChatOpenAI(model=model, api_key=api_key, **sampling)
    if kind == 'openrouter':
        return ChatOpenAI(base_url=url, model=f"{kind}/{model}", api_key=api_key, **sampling)
    if kind == 'together_ai':
        return LLM(model=f"{kind}/{model}", api_key=api_key, api_base=url, **sampling)
    raise ValueError(f"Unsupported model type for CrewAI: {kind}")
133
+
134
def get_langchain_instance(model_type, model, api_key, url, temperature, top_p, frequency_penalty, presence_penalty):
    """
    Instantiate a LangChain chat model for the given provider.

    Unlike get_crewai_instance, model names here are passed through without a
    provider prefix, and 'together_ai' maps to ChatTogether.

    Raises:
        ValueError: for unsupported model types.
    """
    kind = model_type.lower()

    if kind == 'groq':
        return ChatGroq(groq_api_key=api_key, model_name=model, temperature=temperature, model_kwargs={})

    # Shared sampling arguments; drop any that are unset so provider defaults apply.
    sampling = {
        "temperature": temperature,
        "top_p": top_p,
        "frequency_penalty": frequency_penalty,
        "presence_penalty": presence_penalty,
    }
    sampling = {key: val for key, val in sampling.items() if val is not None}

    if kind == 'openai':
        return ChatOpenAI(model=model, api_key=api_key, **sampling)
    if kind == 'openrouter':
        return ChatOpenAI(base_url=url, model=model, api_key=api_key, **sampling)
    if kind == 'together_ai':
        return ChatTogether(model=model, together_api_key=api_key, **sampling)
    raise ValueError(f"Unsupported model type for LangChain: {kind}")
common/PersonalityValues.py ADDED
@@ -0,0 +1,154 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic import BaseModel
2
+ from typing import List, Dict, Optional
3
+ from collections import defaultdict
4
+
5
+ import datetime
6
+ import json
7
+ import os
8
+ import pandas as pd
9
+ import re
10
+ import numpy as np
11
+ import pprint
12
+ import math
13
+
14
+ from UserProfile import *
15
+
16
class PVEntry(BaseModel):
    """
    One personality-value assessment item: the question/criteria loaded from
    the assessment spreadsheet plus the (initially empty) answer fields filled
    in later.
    """
    Num: int
    Value: str
    Question: str
    # Optional fields default to None: under Pydantic v2, an Optional field
    # without a default is still *required*, which would make it impossible to
    # construct a partially-populated entry. Existing callers that pass every
    # field explicitly are unaffected.
    Criteria: Optional[str] = None
    Rating_Definition: Optional[str] = None
    Adjacent_Values: Optional[List[str]] = None
    Opposite_Values: Optional[List[str]] = None
    Answer: Optional[str] = None
    Score: Optional[int] = None
    Assessment: Optional[str] = None
27
+
28
def parse_values(value_str: Optional[str], delimiter: str = ",") -> List[str]:
    """
    Split a delimited string into a list of whitespace-trimmed tokens.

    Missing values (None/NaN, as read from spreadsheet cells) and any
    non-string input yield an empty list instead of raising.

    Args:
        value_str (Optional[str]): The raw value to split.
        delimiter (str): Separator between tokens. Defaults to ','.

    Returns:
        List[str]: Trimmed tokens, or [] for missing/non-string input.
    """
    missing = pd.isna(value_str) or not isinstance(value_str, str)
    if missing:
        return []
    tokens = value_str.split(delimiter)
    return [token.strip() for token in tokens]
43
+
44
+
45
def extract_values_from_assessment_file(assessment_file):
    """
    Extracts and aggregates Value and Score pairs from an Excel file by summing scores.

    Rows whose Value or Score is missing, or whose Score cannot be parsed as a
    number, are ignored.

    Args:
        assessment_file (str): Path to the Excel file.

    Returns:
        list: A list of dictionaries with Value and total Score.

    Raises:
        ValueError: If the file lacks 'Value' or 'Score' columns.
    """
    # Read the Excel file
    df = pd.read_excel(assessment_file)

    # Ensure required columns are present
    if "Value" not in df.columns or "Score" not in df.columns:
        raise ValueError("The file must contain 'Value' and 'Score' columns.")

    # Work on an explicit copy so the coercion below does not assign into a
    # slice of the original frame (chained-assignment warning in pandas).
    df_clean = df[["Value", "Score"]].dropna().copy()
    df_clean["Score"] = pd.to_numeric(df_clean["Score"], errors="coerce")
    # Coercion turns unparsable scores into NaN; drop those rows explicitly so
    # they cannot contribute phantom zero-score groups to the aggregation.
    df_clean = df_clean.dropna(subset=["Score"])

    # Sum the Score column per Value. Selecting the column explicitly keeps
    # the aggregation from touching any other columns.
    aggregated = df_clean.groupby("Value", as_index=False)["Score"].sum()

    # Convert to list of dictionaries
    return aggregated.to_dict(orient="records")
71
+
72
class PVAssessment(BaseModel):
    """
    Personality-values assessment: maps each value name to the list of
    PVEntry items loaded for it from the assessment spreadsheet.
    """
    # One list of PVEntry items per value name (a value can have several questions).
    Entries: dict[str, list[PVEntry]]

    @staticmethod
    def generate_personality_assessment(personality_file):
        """
        Build a PVAssessment from an Excel file.

        Expects columns: Num, Value, Assessment_Question, Assessment_Criteria,
        Rating_Definition, Adjacent_Values, Opposite_Values. Answer/Score/
        Assessment are initialised to None for later filling.

        Args:
            personality_file (str): Path to the assessment Excel file.

        Returns:
            PVAssessment: Entries grouped by the 'Value' column.
        """
        df = pd.read_excel(personality_file)

        # Use defaultdict to allow appending multiple PVEntries per value
        entries = defaultdict(list)

        for _, row in df.iterrows():
            pv_entry = PVEntry(
                Num=row["Num"],
                Value=row["Value"],
                Question=row["Assessment_Question"],
                Criteria=row["Assessment_Criteria"],
                Rating_Definition=row["Rating_Definition"],
                # Adjacent/Opposite cells are comma-delimited strings in the sheet.
                Adjacent_Values=parse_values(row["Adjacent_Values"]),
                Opposite_Values=parse_values(row["Opposite_Values"]),
                Answer=None,
                Score=None,
                Assessment=None
            )
            entries[row["Value"]].append(pv_entry)

        return PVAssessment(Entries=dict(entries))

    @staticmethod
    def get_score_definition(value, score, pv_assessment):
        """
        Converts a numerical score (1-50) into a corresponding rating definition.

        The Rating_Definition string of the value's first entry is expected to
        be a ';'-delimited list of "low-high: description" (or "n: description")
        segments; the description whose range contains `score` is returned.
        All failure modes print a diagnostic and return "" rather than raising.

        Args:
            value (str): The personality value key.
            score (int): A numerical score between 1 and 50.
            pv_assessment (PVAssessment): The personality assessment object.

        Returns:
            str: The corresponding rating definition, or an empty string if not found.
        """
        if not isinstance(pv_assessment, PVAssessment):
            print("Error: Expected a PVAssessment object.")
            return ""

        # Scores outside 1..50 (or non-int) are rejected up front.
        if not isinstance(score, int) or score < 1 or score > 50:
            print(f"Error: Invalid score '{score}' for '{value}'. Expected a number between 1 and 50.")
            return ""

        entry_list = pv_assessment.Entries.get(value)
        if not entry_list or not isinstance(entry_list, list) or len(entry_list) == 0:
            print(f"Error: No entries found for value '{value}'.")
            return ""

        # Use the first PVEntry in the list (all entries for a value are
        # assumed to share the same rating scale).
        pv_entry = entry_list[0]

        rating_definition = pv_entry.Rating_Definition
        if not isinstance(rating_definition, str) or not rating_definition:
            print(f"Error: No valid rating definition found for '{value}'.")
            return ""

        # Split "rangeA: descA; rangeB: descB; ..." into individual segments.
        rating_definition_list = parse_values(rating_definition, delimiter=";")

        # Find the corresponding description based on the score range
        for definition in rating_definition_list:
            try:
                # Split only on the first ':' so descriptions may contain colons.
                range_part, description = definition.split(":", 1)
                range_part = range_part.strip()

                # "low-high" spans vs single-number ranges.
                if "-" in range_part:
                    range_lower, range_upper = map(int, range_part.split("-"))
                else:
                    range_lower = range_upper = int(range_part)

                if range_lower <= score <= range_upper:
                    return description.strip()

            except ValueError:
                # Raised by a missing ':' or a non-numeric range; skip segment.
                print(f"Error: Invalid rating definition format for '{value}': {definition}")
                continue

        print(f"Error: No matching rating definition found for score {score} in '{value}'.")
        return ""
common/RespondentAgent.py ADDED
@@ -0,0 +1,153 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from crewai import Agent,Task,Process,Crew
2
+ from crewai_tools import FileReadTool, TXTSearchTool
3
+ from crewai.tasks import OutputFormat
4
+ from pydantic import BaseModel
5
+ from typing import List, Dict
6
+
7
+ import datetime
8
+ import json
9
+ import os
10
+ import pandas as pd
11
+ import pprint
12
+
13
+ from UserProfile import *
14
+
15
class RespondentAgent:
    """
    Pairs a user profile with the crewai Agent configured to role-play that
    profile as a market-research respondent.
    """

    def __init__(self, user_profile, agent):
        self.user_profile = user_profile  # profile object the agent role-plays
        self.agent = agent                # configured crewai Agent

    def set_user_profile(self, user_profile):
        """Replace the wrapped user profile."""
        self.user_profile = user_profile

    def set_agent(self, agent):
        """Replace the wrapped crewai Agent."""
        self.agent = agent

    def __repr__(self):
        return f"RespondentAgent(user_profile={self.user_profile}, agent={self.agent})"


    @staticmethod
    def create(user_profile, agent_detail_file, llm, respondent_type="INDIVIDUAL USER"):
        """
        Static method to create a respondent agent using user or group data and other details.

        Builds the role/goal/backstory prompts from the profile (and, when
        available, "fast facts" read from an Excel file) and wraps the
        resulting crewai Agent.

        :param user_profile: The backstory and user information (individual or group).
        :param agent_detail_file: File that contains fast facts or additional context.
        :param llm: The language model to be used by the agent.
        :param respondent_type: Specifies what type of respondent to simulate - individual, focus group or individual user from a segment
        :return: A RespondentAgent wrapping the user profile and the configured Agent.
        """

        # Unified conditional block: pick the wording used inside the goal
        # prompt for whose perspective the responses should reflect.
        if respondent_type == "FOCUS GROUP":
            diversity_note = "your focus group"
        elif respondent_type == "INDIVIDUAL USER REPRESENTING A SEGMENT":
            diversity_note = "your segment"
        else:
            diversity_note = "your unique voice"

        # The profile's repr is embedded verbatim in the role prompt.
        user_profile_str = repr(user_profile)

        myRole = f"""
        You are simulating a {respondent_type} respondent based on the following {respondent_type} profile details:

        {user_profile_str}

        You must draw on your {respondent_type} profile in each and every one of your tasks.
        """

        print(f"Role: {myRole}")

        myGoal = f"""
        This is your VERY IMPORTANT GOAL:

        You represent a **{respondent_type}** with a specific **demographic background, cultural context, and lived experience**.
        Your responses must reflect the **{diversity_note}**, while staying grounded in the boundaries of **your {respondent_type}**.

        You MAY BE guided by real-life behavioral drivers, grounded in the **Jobs To Be Done (JTBD) framework**.
        If the JTBD is provided, then use the following internal map to shape your responses and decision-making:

        ### 🧠 JTBD-Driven Behavioral Profile
        - **Context of Use**: When and where the product fits into your life (e.g., "mid-morning at work", "after a long day").
        - **Trigger**: What causes you to consider using it (e.g., stress, thirst, social situation).
        - **Motivation**: What you are hoping to achieve (e.g., energy, comfort, focus).
        - **Need of the Moment**: What you immediately need resolved or improved.
        - **Perceived Solution**: How you think the product helps you.
        - **Barrier**: What might stop you from using it (e.g., cost, health concern, skepticism).
        - **Top 3 JTBDs**: The most important use cases you believe the product addresses.
        - **Solution**: The overall benefit or outcome you believe the product offers.

        Use these internal drivers to shape your thoughts, behaviors, tone, and choicesβ€”**even when not directly stated**.

        ---

        ### **General Guidelines:**
        - Keep responses natural and authenticβ€”rooted in **realistic thought patterns, language, and perspectives**.
        - Stay consistent with the values, tone, and lived experience of **your {respondent_type}**.
        - Avoid sounding genericβ€”**express culturally and contextually grounded opinions**.

        ---

        ### **For Interviews:**
        - Speak as if in a real conversationβ€”**relatable, honest, and situational**.
        - Use your JTBD profile to explain *why* you’d use a product, hesitate, or find value in something.
        - Refer to triggers, motivations, or barriers in your own words (e.g., "I usually reach for this when...", "I’m not sure because...").

        ---

        ### **For Surveys:**
        - Choose answers that match **your JTBD profile** and **personal logic**.
        - Give justifications that reflect your real internal reasoning (e.g., "because it helps me focus when I’m stressed", "I don’t trust drinks with too many ingredients").
        - Stay internally consistentβ€”responses should always match **your JTBD-based identity**.

        ---

        ### **Cultural & Personalisation Considerations:**
        - Maintain a clear sense of **{respondent_type} identity** throughoutβ€”how they think, speak, and decide.
        - Avoid artificial or overly polished languageβ€”speak with **personal texture and social realism**.
        - Match the **tone and voice** to what feels natural for **your {respondent_type}**, whether formal, casual, reserved, or expressive.

        By following these guidelines and grounding your behavior in the JTBD framework, your responses will remain **authentic, contextually appropriate, and true to your {respondent_type.lower()} profile**.
        """

        # Initialize myBackstory with a default value (used when no fast-facts
        # file exists or it yields nothing).
        myBackstory = f"No backstory available. Focus on your {respondent_type} profile and VERY IMPORTANT GOAL instead."

        if agent_detail_file is not None and os.path.isfile(agent_detail_file):
            print(f"Reading fast facts from {agent_detail_file}")

            fast_facts = FastFacts.read_from_excel(agent_detail_file)

            if fast_facts:
                # Embed the fast facts verbatim (via repr) into the backstory prompt.
                fast_facts_str = repr(fast_facts)
                myBackstory = f"""
                Your BACKSTORY has been enriched with a set of FAST FACTS about the {respondent_type} whose responses you are simulating.

                You must draw on your BACKSTORY FAST FACTS details in each and every one of your tasks.

                Your BACKSTORY FAST FACTS details are as follows:

                {fast_facts_str}
                """



            print(f"Backstory: {myBackstory}")
        else:
            print(f"No fast facts file found: {agent_detail_file}")


        # Create agent object
        agent = Agent(
            role=myRole,
            goal=myGoal,
            backstory=myBackstory,
            llm=llm,
            verbose=True,
            max_retry_limit=5,
            allow_delegation=False,
            memory=True
        )

        return RespondentAgent(user_profile, agent)
common/UserProfile.py ADDED
@@ -0,0 +1,359 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ import datetime
3
+ import textwrap
4
+
5
+ from Config import Config
6
+ from DataDictionary import *
7
+ from FastFacts import *
8
+ from PersonalityValues import *
9
+
10
+ import pandas as pd
11
+ import numpy as np
12
+
13
class AttributeGroup:
    """
    A named bundle of profile fields (one "type" from the data dictionary).

    Declared fields start out as None and are filled in individually;
    undeclared fields are accepted with a warning rather than rejected.
    """

    def __init__(self, group_name, fields):
        self.group_name = group_name
        # Pre-seed every declared field with None so membership checks work.
        self.fields = dict.fromkeys(fields)

    def set_field(self, field_name, value):
        """
        Store `value` under `field_name`, creating the field (with a warning)
        if it was not declared for this group.
        """
        if field_name not in self.fields:
            print(f"Warning: Field '{field_name}' not found in '{self.group_name}'. Adding dynamically.")
        self.fields[field_name] = value

    def get_field(self, field_name):
        """
        Return the stored value for `field_name`; for an unknown field, print
        a notice and return None.
        """
        try:
            return self.fields[field_name]
        except KeyError:
            print(f"Field '{field_name}' does not exist in the '{self.group_name}' attribute group.")

    def to_dict(self):
        """Return only the fields that hold a non-None value."""
        return {name: value for name, value in self.fields.items() if value is not None}

    def __repr__(self):
        """Render as GroupName(field=value, ...) using only populated fields."""
        populated = (f"{name}={value}" for name, value in self.fields.items() if value is not None)
        return f"{self.group_name}({', '.join(populated)})"
54
+
55
+
56
class UserProfile:
    """
    Represents a user profile, dynamically initialised with attribute groups based on the DataDictionary.
    Includes a lazily initialised FastFacts section for storing additional facts about the user.
    """
    def __init__(self, data_dictionary):
        self.data_dictionary = data_dictionary  # Store the data dictionary for dynamic group creation
        self.attribute_groups = {}  # Dictionary to hold created attribute groups
        self.ID = None  # Unique identifier for the user profile
        self.fast_facts = None  # Lazily initialised FastFacts attribute

    def set_ID(self, ID):
        """
        Set the ID for the user profile.
        """
        self.ID = ID

    def set_field(self, group_name, field_name, value):
        """
        Set a value for a field in a specific attribute group.
        If the group does not already exist, it will be created dynamically.
        """
        if group_name not in self.attribute_groups:
            # Create the AttributeGroup only when needed
            if group_name in self.data_dictionary.get_types():
                self.attribute_groups[group_name] = AttributeGroup(
                    group_name,
                    self.data_dictionary.get_parameters(type=group_name)
                )
            else:
                print(f"Attribute group '{group_name}' is not defined in the DataDictionary.")
                return
        self.attribute_groups[group_name].set_field(field_name, value)

    def get_field(self, group_name, field_name):
        """
        Get a value for a field in a specific attribute group.
        Returns None (with a notice) when the group has never been populated.
        """
        if group_name not in self.attribute_groups:
            print(f"Attribute group '{group_name}' is not found.")
            return None

        return self.attribute_groups[group_name].get_field(field_name)

    def set_fields_from_list(self, attribute_type, fields, field_key="field_name", value_key="value"):
        """
        Sets fields in a UserProfile from a list of field-value pairs.

        Args:
            attribute_type (str): The name of the attribute group (e.g., "Values").
            fields (list): A list of dictionaries with field names and values to set.
            field_key (str): The key in the dictionary that corresponds to the field name.
            value_key (str): The key in the dictionary that corresponds to the value.
        """
        if not isinstance(fields, list) or not all(isinstance(field, dict) for field in fields):
            print("Fields must be a list of dictionaries.")
            return

        for field in fields:
            field_name = field.get(field_key)  # Use the specified key for field names
            value = field.get(value_key)  # Use the specified key for values

            # Entries missing either the name or the value are reported and skipped.
            if field_name is not None and value is not None:
                self.set_field(attribute_type, field_name, value)
            else:
                print(f"Skipping invalid field: {field}")


    def get_attributes(self, attribute_type=None):
        """
        Retrieve attributes for a specific attribute type or all attributes if no type is specified.

        Args:
            attribute_type (str, optional): The name of the attribute group to retrieve.
                                            If None, retrieves all attributes.

        Returns:
            dict: A dictionary of non-null attributes for the specified type or all types.
                  Keys are prefixed with the group name ("Group_field") only in
                  the all-types case.
        """
        if attribute_type:
            if attribute_type in self.attribute_groups:
                return self.attribute_groups[attribute_type].to_dict()
            else:
                print(f"Attribute type '{attribute_type}' does not exist in this user profile.")
                return {}
        else:
            # Combine all attributes if no specific type is specified
            all_attributes = {}
            for group_name, group in self.attribute_groups.items():
                all_attributes.update({f"{group_name}_{k}": v for k, v in group.to_dict().items()})
            return all_attributes


    def add_fast_facts(self, facts):
        """
        Lazily initialise and add a set of facts to the FastFacts attribute.

        Args:
            facts (iterable): A collection of facts to add to FastFacts.
        """
        if not isinstance(facts, (set, list)):
            print("Facts must be provided as a set or list.")
            return

        if self.fast_facts is None:
            self.fast_facts = FastFacts()

        self.fast_facts.add_facts(facts)

    def to_dict(self, data_dictionary):
        """
        Convert the entire user profile to a flat dictionary keyed by
        "Type_Field" names, including only fields that have a value.
        """
        profile_dict = {'ID': self.ID}

        # Iterate over all types in the data dictionary
        for attribute_type in data_dictionary.get_types():
            group_attributes = self.get_attributes(attribute_type)
            for field_name in data_dictionary.get_parameters(type=attribute_type):
                full_field_name = f"{attribute_type}_{field_name}"

                # Access the value directly from the dictionary
                value = group_attributes.get(field_name)

                # Ensure value exists before updating the profile_dict
                if value is not None:
                    profile_dict[full_field_name] = value
                else:
                    print(f"Warning: {field_name} not found in type {attribute_type}")

        return profile_dict

    def __repr__(self):
        """
        String representation of the user profile with attribute groups.
        """
        groups_repr = ", ".join(str(group) for group in self.attribute_groups.values())
        return f"UserProfile(ID={self.ID}, {groups_repr})"


    @staticmethod
    def write_user_profiles_to_excel(user_profiles, filename, data_dictionary):
        """
        Writes a list of UserProfile objects to an Excel file with columns ordered by the data dictionary.

        Args:
            user_profiles (list): List of UserProfile objects.
            filename (str): Path to the Excel file.
            data_dictionary (object): Data dictionary containing column order and metadata.
        """
        if not user_profiles:
            print("No user profiles to write.")
            return

        # Convert user profiles to a list of dictionaries
        profiles_data = []

        for i, user_profile in enumerate(user_profiles):
            print(f"Processing profile {i+1}: {user_profile}")
            profile_dict = user_profile.to_dict(data_dictionary)
            print(f"Dict output: {profile_dict}")
            profiles_data.append(profile_dict)

        # Get the column order from the data dictionary
        column_order = ['ID'] + data_dictionary.get_columns()

        # Create a DataFrame from the profiles data
        df = pd.DataFrame(profiles_data)

        # Ensure all columns in the data dictionary are present in the DataFrame
        for column in column_order:
            if column not in df.columns:
                print(f"Column {column} is missing")
                df[column] = None  # Add missing columns with NaN/None

        # Reorder columns based on the data dictionary
        df = df[column_order]

        # Write the DataFrame to an Excel file
        df.to_excel(filename, index=False)

        # Bug fix: the message previously did not interpolate the target path,
        # printing a literal placeholder instead of the actual filename.
        print(f"User profiles successfully written to {filename}")


    @staticmethod
    def read_user_profiles_from_excel(respondent_details_file, data_dictionary, pv_criteria):
        """
        Reads a list of UserProfile objects from an Excel file and converts scores into definitions.

        Args:
            respondent_details_file (str): Path to the Excel file.
            data_dictionary (DataDictionary): Instance of DataDictionary containing valid fields.
            pv_criteria (PVAssessment): Instance of PVAssessment to retrieve text descriptions of values.

        Returns:
            list: List of UserProfile objects.
        """
        user_profiles = []

        # Read the Excel file into a DataFrame
        df = pd.read_excel(respondent_details_file)

        # Iterate over the rows in the DataFrame
        for _, row in df.iterrows():
            user_profile = UserProfile(data_dictionary)

            # Set basic fields for UserProfile if they are present
            if pd.notna(row.get('ID')):
                user_profile.set_ID(row.get('ID'))

            # Iterate over all types in the data dictionary
            for attribute_type in data_dictionary.get_types():
                for field in data_dictionary.get_parameters(type=attribute_type):
                    full_field_name = f"{attribute_type}_{field}"
                    raw_value = row.get(full_field_name)

                    if pd.notna(raw_value):
                        # Special handling for Values type: numeric scores are
                        # translated into their textual rating definition.
                        if attribute_type.lower() == "values":
                            try:
                                score = int(raw_value)  # Convert to integer score
                                # Get description from PVAssessment
                                description = PVAssessment.get_score_definition(field, score, pv_criteria)

                                # The raw numeric score is intentionally not
                                # stored; only its textual description is kept.
                                # NOTE(review): the stored field name embeds the
                                # group prefix ("<Type>_<Field>_Description"),
                                # so flattened keys will repeat the prefix —
                                # confirm downstream consumers expect this.
                                description_field_name = f"{attribute_type}_{field}_Description"
                                user_profile.set_field(attribute_type, description_field_name, description)

                            except ValueError:
                                print(f"Warning: Could not convert '{raw_value}' to an integer for field '{full_field_name}'.")
                        else:
                            # Generic field setting for non-Values types
                            user_profile.set_field(attribute_type, field, raw_value)

            user_profiles.append(user_profile)

        print(f"User profiles successfully read from {respondent_details_file}")
        return user_profiles
298
+
299
+
300
class UserProfileDetail:
    """One QA'd key/value entry of a user profile (original vs revised value)."""

    def __init__(self, key, original_value, qa_check, value):
        """
        Initialize a UserProfileDetail entry.

        Args:
            key: Attribute name.
            original_value: Value before QA revision.
            qa_check: QA check status string.
            value: Revised (final) value.
        """
        self.key = key
        self.original_value = original_value
        self.qa_check = qa_check
        self.value = value

    def __repr__(self):
        # Hide empty/falsy fields and the "Unable to map" sentinel from the repr.
        fields = {k: v for k, v in self.__dict__.items() if v and v != "Unable to map"}
        formatted_fields = [f"{k}='{v}'" for k, v in fields.items()]
        # Bug fix: the opening parenthesis was missing (": " was emitted
        # instead), producing an unbalanced "ClassName: ...)" string.
        return f"{self.__class__.__name__}(" + ", ".join(formatted_fields) + ")"

    @staticmethod
    def filter_profiles(profiles, key=None, qa_check=None, value=None):
        """
        Static method to filter user profiles by key, QA check status, or value.

        Criteria left as None are not applied; supplied criteria must all match.

        Args:
            profiles (list): List of UserProfileDetail objects.
            key (str, optional): The key to filter by.
            qa_check (str, optional): The QA check status to filter by.
            value (str, optional): The value to filter by.

        Returns:
            list: A list of UserProfileDetail entries that match the criteria.
        """
        return [
            profile for profile in profiles
            if (key is None or profile.key == key) and
               (qa_check is None or profile.qa_check == qa_check) and
               (value is None or profile.value == value)
        ]

    @staticmethod
    def generate_user_profiles(file_path):
        """
        Static method to generate a list of UserProfileDetail entries from an Excel (.xlsx) file.

        Expects columns: 'Key', 'Value', 'QA Check', 'Revised Value'.

        Args:
            file_path (str): The path to the Excel file containing user profile entries.

        Returns:
            list: A list of UserProfileDetail objects generated from the file.
        """
        # Read the Excel file
        df = pd.read_excel(file_path)

        profiles = []
        for _, row in df.iterrows():
            profile = UserProfileDetail(
                key=row['Key'],
                original_value=row['Value'],
                qa_check=row['QA Check'],
                value=row['Revised Value']
            )
            profiles.append(profile)
        return profiles
common/Utilities.py ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ from collections import OrderedDict
3
+ from datetime import datetime
4
+ import pandas as pd
5
+ import os
6
+
7
+
8
def read_text_file(file_path):
    """Return the entire contents of a text file as a single string."""
    with open(file_path, 'r') as handle:
        return handle.read()
12
+
13
+ # FILTER FUNCTION
14
+ def filter_profiles_by_input(profiles, data_dictionary):
15
+ """Interactive filtering with step-by-step criteria selection and data dictionary integration"""
16
+ print("\n=== FILTER SETTINGS ===")
17
+
18
+ # Get column names from the data dictionary
19
+ try:
20
+ dd_columns = data_dictionary.get_columns() # Use get_columns() from your DataDictionary class
21
+ if not dd_columns:
22
+ raise RuntimeError("Data dictionary returned no columns.")
23
+ except Exception as e:
24
+ raise RuntimeError(f"Failed to retrieve columns from data dictionary: {str(e)}")
25
+
26
+ print("Available columns:")
27
+ for col in sorted(dd_columns):
28
+ print(f" β€’ {col}")
29
+
30
+ remaining_columns = set(dd_columns)
31
+ filtered_profiles = profiles
32
+
33
+ while True:
34
+ if not remaining_columns:
35
+ print("\nNo more columns available for filtering.")
36
+ break
37
+
38
+ print("\nColumns available to filter on:")
39
+ for col in sorted(remaining_columns):
40
+ print(f" β€’ {col}")
41
+
42
+ column = input("\nEnter column name to filter (press Enter to finish): ").strip()
43
+
44
+ if not column:
45
+ break # Stop filtering when user presses Enter
46
+
47
+ if column not in remaining_columns:
48
+ print(f"\nError: Column '{column}' not found or already used for filtering.")
49
+ continue
50
+
51
+ value = input(f"Enter value to filter for '{column}' (press Enter to skip): ").strip()
52
+
53
+ if not value:
54
+ print("\nNo value entered. Skipping this filter.")
55
+ continue
56
+
57
+ new_filtered_profiles = [
58
+ profile for profile in filtered_profiles
59
+ if value.lower() in str(profile.get_attributes().get(column, "")).lower()
60
+ ]
61
+
62
+ if not new_filtered_profiles:
63
+ print(f"\nNo matches for '{column}' containing '{value}'. Returning to previous state.")
64
+ continue
65
+
66
+ filtered_profiles = new_filtered_profiles
67
+ remaining_columns.remove(column)
68
+
69
+ print(f"\nFound {len(filtered_profiles)} matching profiles")
70
+ print(f"Profiles filtered out: {len(profiles) - len(filtered_profiles)}")
71
+
72
+ confirm = input("\nProceed with another filter? (Yes/No): ").strip().lower()
73
+ while confirm not in ['yes', 'no']:
74
+ confirm = input("Invalid input. Please enter 'Yes' or 'No': ").strip().lower()
75
+
76
+ if confirm == 'no':
77
+ break
78
+
79
+ return filtered_profiles
80
+
81
+
82
+
83
def generate_file_excerpt(file_path, pattern, max_chars=5000):
    """
    Collect the lines of a file that start with `pattern` and contain at
    least six whitespace-separated words, strip the pattern out of them, and
    return up to the *last* `max_chars` characters of the joined result (the
    tail is kept so the most recent content survives truncation).

    NOTE: the pattern is removed from every occurrence within a matching
    line, not only the leading one.
    """
    with open(file_path, 'r') as handle:
        lines = handle.readlines()

    selected = []
    for line in lines:
        # Keep only sufficiently long lines that begin with the pattern.
        if line.startswith(pattern) and len(line.split()) >= 6:
            selected.append(line.replace(pattern, '').strip())

    joined = '\n'.join(selected)
    return joined[-max_chars:]
+ return full_text[-max_chars:] # Taking the last max_chars characters
96
+
97
+
98
def generate_dict_from_file(file_name, column_name1, column_name2):
    """
    Read two columns from an Excel workbook and return an OrderedDict mapping
    the first column's values to the second's, preserving row order.

    NOTE: duplicate keys in the first column keep only the last row's value.
    """
    frame = pd.read_excel(file_name, usecols=[column_name1, column_name2], engine='openpyxl')  # Specify the engine
    return OrderedDict(zip(frame[column_name1], frame[column_name2]))
105
+
106
def find_latest_timestamped_file(directory, filename_pattern):
    """Finds the most recently modified file in a directory whose name ends
    with the given pattern.

    Despite the name, recency is judged by filesystem modification time
    (mtime), not by any timestamp embedded in the filename.

    Args:
        directory: The directory to search for files.
        filename_pattern: Filename suffix to match (e.g., "interview_results.xlsx").

    Returns:
        The path to the most recently modified matching file, or None if no
        matching files were found.
    """
    candidates = [f for f in os.listdir(directory) if f.endswith(filename_pattern)]
    if not candidates:
        print(f"Unable to find file with {filename_pattern} in {directory}")
        return None

    # max() by mtime avoids sorting the whole list just to take one element.
    latest_file = max(candidates, key=lambda f: os.path.getmtime(os.path.join(directory, f)))
    return os.path.join(directory, latest_file)
124
+
125
+
126
def generate_pivot_table(original_table, index, columns, values):
    """
    Flatten a collection of report objects into one wide summary DataFrame.

    Args:
        original_table: Iterable of report objects; each must expose an
            `Entries` iterable whose items provide `.dict()`.
            # NOTE(review): `.dict()` is the pydantic v1-style API — confirm
            # against the pydantic version in use.
        index: Column name to use as the pivot row index.
        columns: Column name whose distinct values become output columns.
        values: Column name supplying the cell values.

    Returns:
        pd.DataFrame: one row per `index` value, columns in first-seen order,
        missing cells filled with "No Response".
    """
    # Step 1: Flatten all SurveyEntry objects into a DataFrame
    df = pd.json_normalize(entry.dict() for report in original_table for entry in report.Entries)

    # Step 2: Extract the original order of 'columns' (e.g., questions) so the
    # pivot does not reorder them alphabetically.
    original_order = df[columns].drop_duplicates().tolist()

    # Step 3: Pivot the DataFrame (DataFrame.pivot raises on duplicate
    # index/columns pairs — each respondent/question pair must be unique).
    summary_df = df.pivot(index=index, columns=columns, values=values)

    # Step 4: Reindex to preserve the original order of columns
    summary_df = summary_df.reindex(columns=original_order).reset_index().fillna("No Response")

    # Return the summary DataFrame
    return summary_df
researchsimulation/InteractiveInterviewChatbot.py ADDED
@@ -0,0 +1,124 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pip install groq
2
+ #pip install langchain_groq
3
+ #pip install crewai
4
+ #pip install crewai_tools
5
+ #pip install pydantic
6
+ #pip install XlsxWriter
7
+ #pip install openpyxl
8
+ #pip install pandas
9
+ #pip install streamlit
10
+
11
+ import gradio as gr
12
+ from RespondentAgent import *
13
+ from InterviewSimulation import *
14
+ from langchain_groq import ChatGroq
15
+
16
def ask_interview_question(respondent_agent_full, question):
    """Ask the respondent agent one interview question via a single-task crew.

    Args:
        respondent_agent_full: Respondent wrapper exposing ``.agent`` (the
            crewai Agent) and ``.user_profile`` (with style/tone/values).
        question: The exact question to ask; the prompt forbids rephrasing it.

    Returns:
        The agent's raw answer string, or None when the task produced no raw
        output or an exception occurred (details are printed either way).
    """
    respondent_agent = respondent_agent_full.agent
    respondent_agent_style = respondent_agent_full.user_profile.style
    respondent_agent_tone = respondent_agent_full.user_profile.tone
    # repr() keeps the values structure readable when embedded in the prompt.
    respondent_agent_values = repr(respondent_agent_full.user_profile.values)

    question_task_description = f"""
    Interview Question: {question}\n
    The Market Research Respondent must answer this exact question in alignment with their values {respondent_agent_values}
    This question may consist of multiple parts, but it should not be split apart or modified in any way.
    The answer must be based solely on the Market Research Respondent's knowledge and backstory.
    The Market Research Respondent should not use any external sources or tools.
    The Market Research Respondent should refer to the provided search text if needed.
    Under no circumstances should an answer be selected that contradicts or is inconsistent with the respondent's profile.
    """

    question_task_expected_output = f"""
    <answer>, the Market Research Respondent's answer to the exact question: '{question}'
    No changes to the question are allowed. None of the response(s) selected should contradict or be inconsistent with what is expected from the respondent.
    Answer should be expressed using the Market Respondent's style: {respondent_agent_style}
    and in the Market Respondent's tone: {respondent_agent_tone}
    """

    question_task = Task(
        description=question_task_description,
        expected_output=question_task_expected_output,
        agent=respondent_agent
    )

    # One agent, one task: each question is executed in isolation.
    crew = Crew(
        agents=[respondent_agent],
        tasks=[question_task],
        process=Process.sequential
    )

    try:
        crew.kickoff()

        task_output = question_task.output
        if task_output.raw:
            return task_output.raw

        # Bug fix: the original fell off the end of the function implicitly;
        # make the "no output" result an explicit None so callers can test it.
        print("No raw task output data")
        return None
    except Exception:
        exc_type, exc_value, exc_traceback = sys.exc_info()
        print("Exception type:", exc_type)
        print("Exception message:", exc_value)
        print("Traceback details:")
        traceback.print_tb(exc_traceback)
        return None
+
69
# MAIN
Config.load_environment("..", "chatbot")

# SET UP LLAMA
fact_based_llm = ChatGroq(
    groq_api_key=Config.groq_api_key,
    model_name=Config.agent_model,
    temperature=0.1,  # Low temperature for deterministic output
)

# generate respondent summary data from file
respondent_agent_user_profiles = UserProfile.read_user_profiles_from_excel(Config.respondent_summary_file)

# This chatbot always talks to the first profile in the summary file.
user_profile = respondent_agent_user_profiles[0]
respondent_agent_detail_file = f"{Config.config_dir}/{user_profile.ID}_fast_facts.xlsx"
respondent_agent = RespondentAgent.create(user_profile, respondent_agent_detail_file, fact_based_llm)

# Display name falls back to the agent ID when the profile has no name.
if user_profile.name is not None:
    respondent_agent_name = user_profile.name
else:
    respondent_agent_name = respondent_agent.ID

def chatbot_interface(message, history=None):
    """Gradio callback: ask the respondent agent and extend the chat history.

    Bug fix: the original used a mutable default argument (``history=[]``),
    which is created once and shared across calls, silently accumulating
    state. A None sentinel gives each call its own fresh list.

    Args:
        message: The user's question text.
        history: Gradio "messages"-format chat history (list of role dicts).

    Returns:
        Tuple of (updated history, "") — the empty string clears the textbox.
    """
    if history is None:
        history = []
    response = ask_interview_question(respondent_agent, message)

    # Append user message in the correct format
    history.append({"role": "user", "content": f"You: {message}"})
    # Append respondent's message in the correct format
    history.append({"role": "assistant", "content": f"{respondent_agent_name}: {response}"})

    # Return updated history and clear the input field
    return history, ""

# Create Gradio Interface
with gr.Blocks() as demo:
    # Header Section
    with gr.Row():
        gr.Markdown(f"## Welcome to PreData.AI's Market Research Panel - you are speaking with {respondent_agent_name}")

    # Chatbot Section
    chatbot = gr.Chatbot(type="messages", label=None, height=400)

    # Input Section
    with gr.Row():
        msg = gr.Textbox(placeholder="Ask your question here...")

    # Footer Section
    with gr.Row():
        gr.Markdown("© 2024 PreData.AI - All rights reserved.")

    # Chatbot Interaction
    msg.submit(chatbot_interface, [msg, chatbot], [chatbot, msg])

# Run the Gradio app
demo.launch(share=True)
researchsimulation/InteractiveInterviewSimulation.py ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #pip install groq
2
+ #pip install langchain_groq
3
+ #pip install crewai
4
+ #pip install crewai_tools
5
+ #pip install pydantic
6
+ #pip install XlsxWriter
7
+ #pip install openpyxl
8
+ #pip install pandas
9
+ #pip install streamlit
10
+
11
+ import streamlit as st
12
+ from Config import Config
13
+ from Utilities import *
14
+ from UserProfile import *
15
+ from RespondentAgent import *
16
+ from Interview import *
17
+ from InterviewSimulation import *
18
+ from itertools import islice
19
+ from groq import Groq
20
+ from langchain_groq import ChatGroq
21
+
22
def ask_interview_question(respondent_agent_full, question):
    """Run one interview question through a single-agent crew.

    The question is embedded verbatim in the task prompt together with the
    respondent's values, style, and tone from their user profile.

    Args:
        respondent_agent_full: Wrapper exposing ``.agent`` and ``.user_profile``.
        question: The exact interview question to ask.

    Returns:
        The raw answer text, or None if no output was produced or an
        exception occurred (diagnostics are printed).
    """
    agent = respondent_agent_full.agent
    profile = respondent_agent_full.user_profile
    style = profile.style
    tone = profile.tone
    values_repr = repr(profile.values)

    description = f"""
    Interview Question: {question}\n
    The Market Research Respondent must answer this exact question in alignment with their values {values_repr}
    This question may consist of multiple parts, but it should not be split apart or modified in any way.
    The answer must be based solely on the Market Research Respondent's knowledge and backstory.
    The Market Research Respondent should not use any external sources or tools.
    The Market Research Respondent should refer to the provided search text if needed.
    Under no circumstances should an answer be selected that contradicts or is inconsistent with the respondent's profile.
    """

    expected_output = f"""
    <answer>, the Market Research Respondent's answer to the exact question: '{question}'
    No changes to the question are allowed. None of the response(s) selected should contradict or be inconsistent with what is expected from the respondent.
    Answer should be expressed using the Market Respondent's style: {style}
    and in the Market Respondent's tone: {tone}
    """

    task = Task(
        description=description,
        expected_output=expected_output,
        agent=agent
    )

    # A sequential crew containing only this agent and this task.
    crew = Crew(agents=[agent], tasks=[task], process=Process.sequential)

    try:
        crew_output = crew.kickoff()

        answer = task.output.raw
        if answer:
            return answer
        print("No raw task output data")
    except Exception:
        exc_type, exc_value, exc_traceback = sys.exc_info()
        print("Exception type:", exc_type)
        print("Exception message:", exc_value)
        print("Traceback details:")
        traceback.print_tb(exc_traceback)
74
+
75
# MAIN
Config.load_environment("..", "dev1")

# SET UP LLAMA
# Deterministic model used for profile-grounded answers.
fact_based_llm = ChatGroq(
    groq_api_key=Config.groq_api_key,
    model_name=Config.agent_model,
    temperature=0.1,  # Low temperature for deterministic output
    # max_tokens=500, # Enough tokens to complete factual sentences
    # stop_sequences=["\n", "<|endoftext|>"] # Stops at logical sentence boundaries
)

# NOTE(review): exploratory_llm is not referenced anywhere in this script —
# presumably kept for parity with other simulations; confirm before removing.
exploratory_llm = ChatGroq(
    groq_api_key=Config.groq_api_key,
    model_name=Config.agent_model,
    temperature=0.9,  # Higher temperature for more creative output
    max_tokens=2000,  # Allows for more extended, imaginative responses
    stop_sequences=["\n", "<|endoftext|>"]  # Standard stop sequences for controlling output length
)

# generate respondent summary data from file
respondent_agent_user_profiles = UserProfile.read_user_profiles_from_excel(Config.respondent_summary_file)

# This simulation always interviews the first profile in the summary file.
user_profile = respondent_agent_user_profiles[0]
respondent_agent_detail_file = f"{Config.config_dir}/{user_profile.ID}_fast_facts.xlsx"
respondent_agent = RespondentAgent.create(user_profile, respondent_agent_detail_file, fact_based_llm)

# Streamlit UI
st.title("Interactive Interview Simulation")
st.write(f"Using Respondent Profile: {user_profile.name} (ID: {user_profile.ID})")

# Question input and response loop
st.write("Type your interview questions below. To exit, type 'exit'.")

# Initialise per-session history containers exactly once.
for _key in ("questions", "responses"):
    if _key not in st.session_state:
        st.session_state[_key] = []

question = st.text_input("Enter your interview question:")

if st.button("Ask Question"):
    cleaned = question.strip()
    if cleaned.lower() == "exit":
        st.write("**Session ended. Thank you for using the simulation!**")
    elif cleaned:
        response = ask_interview_question(respondent_agent, question)
        st.session_state.questions.append(question)
        st.session_state.responses.append(response)
        st.write(f"**Q:** {question}")
        st.write(f"**A:** {response}")
    else:
        st.error("Please enter a valid question.")

# Display previous questions and answers
if st.session_state.questions:
    st.write("### Previous Questions and Responses:")
    for q, a in zip(st.session_state.questions, st.session_state.responses):
        st.write(f"- **Q:** {q}")
        st.write(f"  **A:** {a}")
researchsimulation/InterviewSimulation.py ADDED
@@ -0,0 +1,196 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ from crewai import Agent,Task,Process,Crew
3
+ from crewai_tools import FileReadTool, TXTSearchTool
4
+ from crewai.tasks import OutputFormat
5
+ from pydantic import BaseModel
6
+ from typing import List, Dict, Optional
7
+
8
+ import datetime
9
+ import json
10
+ import os
11
+ import pandas as pd
12
+ import sys
13
+
14
+ from Interview import *
15
+
16
+ #utils
17
+ import re
18
+ import sys
19
+ import traceback
20
+ import json
21
+ import pandas as pd
22
+ from InterviewUtilities import *
23
+
24
def select_profiles_by_criteria(profiles, selection_criteria, data_dictionary):
    """
    Filter profiles down to those satisfying every 'column:value' criterion.

    Criteria combine with logical AND. Matching is case-insensitive, and the
    profile's attribute value is stripped of surrounding whitespace before
    comparison.

    Args:
        profiles (list): Profile objects exposing ``get_attributes() -> dict``.
        selection_criteria (list): Strings of the form 'column:value'. An
            empty list selects every profile.
        data_dictionary: Object whose ``get_columns()`` lists valid columns.

    Returns:
        list: The profiles matching all criteria (possibly empty).

    Raises:
        ValueError: If a criterion is not 'column:value'-formatted, or names
            a column missing from the data dictionary.
    """
    if not selection_criteria:
        print("No selection criteria provided. Returning all profiles.")
        return profiles

    valid_columns = data_dictionary.get_columns()

    # Narrow the candidate list one criterion at a time (logical AND).
    remaining = profiles
    for criterion in selection_criteria:
        pieces = criterion.split(":", 1)
        if len(pieces) != 2:
            raise ValueError(f"Selection criterion '{criterion}' must be formatted as 'column:value'.")
        column = pieces[0].strip()
        value = pieces[1].strip()

        if column not in valid_columns:
            raise ValueError(f"Column '{column}' not found in data dictionary.")

        target = value.lower()
        remaining = [
            profile for profile in remaining
            if str(profile.get_attributes().get(column, "")).strip().lower() == target
        ]

        print(f"Applied criterion '{column}:{value}' → {len(remaining)} profile(s) selected.")

        # Nothing left — no later criterion can add profiles back, so stop.
        if not remaining:
            print("No profiles match the combined criteria.")
            break

    return remaining
69
+
70
+
71
+
72
def run_interview(respondent_agent_full, interview_script, output_file_name, llm, is_focus_group=False):
    """Run every question of an interview script against one respondent agent.

    For each script entry a single-task crew is executed; the raw answer is
    split into free-form "detailed thoughts" and a JSON payload, parsed, and
    accumulated into rows. All collected rows are written to an Excel report.

    Args:
        respondent_agent_full: Wrapper exposing ``.agent`` (the crewai Agent).
        interview_script: Object with ``.Entries``, each carrying
            Num/Section/Question/Expected_Output fields.
        output_file_name: Path of the .xlsx report to write.
        llm: Unused in this function body; kept for caller compatibility.
        is_focus_group: When True, prompts instruct a collective "we" voice;
            otherwise an individual first-person voice.

    Returns:
        pandas.DataFrame of the collected rows (also written to disk).
    """
    interview_report_data = []
    respondent_agent = respondent_agent_full.agent

    # Loop through each section and question to create individual crews
    for entry in interview_script.Entries:
        num = entry.Num
        section = entry.Section
        question = entry.Question

        print(f"STARTING process for Section: {section}, Question {num}: {question}")

        # verbiage for respondent_type
        if is_focus_group:
            respondent_type = "Focus Group"
            diversity_note = "collective voices of the focus group"
        else:
            respondent_type = "Individual User"
            diversity_note = "your unique voice of the individual"

        question_task_description = f"""
        Interview Section: {section}, Question {num}: {question}

        ### **Your Role & Expectations:**
        You are a {respondent_type} participant with a **specific demographic profile, cultural background, values, lifestyle, and habits**.

        Your responses must reflect the **{diversity_note}**, and remain grounded in realistic thought patterns, communication styles, and decision-making behavior.

        ---

        ### **How to Answer:**
        - Use a tone appropriate to your role as a {respondent_type}:
          - 🗣️ If you are part of a FOCUS GROUP, speak as a collective group (e.g., “we prefer...”, “most of us think...”).
          - 🧍 If you are an INDIVIDUAL USER, speak from your personal point of view (e.g., “I prefer...”, “in my experience...”).
        - Reference your **real-life JTBD motivations**, including your triggers, immediate needs, and usage barriers.
        - Express yourself in **natural language**—avoid being generic or overly Westernised.
        - Ensure your justification sounds **culturally and contextually grounded** in your background and behavior.
        - If the question is about **your profile, preferences, or past behavior**, only answer using details grounded in your background and JTBD context.
        - If the question is speculative, reflective, or opinion-based, use thoughtful reasoning that aligns with your lifestyle, values, and cultural behavior.

        ---

        ### **Mandatory Personalisation (Choose at least one):**
        Your response MUST include a behavioral or contextual anchor:
        - 🎓 **Education & Career Goals**
        - 🛍️ **Shopping Behavior**
        - 📱 **Media Habits**
        - 🌍 **Cultural Identity**


        ### **Unacceptable Responses:**
        ❌ Using the wrong voice (e.g., “I” in a group, “we” for an individual)
        ❌ Generic, vague, or contradictory answers
        ❌ Contradictions to your persona’s traits, preferences, or profile
        """

        # Fall back to a generic one-field expectation when the script entry
        # does not specify its own expected output.
        expected_output_text = entry.Expected_Output
        if not expected_output_text:
            expected_output_text = "response: The Market Research Respondent’s answer to the exact question: '{question}'. You must not alter or rephrase the question in any way."

        question_task_expected_output = generate_json_expected_output(expected_output_text)

        print(f"Expected output is:\n{question_task_expected_output}\n")

        question_task = Task(
            description=question_task_description,
            expected_output=question_task_expected_output,
            agent=respondent_agent
        )

        # Create and execute the crew for this question and report
        crew = Crew(
            agents=[respondent_agent],
            tasks=[question_task],
            process=Process.sequential
        )

        try:
            crew.kickoff()
            print(f"Crew usage metrics: {crew.usage_metrics}")

            response_text = question_task.output.raw

            if not response_text:
                # Bug fix: the original returned None here, which silently
                # discarded every answer already collected and skipped writing
                # the report. Skip just this question instead — consistent
                # with the exception handler below.
                print("🚨 No raw task output data")
                continue

            print(f"RAW OUTPUT START:\n {response_text} \nEND OF RAW OUTPUT")

            # Separate free-form reasoning text from the trailing JSON blob.
            detailed_thoughts, json_str = split_json_string(response_text)
            parsed_response = extract_and_parse_json(json_str)

            # Initialize the base row data
            row_data = {
                'Num': num,
                'Section': section,
                'Question': question,
                'Answer': response_text,
                'Detailed Thoughts': detailed_thoughts
            }

            if parsed_response:
                print("\n✅ Successfully Parsed JSON:\n", json.dumps(parsed_response, indent=2, ensure_ascii=False))
                # Promote each expected JSON field to its own report column.
                fields = extract_fields_from_expected_output(expected_output_text)
                for field in fields:
                    row_data[field.lower()] = parsed_response.get(field.lower())
            else:
                print("\n🚨 No valid JSON extracted - saving raw answer")

            interview_report_data.append(row_data)

        except Exception:
            exc_type, exc_value, exc_traceback = sys.exc_info()
            print("Exception type:", exc_type)
            print("Exception message:", exc_value)
            print("Traceback details:")
            traceback.print_tb(exc_traceback)
            continue

    # Convert the list of dictionaries into a DataFrame and save it to Excel
    df = pd.DataFrame(interview_report_data)
    with pd.ExcelWriter(output_file_name, engine='xlsxwriter') as writer:
        df.to_excel(writer, index=False)

    return df