Spaces:

nat232
/

student_sample_panel

Build error

App Files Files Community

elaineaishophouse committed on Jun 4, 2025

Commit

441d880

verified ·

1 Parent(s): 97b46ba

Upload 15 files

Browse files

Files changed (15) hide show

common/.DS_Store +0 -0
common/CombineReportsMain.ipynb +1 -0
common/Config.py +183 -0
common/DataDictionary.py +134 -0
common/FastFacts.py +81 -0
common/Interview.py +75 -0
common/InterviewUtilities.py +144 -0
common/LLMConfig.py +155 -0
common/PersonalityValues.py +154 -0
common/RespondentAgent.py +153 -0
common/UserProfile.py +359 -0
common/Utilities.py +140 -0
researchsimulation/InteractiveInterviewChatbot.py +124 -0
researchsimulation/InteractiveInterviewSimulation.py +133 -0
researchsimulation/InterviewSimulation.py +196 -0

common/.DS_Store ADDED Viewed

Binary file (6.15 kB). View file

common/CombineReportsMain.ipynb ADDED Viewed

	@@ -0,0 +1 @@

+ {
 "nbformat": 4,
 "nbformat_minor": 0,
 "metadata": {
  "colab": {
   "provenance": [],
   "authorship_tag": "ABX9TyPaVbmI1rVozLNCS5uIGcEq"
  },
  "kernelspec": {
   "name": "python3",
   "display_name": "Python 3"
  },
  "language_info": {
   "name": "python"
  }
 },
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "8eaYo-b24Wvg"
   },
   "source": [
    "# Combine interview reports\n",
    "\n",
    "Merges every per-respondent `*_interview_results.xlsx` workbook found in one run directory into a single `interview_results_FULL_REPORT.xlsx`, adding a `Respondent Agent` column so each row stays attributable.\n",
    "\n",
    "**Security note:** cell outputs are saved with the notebook. A previously committed output of `Config.print_environment()` contained live API keys, so that call is intentionally not made here. Any key that has ever appeared in a saved output must be treated as compromised and rotated. Clear outputs before committing."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "_EsZoeD7g2Ap"
   },
   "outputs": [],
   "source": [
    "# Colab-only setup. Use %pip (not !pip) so packages land in the kernel's environment.\n",
    "# 'python-dotenv' is the real package; the 'dotenv' name on PyPI is only a shim that pulls it in.\n",
    "# Pinned versions are the ones the last recorded run downloaded; the rest ship with Colab.\n",
    "%pip install -q python-dotenv==1.1.0 XlsxWriter==3.2.2 boto3==1.37.30\n",
    "%pip install -q pydantic openpyxl pandas"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Setup: imports, configuration, environment"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import sys\n",
    "\n",
    "import pandas as pd\n",
    "from google.colab import drive\n",
    "\n",
    "# Everything a reader may need to change for another run lives here.\n",
    "BASE_DIR = '/content/drive/MyDrive/Colab Notebooks/PreData.AI/src/predataai/.'\n",
    "ENV_NAME = 'itc_frozenfood.dev1'\n",
    "REPORT_TYPE = 'interview_results'\n",
    "REPORT_RUN_DIR = 'interviewresponses_04-08_10-19'\n",
    "\n",
    "drive.mount('/content/drive')\n",
    "\n",
    "# Project modules live on Drive, so they are importable only after the mount.\n",
    "common_dir = f'{BASE_DIR}/common'\n",
    "sys.path.append(common_dir)\n",
    "%cd {common_dir}\n",
    "\n",
    "from Config import Config\n",
    "\n",
    "Config.load_environment(BASE_DIR, ENV_NAME)\n",
    "# Do NOT call Config.print_environment() here: its output includes API keys,\n",
    "# and notebook outputs are committed together with the file.\n",
    "print(f'Output Directory: {Config.output_dir}')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Combine the per-respondent workbooks"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "v7z1LWRyz-bh"
   },
   "outputs": [],
   "source": [
    "report_dir = f'{Config.output_dir}/{REPORT_RUN_DIR}'\n",
    "full_report_file = f'{report_dir}/{REPORT_TYPE}_FULL_REPORT.xlsx'\n",
    "\n",
    "# Raise instead of sys.exit(): in a notebook an exception stops Run All with a clear message.\n",
    "if not os.path.isdir(report_dir):\n",
    "    raise FileNotFoundError(f'Directory does not exist {report_dir}')\n",
    "\n",
    "report_suffix = f'_{REPORT_TYPE}.xlsx'\n",
    "# Sorted so the combined row order is reproducible (os.listdir order is arbitrary).\n",
    "# Files starting with '~$' are Excel lock files, not reports. The FULL_REPORT file\n",
    "# does not end with the suffix, so re-running never folds it back into itself.\n",
    "report_files = sorted(\n",
    "    filename for filename in os.listdir(report_dir)\n",
    "    if filename.endswith(report_suffix) and not filename.startswith('~$')\n",
    ")\n",
    "if not report_files:\n",
    "    raise FileNotFoundError(f'No report files named *{REPORT_TYPE}* were found in {report_dir}')\n",
    "\n",
    "respondent_frames = []\n",
    "for report_file in report_files:\n",
    "    # File names look like '<respondent id>_interview_results.xlsx'.\n",
    "    respondent_agent_id = report_file[:-len(report_suffix)]\n",
    "    print(f'Processing agent {respondent_agent_id}')\n",
    "    interview_response_df = pd.read_excel(os.path.join(report_dir, report_file))\n",
    "    # Tag every row with its respondent, as the first column, before merging.\n",
    "    interview_response_df.insert(0, 'Respondent Agent', respondent_agent_id)\n",
    "    respondent_frames.append(interview_response_df)\n",
    "\n",
    "# Concatenate once; growing a DataFrame inside the loop re-copies all rows every iteration.\n",
    "full_report_df = pd.concat(respondent_frames, ignore_index=True)\n",
    "\n",
    "if not full_report_df.empty:\n",
    "    full_report_df.to_excel(full_report_file, index=False)\n",
    "    print(f'✅ All reports written to {full_report_file}')\n",
    "else:\n",
    "    print(f'⚠️ No reports were processed. {full_report_file} was not created.')"
   ]
  }
 ]
}

common/Config.py ADDED Viewed

	@@ -0,0 +1,183 @@

+from dotenv import load_dotenv
+import os
+class Config:
+    # === General Environment Info ===
+    env_name                           = None
+    num_respondents                    = None
+    num_focus_groups                   = None
+    # === Directories and Files ===
+    base_dir                           = None
+    config_dir                         = None
+    test_result_dir                    = None
+    input_dir                          = None
+    output_dir                         = None
+    respondent_summary_file            = None
+    focus_group_summary_file           = None
+    respondent_details_file            = None
+    data_dictionary_file               = None
+    personality_question_file          = None
+    personality_scoring_file           = None
+    style_tone_question_file           = None
+    interview_question_file            = None
+    survey_question_file               = None
+    interview_validation_files         = None
+    # === Respondent Agent Configs ===
+    respondent_agent_host              = None
+    respondent_agent_model             = None
+    respondent_agent_api_key           = None
+    respondent_agent_url               = None
+    respondent_agent_temperature       = None
+    respondent_agent_top_p             = None
+    respondent_agent_frequency_penalty = None
+    respondent_agent_presence_penalty  = None
+    # === Processing Agent Configs ===
+    processing_agent_host              = None
+    processing_agent_model             = None
+    processing_agent_api_key           = None
+    processing_agent_url               = None
+    processing_agent_temperature       = None
+    processing_agent_top_p             = None
+    processing_agent_frequency_penalty = None
+    processing_agent_presence_penalty  = None
+    # === Processor Configs ===
+    processor_host                     = None
+    processor_model                    = None
+    processor_api_key                  = None
+    processor_url                      = None
+    processor_temperature              = None
+    processor_top_p                    = None
+    processor_frequency_penalty        = None
+    processor_presence_penalty         = None
+    # Function to load the environment variables based on the given environment name
+    @classmethod
+    def load_environment(cls, base_dir, my_env_name):
+        # Determine the path to the .env file based on the environment name
+        env_file = f'{base_dir}/config/{my_env_name}.env'  # Update the base path as needed
+        # Load the environment variables from the specified .env file
+        load_dotenv(dotenv_path=env_file)
+        cls.base_dir                           = base_dir
+        cls.env_name                           = my_env_name
+        cls.num_respondents                    = int(os.getenv('NUM_RESPONDENTS', 0))
+        cls.num_focus_groups                   = int(os.getenv('NUM_FOCUS_GROUPS', 0))
+        # Construct paths based on BASE_DIR and subdirectories/filenames
+        cls.config_dir                         = f"{base_dir}/{os.getenv('CONFIG_SUBDIR')}"
+        cls.test_result_dir                    = f"{base_dir}/{os.getenv('TEST_SUBDIR')}"
+        cls.input_dir                          = f"{base_dir}/{os.getenv('INPUT_SUBDIR')}"
+        cls.output_dir                         = f"{base_dir}/{os.getenv('OUTPUT_SUBDIR')}"
+        cls.respondent_summary_file            = f"{cls.config_dir}/{os.getenv('RESPONDENT_SUMMARY_FILE')}"
+        cls.focus_group_summary_file           = f"{cls.config_dir}/{os.getenv('FOCUS_GROUP_SUMMARY_FILE')}"
+        cls.respondent_details_file            = f"{cls.config_dir}/{os.getenv('RESPONDENT_DETAILS_FILE')}"
+        cls.data_dictionary_file               = f"{cls.config_dir}/{os.getenv('DATA_DICTIONARY_FILE')}"
+        cls.personality_question_file          = f"{cls.config_dir}/{os.getenv('PERSONALITY_QUESTION_FILE')}"
+        cls.personality_scoring_file           = f"{cls.config_dir}/{os.getenv('PERSONALITY_SCORING_FILE')}"
+        cls.style_tone_question_file           = f"{cls.config_dir}/{os.getenv('STYLE_TONE_QUESTION_FILE')}"
+        cls.interview_question_file            = f"{cls.config_dir}/{os.getenv('INTERVIEW_QUESTION_FILE')}"
+        cls.survey_question_file               = f"{cls.config_dir}/{os.getenv('SURVEY_QUESTION_FILE')}"
+        cls.interview_validation_files         = f"{cls.config_dir}/{os.getenv('INTERVIEW_VALIDATION_FILES')}"
+        # Respondent Agent Model: Load the environment variables, API keys, and parameters
+        cls.respondent_agent_host              = os.getenv(os.getenv("RESPONDENT_AGENT_HOST"))
+        cls.respondent_agent_model             = os.getenv(os.getenv("RESPONDENT_AGENT_MODEL"))
+        respondent_agent_prefix                = (lambda: os.getenv('RESPONDENT_AGENT_HOST').replace('_AGENT_HOST', ''))()
+        cls.respondent_agent_api_key           = os.getenv(f"{respondent_agent_prefix}_API_KEY")
+        cls.respondent_agent_url               = os.getenv(f"{respondent_agent_prefix}_URL")
+        cls.respondent_agent_temperature       = float(os.getenv(f"{respondent_agent_prefix}_TEMPERATURE", 0.0))
+        cls.respondent_agent_top_p             = float(os.getenv(f"{respondent_agent_prefix}_TOP_P", 0.0))
+        cls.respondent_agent_frequency_penalty = float(os.getenv(f"{respondent_agent_prefix}_FREQUENCY_PENALTY", 0.0))
+        cls.respondent_agent_presence_penalty  = float(os.getenv(f"{respondent_agent_prefix}_PRESENCE_PENALTY", 0.0))
+        # Processing Agent Model: Load the environment variables, API keys, and parameters
+        cls.processing_agent_host              = os.getenv(os.getenv("PROCESSING_AGENT_HOST"))
+        cls.processing_agent_model             = os.getenv(os.getenv("PROCESSING_AGENT_MODEL"))
+        processing_agent_prefix                = (lambda: os.getenv('PROCESSING_AGENT_HOST').replace('_AGENT_HOST', ''))()
+        cls.processing_agent_api_key           = os.getenv(f"{processing_agent_prefix}_API_KEY")
+        cls.processing_agent_url               = os.getenv(f"{processing_agent_prefix}_URL")
+        cls.processing_agent_temperature       = float(os.getenv(f"{processing_agent_prefix}_TEMPERATURE", 0.0))
+        cls.processing_agent_top_p             = float(os.getenv(f"{processing_agent_prefix}_TOP_P", 0.0))
+        cls.processing_agent_frequency_penalty = float(os.getenv(f"{processing_agent_prefix}_FREQUENCY_PENALTY", 0.0))
+        cls.processing_agent_presence_penalty  = float(os.getenv(f"{processing_agent_prefix}_PRESENCE_PENALTY", 0.0))
+        # Processor Model: Load the environment variables, API keys, and parameters
+        cls.processor_host                     = os.getenv(os.getenv("PROCESSOR_HOST"))
+        cls.processor_model                    = os.getenv(os.getenv("PROCESSOR_MODEL"))
+        processor_prefix                       = (lambda: os.getenv('PROCESSOR_HOST').replace('_AGENT_HOST', ''))()
+        cls.processor_api_key                  = os.getenv(f"{processor_prefix}_API_KEY")
+        cls.processor_url                      = os.getenv(f"{processor_prefix}_URL")
+        cls.processor_temperature              = float(os.getenv(f"{processor_prefix}_TEMPERATURE", 0.0))
+        cls.processor_top_p                    = float(os.getenv(f"{processor_prefix}_TOP_P", 0.0))
+        cls.processor_frequency_penalty        = float(os.getenv(f"{processor_prefix}_FREQUENCY_PENALTY", 0.0))
+        cls.processor_presence_penalty         = float(os.getenv(f"{processor_prefix}_PRESENCE_PENALTY", 0.0))
+    @classmethod
+    def print_environment(cls):
+        print("ENVIRONMENT CONFIGURATION")
+        print(f"Environment Name:                   {cls.env_name}")
+        print(f"Number of Respondents:              {cls.num_respondents}")
+        print(f"Number of Focus Groups:             {cls.num_focus_groups}")
+        print("\nDIRECTORIES:")
+        print(f"Base Directory:                     {cls.base_dir}")
+        print(f"Config Directory:                   {cls.config_dir}")
+        print(f"Test Result Directory:              {cls.test_result_dir}")
+        print(f"Input Directory:                    {cls.input_dir}")
+        print(f"Output Directory:                   {cls.output_dir}")
+        print("\nFILES:")
+        print(f"Respondent Summary File:            {cls.respondent_summary_file}")
+        print(f"Focus Group Summary File:           {cls.focus_group_summary_file}")
+        print(f"Personality Question File:          {cls.personality_question_file}")
+        print(f"Respondent Details File:            {cls.respondent_details_file}")
+        print(f"Data Dictionary File:               {cls.data_dictionary_file}")
+        print(f"Personality Scoring File:           {cls.personality_scoring_file}")
+        print(f"Style Tone Question File:           {cls.style_tone_question_file}")
+        print(f"Interview Question File:            {cls.interview_question_file}")
+        print(f"Survey Question File:               {cls.survey_question_file}")
+        print(f"Interview Validation Files:         {cls.interview_validation_files}")
+        print("\nRESPONDENT AGENT CONFIGS")
+        print(f"Respondent Agent Host:              {cls.respondent_agent_host}")
+        print(f"Respondent Agent Model:             {cls.respondent_agent_model}")
+        print(f"Respondent Agent API Key:           {cls.respondent_agent_api_key}")
+        print(f"Respondent Agent URL:               {cls.respondent_agent_url}")
+        print(f"Respondent Agent Temperature:       {cls.respondent_agent_temperature}")
+        print(f"Respondent Agent Top P:             {cls.respondent_agent_top_p}")
+        print(f"Respondent Agent Frequency Penalty: {cls.respondent_agent_frequency_penalty}")
+        print(f"Respondent Agent Presence Penalty:  {cls.respondent_agent_presence_penalty}")
+        print("\nPROCESSING AGENT CONFIGS")
+        print(f"Processing Agent Host:              {cls.processing_agent_host}")
+        print(f"Processing Agent Name:              {cls.processing_agent_model}")
+        print(f"Processing Agent API Key:           {cls.processing_agent_api_key}")
+        print(f"Processing Agent URL:               {cls.processing_agent_url}")
+        print(f"Processing Agent Temperature:       {cls.processing_agent_temperature}")
+        print(f"Processing Agent Top P:             {cls.processing_agent_top_p}")
+        print(f"Processing Agent Frequency Penalty: {cls.processing_agent_frequency_penalty}")
+        print(f"Processing Agent Presence Penalty:  {cls.processing_agent_presence_penalty}")
+        print("\nPROCESSOR CONFIGS")
+        print(f"Processor Host:                     {cls.processor_host}")
+        print(f"Processor Name:                     {cls.processor_model}")
+        print(f"Processor API Key:                  {cls.processor_api_key}")
+        print(f"Processor URL:                      {cls.processor_url}")
+        print(f"Processor Temperature:              {cls.processor_temperature}")
+        print(f"Processor Top P:                    {cls.processor_top_p}")
+        print(f"Processor Frequency Penalty:        {cls.processor_frequency_penalty}")
+        print(f"Processor Presence Penalty:         {cls.processor_presence_penalty}")

common/DataDictionary.py ADDED Viewed

	@@ -0,0 +1,134 @@

+import re
+import datetime
+import textwrap
+from Config import Config
+import pandas as pd
+import numpy as np
+class DataDictionary:
+    def __init__(self):
+        """
+        Initialize the DataDictionary instance with an empty list of entries.
+        """
+        self.entries = []
+    def add_entry(self, entry):
+        """
+        Add an entry to the data dictionary. Entry should be a dict with expected keys.
+        Filters out None or empty-string values, and ensures required keys are present.
+        """
+        required_keys = {"Type", "Parameter", "Description"}
+        missing = required_keys - entry.keys()
+        if missing:
+            raise ValueError(f"Missing required fields in entry: {missing}")
+        # Optionally filter or transform the entry
+        clean_entry = {k: v for k, v in entry.items() if v is not None and v != ""}
+        self.entries.append(clean_entry)
+    def get_types(self):
+        """
+        Extract all types defined for the data dictionary, preserving insertion order.
+        Returns:
+        list: A list of all unique types in the dictionary, preserving order.
+        """
+        seen = set()
+        ordered_types = []
+        for entry in self.entries:
+            Type = entry.get("Type")
+            if Type not in seen and Type is not None:
+                seen.add(Type)
+                ordered_types.append(Type)
+        return ordered_types
+    def get_parameters(self, type="All"):
+        """
+        Extract parameters of a particular type from the data dictionary, preserving insertion order.
+        Args:
+        type (str): Type of entries to return (defaults to "All").
+        Returns:
+        list: A list of all unique parameters matching the specified type, preserving order.
+        """
+        seen = set()
+        ordered_parameters = []
+        for entry in self.entries:
+            if type == "All" or entry["Type"] == type:
+                parameter = entry["Parameter"]
+                if parameter not in seen:
+                    seen.add(parameter)
+                    ordered_parameters.append(parameter)
+        return ordered_parameters
+    def get_columns(self):
+        """
+        Generate a list of column names in the format type_parameter.
+        Returns:
+        list: A list of column names preserving order.
+        """
+        columns = []
+        for entry in self.entries:
+            Type = entry["Type"]
+            Parameter = entry["Parameter"]
+            if Type and Parameter:  # Ensure both Type and Parameter exist
+                columns.append(f"{Type}_{Parameter}")
+        return columns
+    def filter_entries(self, Source=None, Type=None, Parameter=None):
+        """
+        Filter entries based on Source, Type, or Parameter.
+        Args:
+        Source (str, optional): The source to filter by.
+        Type (str, optional): The type to filter by.
+        Parameter (str, optional): The parameter to filter by.
+        Returns:
+        list: A list of entries matching the filter criteria.
+        """
+        return [
+            entry for entry in self.entries
+            if (Source is None or entry["Source"] == Source) and
+               (Type is None or entry["Type"] == Type) and
+               (Parameter is None or entry["Parameter"] == Parameter)
+        ]
+    @staticmethod
+    def generate_dictionary(data_dictionary_file):
+        """
+        Static method to generate a DataDictionary instance from an Excel (.xlsx) file.
+        Args:
+        data_dictionary_file (str): The path to the Excel file containing data dictionary entries.
+        Returns:
+        DataDictionary: A populated DataDictionary instance.
+        """
+        import pandas as pd  # Ensure pandas is imported
+        df = pd.read_excel(data_dictionary_file)
+        data_dictionary = DataDictionary()
+        for _, row in df.iterrows():
+            data_dictionary.add_entry({
+                "Type": row["Type"],
+                "Parameter": row["Parameter"],
+                "Description": row["Description"],
+                "Source": row.get("Source"),
+                "ValidValues": row.get("Scoring_Method"),
+                "InferredLogic": row.get("Inferred_Logic"),
+            })
+        return data_dictionary
+    def __repr__(self):
+        return f"DataDictionary({len(self.entries)} entries)"

common/FastFacts.py ADDED Viewed

	@@ -0,0 +1,81 @@

+import re
+import datetime
+import textwrap
+from Config import Config
+from DataDictionary import *
+import pandas as pd
+import numpy as np
+class FastFacts:
+    def __init__(self):
+        self.facts = None  # Lazily initialised
+    def add_fact(self, fact):
+        """
+        Add a single fact to the list, ensuring lazy initialisation.
+        """
+        if not isinstance(fact, str):
+            print("Only strings are allowed as facts.")
+            return
+        # Initialise the list if it doesn't exist
+        if self.facts is None:
+            self.facts = []
+        self.facts.append(fact)
+    def add_facts(self, facts):
+        """
+        Add multiple facts to the list, ensuring lazy initialisation.
+        """
+        if not isinstance(facts, (set, list)):
+            print("Facts must be provided as a set or list.")
+            return
+        # Initialise the list if it doesn't exist
+        if self.facts is None:
+            self.facts = []
+        for fact in facts:
+            if isinstance(fact, str):
+                self.facts.append(fact)
+            else:
+                print(f"Skipping non-string fact: {fact}")
+    def __repr__(self):
+        if not self.facts:
+            return f"{self.__class__.__name__}: No facts available"
+        formatted_facts = ", ".join(f"<{fact}>" for fact in self.facts)
+        return f"{self.__class__.__name__}: {formatted_facts}"
+    def to_dict(self):
+        """
+        Convert the FastFacts to a dictionary. Return an empty list if no facts are available.
+        """
+        return {"facts": self.facts or []}
+    @staticmethod
+    def read_from_excel(fact_file):
+        """
+        Read facts from an Excel file and populate a FastFacts object.
+        Args:
+        fact_file (str): Path to the Excel file.
+        Returns:
+        FastFacts: A populated FastFacts object.
+        """
+        try:
+            df = pd.read_excel(fact_file)
+            facts_list = df["FastFacts"].dropna().tolist()  # Assuming the facts are in a column named 'FastFacts'
+            # Create a FastFacts object and populate it with facts
+            fast_facts_obj = FastFacts()
+            fast_facts_obj.add_facts(facts_list)
+            return fast_facts_obj
+        except Exception as e:
+            print(f"An error occurred while reading from the Excel file: {e}")
+            return None

common/Interview.py ADDED Viewed

	@@ -0,0 +1,75 @@

+from typing import List, Optional
+from pydantic import BaseModel
+import pandas as pd
+from itertools import groupby
+class QAEntry(BaseModel):
+    # One question/answer row of an interview, as loaded by InterviewReport's Excel readers.
+    # NOTE(review): the Optional fields declare no default; under pydantic v2 that presumably
+    # makes them required-but-nullable — confirm against the installed pydantic version.
+    # Question number; rendered as "Q <Num>" by InterviewReport.__repr__.
+    Num: int
+    # Section heading; InterviewReport.__repr__ groups consecutive entries by this value.
+    Section: str
+    # The question text.
+    Question: str
+    # Taken from the 'Expected_Output' spreadsheet column when that cell is not empty.
+    Expected_Output: Optional[str]
+    # Compared case-insensitively by InterviewReport.get_respondent_responses.
+    Respondent: Optional[str]
+    # The recorded answer; rendered after "A:" by InterviewReport.__repr__.
+    Answer: Optional[str]
+class InterviewReport(BaseModel):
+    Entries: List[QAEntry]
+    def __repr__(self):
+        output = ""
+        for section, entries in groupby(self.Entries, key=lambda entry: entry.Section):
+            output += f"{section}:\n"
+            for entry in entries:
+                output += f"Q {entry.Num}: {entry.Question}\n"
+                output += f"Expected Output: {entry.Expected_Output if entry.Expected_Output else 'No Expected Output'}\n"
+                output += f"Respondent: {entry.Respondent if entry.Respondent else 'No Respondent'}\n"
+                output += f"A: {entry.Answer if entry.Answer else 'No Answer'}\n"
+        return output
+    def get_respondent_responses(self,respondent):
+        respondent_entries = [
+          entry for entry in self.Entries
+          if entry.Respondent and entry.Respondent.lower() == respondent.lower()
+        ]
+        return respondent_entries
+    @staticmethod
+    def generate_interview_script(interview_file):
+        df = pd.read_excel(interview_file)
+        qa_entries = []
+        for idx, row in enumerate(df.to_dict('records')):
+            print(f"Processing row {idx}: {row}")  # Debug: show the full row being processed
+            entry = QAEntry(
+                Num              = row['Num'],
+                Section          = row['Section'],
+                Question         = row['Question'],
+                Expected_Output  = row.get('Expected_Output') if pd.notna(row.get('Expected_Output')) else None,
+                Respondent       = None,
+                Answer           = None
+            )
+            qa_entries.append(entry)
+        return InterviewReport(Entries = qa_entries)
+    @staticmethod
+    def generate_interview_report(interview_file):
+        df = pd.read_excel(interview_file)
+        qa_entries = [
+            QAEntry(
+                Num        = row['Num'],
+                Section    = row['Section'],
+                Question   = row['Question'],
+                Expected_Output = row.get('Expected_Output') if pd.notna(row.get('Expected_Output')) else "No Expected Output Provided",
+                Respondent = row.get('Respondent') if pd.notna(row.get('Respondent')) else "No Respondent Provided",
+                Answer     = row.get('Answer') if pd.notna(row.get('Answer')) else "No Answer Provided"
+            )
+            for row in df.to_dict('records')
+        ]
+        return InterviewReport(Entries = qa_entries)

common/InterviewUtilities.py ADDED Viewed

	@@ -0,0 +1,144 @@

+import re
+import json
+def parse_expected_output_fields(expected_output_text):
+    """
+    Parses expected_output_text into a list of (key, description) tuples.
+    """
+    fields = []
+    lines = expected_output_text.strip().splitlines()
+    for line in lines:
+        if ':' not in line:
+            continue
+        key, description = line.split(':', 1)
+        fields.append((key.strip(), description.strip()))
+    return fields
+def extract_fields_from_expected_output(expected_output_text):
+    """
+    Returns just the list of keys (field names) from expected_output_text.
+    """
+    parsed_fields = parse_expected_output_fields(expected_output_text)
+    return [key for key, _ in parsed_fields]
+def split_json_string(text):
+    """
+    Best of both worlds:
+    - Splits text into 'thought' and 'JSON' parts
+    - Scans for all possible { positions
+    - Cleans unescaped newlines inside quotes
+    - Strips junk between </think> and JSON if JSON exists
+    - Preserves full text after </think> if no JSON
+    """
+    # Step 1: Split at </think> if exists
+    if '</think>' in text:
+        thought_part, possible_json_part = text.split('</think>', 1)
+        thought_part = thought_part.strip()
+        possible_json_part = possible_json_part.strip()
+    else:
+        thought_part = None
+        possible_json_part = text.strip()
+    # Step 2: Find all { positions
+    brace_positions = [m.start() for m in re.finditer(r'{', possible_json_part)]
+    # Clean function: fix newlines inside quoted strings
+    def clean_json_formatting(text):
+        def fix_inside_quotes(match):
+            content = match.group(1)
+            fixed = content.replace('\n', '\\n').replace('\r', '\\n')
+            return f'"{fixed}"'
+        return re.sub(r'"(.*?)"', fix_inside_quotes, text, flags=re.DOTALL)
+    for pos in brace_positions:
+        candidate = possible_json_part[pos:].strip()
+        # Pre-clean
+        candidate = clean_json_formatting(candidate)
+        # Fix double braces if necessary
+        if candidate.startswith("{{") and "}}" in candidate:
+            candidate = candidate.replace("{{", "{", 1).replace("}}", "}", 1)
+        # Must start with {" or {'
+        if not re.match(r'^\{\s*["\']', candidate):
+            continue  # not real JSON, skip
+        try:
+            json.loads(candidate)
+            # ✅ Successful parse
+            return thought_part, candidate
+        except json.JSONDecodeError:
+            continue  # try next
+    # 🛠 No valid JSON found — return thought and full original remainder (no chopping)
+    return thought_part, possible_json_part
+def extract_and_parse_json(result_text):
+    """
+    Extracts and parses JSON output, handling cases where JSON is enclosed in triple backticks
+    (```json ... ```) or already correctly formatted `{}`.
+    Args:
+        result_text (str): The raw text output containing JSON data.
+    Returns:
+        dict or None: Parsed JSON object if successful, None otherwise.
+    """
+    if not result_text:
+        print("🚨 No result text data received.")
+        return None
+    # 🛠 Clean unescaped line breaks that often break LLM JSON output
+    def clean_json_formatting(text):
+        # Replace unescaped newlines with a space
+        return re.sub(r'(?<!\\)\n', ' ', text)
+    # ✅ Try parsing directly after cleaning line breaks
+    cleaned_direct = clean_json_formatting(result_text)
+    try:
+        return json.loads(cleaned_direct)
+    except json.JSONDecodeError:
+        print("Unable to parse cleaned direct JSON.")
+        pass
+    # ✅ Try extracting JSON from triple backticks
+    match = re.search(r'```json\s*\n({[\s\S]+?})\n```', result_text, re.DOTALL)
+    if match:
+        try:
+            return json.loads(match.group(1).strip())
+        except json.JSONDecodeError:
+            pass  # If still invalid, return None
+    print("🚨 No valid JSON found.")
+    return None  # No valid JSON detected
+def generate_json_expected_output(expected_output_text):
+    """
+    Generates a JSON-style expected output based on expected_output_text.
+    """
+    parsed_fields = parse_expected_output_fields(expected_output_text)
+    json_fields = []
+    for key, description in parsed_fields:
+        # Convert to JSON-style key (lowercase, underscores preserved)
+        json_key = key.lower()
+        json_fields.append(f'    "{json_key}": {description},')
+    # Remove trailing comma from the last entry
+    if json_fields:
+        json_fields[-1] = json_fields[-1].rstrip(',')
+    # Join fields
+    json_body = "\n".join(json_fields)
+    output = (
+        "You must return your answer strictly in the following JSON format. "
+        "Do not include any markdown, commentary, or extra text. The response must be valid JSON:\n\n"
+        "{\n"
+        f"{json_body}\n"
+        "}"
+    )
+    return output

common/LLMConfig.py ADDED Viewed

	@@ -0,0 +1,155 @@

+from langchain_openai import ChatOpenAI
+from langchain_groq import ChatGroq
+from langchain_together import ChatTogether
+from crewai import LLM
+from Config import Config
+import os
+# ========== PUBLIC INTERFACE ==========
+def get_respondent_agent_llm_instance(model_type=None):
+    # Default to Config if model_type is not specified
+    if not model_type:
+        model_type        = Config.respondent_agent_host
+        model             = Config.respondent_agent_model
+        api_key           = Config.respondent_agent_api_key
+        url               = Config.respondent_agent_url
+        temperature       = Config.respondent_agent_temperature
+        top_p             = Config.respondent_agent_top_p
+        frequency_penalty = Config.respondent_agent_frequency_penalty
+        presence_penalty  = Config.respondent_agent_presence_penalty
+    # If model_type is specified, determine the prefix (e.g., "GROQ" for model_type="groq") to fetch values from env
+    else:
+        prefix            = model_type.upper()
+        model             = os.getenv(f"{prefix}_AGENT_MODEL")
+        api_key           = os.getenv(f"{prefix}_API_KEY")
+        url               = os.getenv(f"{prefix}_URL")
+        temperature       = float(os.getenv(f"{prefix}_TEMPERATURE", 0.7))
+        top_p             = float(os.getenv(f"{prefix}_TOP_P", 1.0))
+        frequency_penalty = float(os.getenv(f"{prefix}_FREQUENCY_PENALTY", 0.0))
+        presence_penalty  = float(os.getenv(f"{prefix}_PRESENCE_PENALTY", 0.0))
+    if not api_key:
+        raise ValueError(f"API key not found for model_type={model_type}.")
+    if not model:
+        raise ValueError(f"Model not found for model_type={model_type}.")
+    print(f"Respondent Agent LLM: model_type={model_type}, model={model}, api_key={'*****' if api_key else 'MISSING'}, url={url}")
+    print(f"Params: temperature={temperature}, top_p={top_p}, frequency_penalty={frequency_penalty}, presence_penalty={presence_penalty}")
+    return get_crewai_instance(model_type, model, api_key, url, temperature, top_p, frequency_penalty, presence_penalty)
+def get_processing_agent_llm_instance(model_type=None):
+    # Default to Config if model_type not specified
+    if not model_type:
+        model_type        = Config.processing_agent_host
+        model             = Config.processing_agent_model
+        api_key           = Config.processing_agent_api_key
+        url               = Config.processing_agent_url
+        temperature       = Config.processing_agent_temperature
+        top_p             = Config.processing_agent_top_p
+        frequency_penalty = Config.processing_agent_frequency_penalty
+        presence_penalty  = Config.processing_agent_presence_penalty
+    # If model_type is specified, determine the prefix (e.g., "GROQ" for model_type="groq") to fetch values from env
+    else:
+        prefix            = model_type.upper()
+        model             = os.getenv(f"{prefix}_AGENT_MODEL")
+        api_key           = os.getenv(f"{prefix}_API_KEY")
+        url               = os.getenv(f"{prefix}_URL")
+        temperature       = float(os.getenv(f"{prefix}_TEMPERATURE", 0.7))
+        top_p             = float(os.getenv(f"{prefix}_TOP_P", 1.0))
+        frequency_penalty = float(os.getenv(f"{prefix}_FREQUENCY_PENALTY", 0.0))
+        presence_penalty  = float(os.getenv(f"{prefix}_PRESENCE_PENALTY", 0.0))
+    if not api_key:
+        raise ValueError(f"API key not found for model_type={model_type}.")
+    if not model:
+        raise ValueError(f"Model not found for model_type={model_type}.")
+    print(f"Processing Agent LLM: model_type={model_type}, model={model}, api_key={'*****' if api_key else 'MISSING'}, url={url}")
+    print(f"Params: temperature={temperature}, top_p={top_p}, frequency_penalty={frequency_penalty}, presence_penalty={presence_penalty}")
+    return get_crewai_instance(model_type, model, api_key, url, temperature, top_p, frequency_penalty, presence_penalty)
+def get_processor_llm_instance(model_type=None):
+    # Default to Config if model_type not specified
+    if not model_type:
+        model_type        = Config.processor_host
+        model             = Config.processor_model
+        api_key           = Config.processor_api_key
+        url               = Config.processor_url
+        temperature       = Config.processor_temperature
+        top_p             = Config.processor_top_p
+        frequency_penalty = Config.processor_frequency_penalty
+        presence_penalty  = Config.processor_presence_penalty
+    # If model_type is specified, determine the prefix (e.g., "GROQ" for model_type="groq") to fetch values from env
+    else:
+        prefix            = model_type.upper()
+        model             = os.getenv(f"{prefix}_AGENT_MODEL")
+        api_key           = os.getenv(f"{prefix}_API_KEY")
+        url               = os.getenv(f"{prefix}_URL")
+        temperature       = float(os.getenv(f"{prefix}_TEMPERATURE", 0.7))
+        top_p             = float(os.getenv(f"{prefix}_TOP_P", 1.0))
+        frequency_penalty = float(os.getenv(f"{prefix}_FREQUENCY_PENALTY", 0.0))
+        presence_penalty  = float(os.getenv(f"{prefix}_PRESENCE_PENALTY", 0.0))
+    if not api_key:
+        raise ValueError(f"API key not found for model_type={model_type}.")
+    if not model:
+        raise ValueError(f"Model not found for model_type={model_type}.")
+    print(f"Processor LLM: model_type={model_type}, model={model}, api_key={'*****' if api_key else 'MISSING'}, url={url}")
+    print(f"Params: temperature={temperature}, top_p={top_p}, frequency_penalty={frequency_penalty}, presence_penalty={presence_penalty}")
+    return get_langchain_instance(model_type, model, api_key, url, temperature, top_p, frequency_penalty, presence_penalty)
+# ========== INTERNAL HELPERS ==========
def get_crewai_instance(model_type, model, api_key, url, temperature, top_p, frequency_penalty, presence_penalty):
    """
    Create the CrewAI-side LLM client for the given provider.
    The provider name is matched case-insensitively. Groq receives only the temperature;
    the other providers receive every sampling parameter that is not None.
    Raises:
        ValueError: If the provider is not groq, openai, openrouter or together_ai.
    """
    provider = model_type.lower()
    if provider == 'groq':
        return ChatGroq(groq_api_key=api_key, model_name=f"{provider}/{model}", temperature=temperature, model_kwargs={})
    # Collect only the sampling parameters that were actually supplied.
    sampling_kwargs = {}
    for option_name, option_value in (
        ("temperature", temperature),
        ("top_p", top_p),
        ("frequency_penalty", frequency_penalty),
        ("presence_penalty", presence_penalty),
    ):
        if option_value is not None:
            sampling_kwargs[option_name] = option_value
    if provider == 'openai':
        return ChatOpenAI(model=model, api_key=api_key, **sampling_kwargs)
    if provider == 'openrouter':
        return ChatOpenAI(base_url=url, model=f"{provider}/{model}", api_key=api_key, **sampling_kwargs)
    if provider == 'together_ai':
        return LLM(model=f"{provider}/{model}", api_key=api_key, api_base=url, **sampling_kwargs)
    raise ValueError(f"Unsupported model type for CrewAI: {provider}")
def get_langchain_instance(model_type, model, api_key, url, temperature, top_p, frequency_penalty, presence_penalty):
    """
    Create the LangChain chat model for the given provider.
    The provider name is matched case-insensitively. Groq receives only the temperature;
    the other providers receive every sampling parameter that is not None.
    Raises:
        ValueError: If the provider is not groq, openai, openrouter or together_ai.
    """
    provider = model_type.lower()
    if provider == 'groq':
        return ChatGroq(groq_api_key=api_key, model_name=model, temperature=temperature, model_kwargs={})
    # Collect only the sampling parameters that were actually supplied.
    candidate_options = (
        ("temperature", temperature),
        ("top_p", top_p),
        ("frequency_penalty", frequency_penalty),
        ("presence_penalty", presence_penalty),
    )
    sampling_kwargs = dict(pair for pair in candidate_options if pair[1] is not None)
    if provider == 'openai':
        return ChatOpenAI(model=model, api_key=api_key, **sampling_kwargs)
    if provider == 'openrouter':
        return ChatOpenAI(base_url=url, model=model, api_key=api_key, **sampling_kwargs)
    if provider == 'together_ai':
        return ChatTogether(model=model, together_api_key=api_key, **sampling_kwargs)
    raise ValueError(f"Unsupported model type for LangChain: {provider}")

common/PersonalityValues.py ADDED Viewed

	@@ -0,0 +1,154 @@

+from pydantic import BaseModel
+from typing import List, Dict, Optional
+from collections import defaultdict
+import datetime
+import json
+import os
+import pandas as pd
+import re
+import numpy as np
+import pprint
+import math
+from UserProfile import *
class PVEntry(BaseModel):
    """
    One assessment question belonging to a single personality value.
    Instances are built row by row in PVAssessment.generate_personality_assessment,
    which reads the question metadata from a spreadsheet and leaves the
    Answer / Score / Assessment fields as None.
    """
    # NOTE(review): the Optional[...] fields declare no default value, so callers
    # presumably still have to pass them explicitly (as the loader does) — confirm
    # against the installed pydantic version.
    Num: int  # taken from the "Num" column
    Value: str  # personality value name; also used as the grouping key in PVAssessment.Entries
    Question: str  # taken from the "Assessment_Question" column
    Criteria: Optional[str]  # taken from the "Assessment_Criteria" column
    # ";"-separated "range: description" items, e.g. "1-10: ..."; parsed by get_score_definition
    Rating_Definition: Optional[str]
    Adjacent_Values: Optional[List[str]]  # comma-separated cell split by parse_values
    Opposite_Values: Optional[List[str]]  # comma-separated cell split by parse_values
    Answer: Optional[str]  # None when created by the loader
    Score: Optional[int]  # None when created by the loader
    Assessment: Optional[str]  # None when created by the loader
def parse_values(value_str: Optional[str], delimiter: str = ",") -> List[str]:
    """
    Split a delimited string into a list of trimmed, non-empty items.
    Anything that is not a string (None, NaN from a blank spreadsheet cell, numbers,
    lists, ...) yields an empty list. Empty items produced by an empty input, a
    trailing delimiter or doubled delimiters are dropped rather than returned as "".
    Args:
        value_str (Optional[str]): The input string to parse.
        delimiter (str): The delimiter to use for splitting. Defaults to ','.
    Returns:
        List[str]: The trimmed items in their original order.
    """
    # The type check alone covers None and float NaN, and unlike pd.isna it cannot
    # raise an "ambiguous truth value" error when handed a list or array.
    if not isinstance(value_str, str):
        return []
    trimmed_items = (item.strip() for item in value_str.split(delimiter))
    return [item for item in trimmed_items if item]
def extract_values_from_assessment_file(assessment_file):
    """
    Read an assessment spreadsheet and total the Score per Value.
    Scores are converted to numbers first; rows whose Value is missing or whose
    Score is missing or non-numeric are then discarded, so an unparseable score
    never contributes a silent 0 to the totals.
    Args:
        assessment_file (str): Path to the Excel file.
    Returns:
        list: One {"Value": ..., "Score": ...} dictionary per distinct Value.
    Raises:
        ValueError: If the file lacks a 'Value' or 'Score' column.
    """
    df = pd.read_excel(assessment_file)
    if "Value" not in df.columns or "Score" not in df.columns:
        raise ValueError("The file must contain 'Value' and 'Score' columns.")
    # assign() builds a new frame, so no column is written onto a slice of df.
    # Coercion must precede dropna() so that non-numeric scores (now NaN) are removed too.
    scored_rows = (
        df[["Value", "Score"]]
        .assign(Score=lambda frame: pd.to_numeric(frame["Score"], errors="coerce"))
        .dropna()
    )
    totals = scored_rows.groupby("Value", as_index=False).sum()
    return totals.to_dict(orient="records")
class PVAssessment(BaseModel):
    """
    Collection of personality-value assessment questions, keyed by value name.
    Entries maps each value name to the list of PVEntry questions defined for it.
    """
    Entries: dict[str, list[PVEntry]]
    @staticmethod
    def generate_personality_assessment(personality_file):
        """
        Build a PVAssessment from a spreadsheet of assessment questions.
        Reads the columns Num, Value, Assessment_Question, Assessment_Criteria,
        Rating_Definition, Adjacent_Values and Opposite_Values. Blank cells in the
        optional text columns are stored as None instead of pandas' NaN.
        Args:
            personality_file (str): Path to the Excel file.
        Returns:
            PVAssessment: Entries grouped by the "Value" column.
        """
        def blank_to_none(cell):
            # pandas reports an empty cell as float NaN, which is not a valid
            # Optional[str]; None is the value those fields are meant to hold.
            return None if pd.isna(cell) else cell
        df = pd.read_excel(personality_file)
        # Use defaultdict to allow appending multiple PVEntries per value
        entries = defaultdict(list)
        for _, row in df.iterrows():
            pv_entry = PVEntry(
                Num=row["Num"],
                Value=row["Value"],
                Question=row["Assessment_Question"],
                Criteria=blank_to_none(row["Assessment_Criteria"]),
                Rating_Definition=blank_to_none(row["Rating_Definition"]),
                Adjacent_Values=parse_values(row["Adjacent_Values"]),
                Opposite_Values=parse_values(row["Opposite_Values"]),
                Answer=None,
                Score=None,
                Assessment=None
            )
            entries[row["Value"]].append(pv_entry)
        return PVAssessment(Entries=dict(entries))
    @staticmethod
    def get_score_definition(value, score, pv_assessment):
        """
        Converts a numerical score (1-50) into a corresponding rating definition.
        The rating definition of the first entry for the value is expected to be a
        ";"-separated list of "lower-upper: description" (or "number: description")
        items; the description of the first item whose range contains the score is
        returned. Every failure is reported with print() and yields "".
        Args:
        value (str): The personality value key.
        score (int): A numerical score between 1 and 50.
        pv_assessment (PVAssessment): The personality assessment object.
        Returns:
        str: The corresponding rating definition, or an empty string if not found.
        """
        if not isinstance(pv_assessment, PVAssessment):
            print("Error: Expected a PVAssessment object.")
            return ""
        if not isinstance(score, int) or score < 1 or score > 50:
            print(f"Error: Invalid score '{score}' for '{value}'. Expected a number between 1 and 50.")
            return ""
        entry_list = pv_assessment.Entries.get(value)
        if not entry_list or not isinstance(entry_list, list) or len(entry_list) == 0:
            print(f"Error: No entries found for value '{value}'.")
            return ""
        # Use the first PVEntry in the list
        pv_entry = entry_list[0]
        rating_definition = pv_entry.Rating_Definition
        if not isinstance(rating_definition, str) or not rating_definition:
            print(f"Error: No valid rating definition found for '{value}'.")
            return ""
        rating_definition_list = parse_values(rating_definition, delimiter=";")
        # Find the corresponding description based on the score range
        for definition in rating_definition_list:
            try:
                # A missing ":" or a non-integer bound raises ValueError and the item is skipped.
                range_part, description = definition.split(":", 1)
                range_part = range_part.strip()
                if "-" in range_part:
                    range_lower, range_upper = map(int, range_part.split("-"))
                else:
                    range_lower = range_upper = int(range_part)
                if range_lower <= score <= range_upper:
                    return description.strip()
            except ValueError:
                print(f"Error: Invalid rating definition format for '{value}': {definition}")
                continue
        print(f"Error: No matching rating definition found for score {score} in '{value}'.")
        return ""

common/RespondentAgent.py ADDED Viewed

	@@ -0,0 +1,153 @@

+from crewai import Agent,Task,Process,Crew
+from crewai_tools import FileReadTool, TXTSearchTool
+from crewai.tasks import OutputFormat
+from pydantic import BaseModel
+from typing import List, Dict
+import datetime
+import json
+import os
+import pandas as pd
+import pprint
+from UserProfile import *
class RespondentAgent:
    """
    Couples a user profile with the CrewAI agent that simulates that respondent.
    """
    def __init__(self, user_profile, agent):
        self.user_profile, self.agent = user_profile, agent
    def set_user_profile(self, user_profile):
        """Replace the stored user profile."""
        self.user_profile = user_profile
    def set_agent(self, agent):
        """Replace the stored agent."""
        self.agent = agent
    def __repr__(self):
        return f"RespondentAgent(user_profile={self.user_profile}, agent={self.agent})"
    @staticmethod
    def create(user_profile, agent_detail_file, llm, respondent_type="INDIVIDUAL USER"):
        """
        Build a RespondentAgent whose prompts are derived from a profile and optional fast facts.
        The role embeds repr(user_profile); the goal is a fixed JTBD-oriented brief; the
        backstory embeds the fast facts read from agent_detail_file when that file exists
        and yields facts, otherwise a default sentence is used.
        :param user_profile: The backstory and user information (individual or group).
        :param agent_detail_file: Path to the fast-facts Excel file, or None.
        :param llm: The language model handed to the Agent.
        :param respondent_type: "FOCUS GROUP", "INDIVIDUAL USER REPRESENTING A SEGMENT",
                                or anything else for an individual respondent.
        :return: A RespondentAgent wrapping the profile and the configured Agent.
        """
        # Wording that tells the model whose perspective its answers must reflect.
        diversity_note = {
            "FOCUS GROUP": "your focus group",
            "INDIVIDUAL USER REPRESENTING A SEGMENT": "your segment",
        }.get(respondent_type, "your unique voice")
        profile_text = repr(user_profile)
        role_prompt = f"""
    You are simulating a {respondent_type} respondent based on the following {respondent_type} profile details:
    {profile_text}
    You must draw on your {respondent_type} profile in each and every one of your tasks.
        """
        print(f"Role: {role_prompt}")
        goal_prompt = f"""
        This is your VERY IMPORTANT GOAL:
        You represent a **{respondent_type}** with a specific **demographic background, cultural context, and lived experience**.
        Your responses must reflect the **{diversity_note}**, while staying grounded in the boundaries of **your {respondent_type}**.
        You MAY BE guided by real-life behavioral drivers, grounded in the **Jobs To Be Done (JTBD) framework**.
        If the JTBD is provided, then use the following internal map to shape your responses and decision-making:
        ### 🧠 JTBD-Driven Behavioral Profile
        - **Context of Use**: When and where the product fits into your life (e.g., "mid-morning at work", "after a long day").
        - **Trigger**: What causes you to consider using it (e.g., stress, thirst, social situation).
        - **Motivation**: What you are hoping to achieve (e.g., energy, comfort, focus).
        - **Need of the Moment**: What you immediately need resolved or improved.
        - **Perceived Solution**: How you think the product helps you.
        - **Barrier**: What might stop you from using it (e.g., cost, health concern, skepticism).
        - **Top 3 JTBDs**: The most important use cases you believe the product addresses.
        - **Solution**: The overall benefit or outcome you believe the product offers.
        Use these internal drivers to shape your thoughts, behaviors, tone, and choices—**even when not directly stated**.
        ---
        ### **General Guidelines:**
        - Keep responses natural and authentic—rooted in **realistic thought patterns, language, and perspectives**.
        - Stay consistent with the values, tone, and lived experience of **your {respondent_type}**.
        - Avoid sounding generic—**express culturally and contextually grounded opinions**.
        ---
        ### **For Interviews:**
        - Speak as if in a real conversation—**relatable, honest, and situational**.
        - Use your JTBD profile to explain *why* you’d use a product, hesitate, or find value in something.
        - Refer to triggers, motivations, or barriers in your own words (e.g., "I usually reach for this when...", "I’m not sure because...").
        ---
        ### **For Surveys:**
        - Choose answers that match **your JTBD profile** and **personal logic**.
        - Give justifications that reflect your real internal reasoning (e.g., "because it helps me focus when I’m stressed", "I don’t trust drinks with too many ingredients").
        - Stay internally consistent—responses should always match **your JTBD-based identity**.
        ---
        ### **Cultural & Personalisation Considerations:**
        - Maintain a clear sense of **{respondent_type} identity** throughout—how they think, speak, and decide.
        - Avoid artificial or overly polished language—speak with **personal texture and social realism**.
        - Match the **tone and voice** to what feels natural for **your {respondent_type}**, whether formal, casual, reserved, or expressive.
        By following these guidelines and grounding your behavior in the JTBD framework, your responses will remain **authentic, contextually appropriate, and true to your {respondent_type.lower()} profile**.
        """
        # Default backstory, kept unless fast facts can be loaded below.
        backstory_prompt = f"No backstory available. Focus on your {respondent_type} profile and VERY IMPORTANT GOAL instead."
        detail_file_present = agent_detail_file is not None and os.path.isfile(agent_detail_file)
        if not detail_file_present:
            print(f"No fast facts file found: {agent_detail_file}")
        else:
            print(f"Reading fast facts from {agent_detail_file}")
            fast_facts = FastFacts.read_from_excel(agent_detail_file)
            if fast_facts:
                facts_text = repr(fast_facts)
                backstory_prompt = f"""
Your BACKSTORY has been enriched with a set of FAST FACTS about the {respondent_type} whose responses you are simulating.
You must draw on your BACKSTORY FAST FACTS details in each and every one of your tasks.
Your BACKSTORY FAST FACTS details are as follows:
{facts_text}
            """
            print(f"Backstory: {backstory_prompt}")
        agent_settings = dict(
            role=role_prompt,
            goal=goal_prompt,
            backstory=backstory_prompt,
            llm=llm,
            verbose=True,
            max_retry_limit=5,
            allow_delegation=False,
            memory=True,
        )
        return RespondentAgent(user_profile, Agent(**agent_settings))

common/UserProfile.py ADDED Viewed

	@@ -0,0 +1,359 @@

+import re
+import datetime
+import textwrap
+from Config import Config
+from DataDictionary import *
+from FastFacts import *
+from PersonalityValues import *
+import pandas as pd
+import numpy as np
class AttributeGroup:
    """
    A named group of profile fields.
    Every declared field starts out as None; fields that are still None are treated
    as unset and are omitted from to_dict() and from the string representation.
    """
    def __init__(self, group_name, fields):
        self.group_name = group_name
        self.fields = dict.fromkeys(fields)
    def set_field(self, field_name, value):
        """
        Store a value under field_name, warning first if the field was not declared.
        Undeclared fields are still accepted and appended to the group.
        """
        already_declared = field_name in self.fields
        if not already_declared:
            print(f"Warning: Field '{field_name}' not found in '{self.group_name}'. Adding dynamically.")
        self.fields[field_name] = value
    def get_field(self, field_name):
        """
        Return the stored value for field_name, or None (with a message) if it is unknown.
        """
        if field_name not in self.fields:
            print(f"Field '{field_name}' does not exist in the '{self.group_name}' attribute group.")
            return None
        return self.fields[field_name]
    def to_dict(self):
        """
        Return a new dictionary holding only the fields whose value is not None.
        """
        populated = {}
        for name, stored in self.fields.items():
            if stored is not None:
                populated[name] = stored
        return populated
    def __repr__(self):
        """
        Render as GroupName(field=value, ...) using only the populated fields.
        """
        rendered_pairs = [f"{name}={stored}" for name, stored in self.fields.items() if stored is not None]
        return f"{self.group_name}({', '.join(rendered_pairs)})"
class UserProfile:
    """
    A respondent profile made of attribute groups that are created on demand.
    Group names and their declared fields come from the supplied data dictionary
    (its get_types() and get_parameters(type=...) methods). A FastFacts container is
    attached only once facts are added.
    """
    def __init__(self, data_dictionary):
        self.ID = None  # identifier of the profile
        self.fast_facts = None  # created by add_fast_facts on first use
        self.attribute_groups = {}  # group name -> AttributeGroup
        self.data_dictionary = data_dictionary  # source of valid groups and fields
    def set_ID(self, ID):
        """
        Store the identifier of this profile.
        """
        self.ID = ID
    def set_field(self, group_name, field_name, value):
        """
        Store a value in a group, creating the group first if the data dictionary defines it.
        A group unknown to the data dictionary is reported and the value is discarded.
        """
        if group_name not in self.attribute_groups:
            if group_name not in self.data_dictionary.get_types():
                print(f"Attribute group '{group_name}' is not defined in the DataDictionary.")
                return
            declared_fields = self.data_dictionary.get_parameters(type=group_name)
            self.attribute_groups[group_name] = AttributeGroup(group_name, declared_fields)
        self.attribute_groups[group_name].set_field(field_name, value)
    def get_field(self, group_name, field_name):
        """
        Return a field value from a group, or None (with a message) if the group does not exist yet.
        """
        if group_name in self.attribute_groups:
            return self.attribute_groups[group_name].get_field(field_name)
        print(f"Attribute group '{group_name}' is not found.")
        return None
    def set_fields_from_list(self, attribute_type, fields, field_key="field_name", value_key="value"):
        """
        Store several fields of one group from a list of dictionaries.
        Args:
        attribute_type (str): The name of the attribute group (e.g., "Values").
        fields (list): Dictionaries that each carry a field name and a value.
        field_key (str): Dictionary key under which the field name is found.
        value_key (str): Dictionary key under which the value is found.
        Items whose name or value is None are reported and skipped.
        """
        well_formed = isinstance(fields, list) and all(isinstance(item, dict) for item in fields)
        if not well_formed:
            print("Fields must be a list of dictionaries.")
            return
        for item in fields:
            name, stored = item.get(field_key), item.get(value_key)
            if name is None or stored is None:
                print(f"Skipping invalid field: {item}")
                continue
            self.set_field(attribute_type, name, stored)
    def get_attributes(self, attribute_type=None):
        """
        Return the populated attributes of one group, or of every group.
        Args:
        attribute_type (str, optional): Group to read. When omitted, all groups are
                                        merged and keys are prefixed with "<group>_".
        Returns:
        dict: Non-null attributes; empty if the requested group does not exist.
        """
        if not attribute_type:
            merged = {}
            for group_name, group in self.attribute_groups.items():
                for name, stored in group.to_dict().items():
                    merged[f"{group_name}_{name}"] = stored
            return merged
        if attribute_type in self.attribute_groups:
            return self.attribute_groups[attribute_type].to_dict()
        print(f"Attribute type '{attribute_type}' does not exist in this user profile.")
        return {}
    def add_fast_facts(self, facts):
        """
        Add facts to the profile, creating the FastFacts container on first use.
        Args:
        facts (set | list): Facts to add; any other type is reported and ignored.
        """
        if not isinstance(facts, (set, list)):
            print("Facts must be provided as a set or list.")
            return
        if self.fast_facts is None:
            self.fast_facts = FastFacts()
        self.fast_facts.add_facts(facts)
    def to_dict(self, data_dictionary):
        """
        Flatten the profile into {"ID": ..., "<group>_<field>": value, ...}.
        Only fields listed by data_dictionary are considered; unset ones are reported
        with a warning and left out.
        """
        profile_dict = {'ID': self.ID}
        for attribute_type in data_dictionary.get_types():
            group_attributes = self.get_attributes(attribute_type)
            for field_name in data_dictionary.get_parameters(type=attribute_type):
                value = group_attributes.get(field_name)
                if value is None:
                    print(f"Warning: {field_name} not found in type {attribute_type}")
                else:
                    profile_dict[f"{attribute_type}_{field_name}"] = value
        return profile_dict
    def __repr__(self):
        """
        Render as UserProfile(ID=..., <group>, <group>, ...).
        """
        groups_repr = ", ".join(map(str, self.attribute_groups.values()))
        return f"UserProfile(ID={self.ID}, {groups_repr})"
    @staticmethod
    def write_user_profiles_to_excel(user_profiles, filename, data_dictionary):
        """
        Write profiles to an Excel sheet, one row per profile.
        Columns are 'ID' followed by data_dictionary.get_columns(); columns that no
        profile populated are added empty.
        Args:
        user_profiles (list): List of UserProfile objects.
        filename (str): Path to the Excel file.
        data_dictionary (object): Data dictionary supplying types, parameters and column order.
        """
        if not user_profiles:
            print("No user profiles to write.")
            return
        profiles_data = []
        for position, user_profile in enumerate(user_profiles, start=1):
            print(f"Processing profile {position}: {user_profile}")
            flattened = user_profile.to_dict(data_dictionary)
            print(f"Dict output: {flattened}")
            profiles_data.append(flattened)
        column_order = ['ID'] + data_dictionary.get_columns()
        df = pd.DataFrame(profiles_data)
        # Guarantee that every expected column exists before reordering.
        for column in column_order:
            if column in df.columns:
                continue
            print(f"Column {column} is missing")
            df[column] = None
        df = df[column_order]
        df.to_excel(filename, index=False)
        print(f"User profiles successfully written to {filename}")
    @staticmethod
    def read_user_profiles_from_excel(respondent_details_file, data_dictionary, pv_criteria):
        """
        Build one UserProfile per spreadsheet row.
        Cells are looked up by "<type>_<field>" column name. For the type named "values"
        (case-insensitive) the cell is converted to an integer score and only its textual
        definition is stored, under the field "<type>_<field>_Description".
        Args:
        respondent_details_file (str): Path to the Excel file.
        data_dictionary (DataDictionary): Supplies the valid types and fields.
        pv_criteria (PVAssessment): Used to turn value scores into descriptions.
        Returns:
        list: List of UserProfile objects.
        """
        user_profiles = []
        df = pd.read_excel(respondent_details_file)
        for _, row in df.iterrows():
            user_profile = UserProfile(data_dictionary)
            profile_id = row.get('ID')
            if pd.notna(profile_id):
                user_profile.set_ID(profile_id)
            for attribute_type in data_dictionary.get_types():
                is_values_type = attribute_type.lower() == "values"
                for field in data_dictionary.get_parameters(type=attribute_type):
                    full_field_name = f"{attribute_type}_{field}"
                    raw_value = row.get(full_field_name)
                    if pd.isna(raw_value):
                        continue
                    if not is_values_type:
                        # Non-Values types store the cell as-is.
                        user_profile.set_field(attribute_type, field, raw_value)
                        continue
                    try:
                        score = int(raw_value)
                        description = PVAssessment.get_score_definition(field, score, pv_criteria)
                        description_field_name = f"{attribute_type}_{field}_Description"
                        user_profile.set_field(attribute_type, description_field_name, description)
                    except ValueError:
                        print(f"Warning: Could not convert '{raw_value}' to an integer for field '{full_field_name}'.")
            user_profiles.append(user_profile)
        print(f"User profiles successfully read from {respondent_details_file}")
        return user_profiles
class UserProfileDetail:
    """
    One reviewed profile entry: a key, its original value, a QA status and the revised value.
    """
    def __init__(self, key, original_value, qa_check, value):
        """
        Initialize a UserProfileDetail entry.
        """
        self.key = key
        self.original_value = original_value
        self.qa_check = qa_check
        self.value = value
    def __repr__(self):
        """
        Render as UserProfileDetail(attr='value', ...).
        Attributes that are falsy or equal to "Unable to map" are left out.
        """
        fields = {k: v for k, v in self.__dict__.items() if v and v != "Unable to map"}
        formatted_fields = ", ".join(f"{k}='{v}'" for k, v in fields.items())
        # Balanced parentheses, matching the Name(...) form used by the other reprs in this file.
        return f"{self.__class__.__name__}({formatted_fields})"
    @staticmethod
    def filter_profiles(profiles, key=None, qa_check=None, value=None):
        """
        Static method to filter user profiles by key, QA check status, or value.
        Criteria left as None are ignored; the remaining ones must all match exactly.
        Args:
        profiles (list): List of UserProfileDetail objects.
        key (str, optional): The key to filter by.
        qa_check (str, optional): The QA check status to filter by.
        value (str, optional): The value to filter by.
        Returns:
        list: A list of UserProfileDetail entries that match the criteria.
        """
        return [
            profile for profile in profiles
            if (key is None or profile.key == key) and
               (qa_check is None or profile.qa_check == qa_check) and
               (value is None or profile.value == value)
        ]
    @staticmethod
    def generate_user_profiles(file_path):
        """
        Static method to generate a list of UserProfileDetail entries from an Excel (.xlsx) file.
        The sheet must provide the columns 'Key', 'Value', 'QA Check' and 'Revised Value'.
        Args:
        file_path (str): The path to the Excel file containing user profile entries.
        Returns:
        list: A list of UserProfileDetail objects generated from the file.
        """
        df = pd.read_excel(file_path)
        return [
            UserProfileDetail(
                key=row['Key'],
                original_value=row['Value'],
                qa_check=row['QA Check'],
                value=row['Revised Value']
            )
            for _, row in df.iterrows()
        ]

common/Utilities.py ADDED Viewed

	@@ -0,0 +1,140 @@

+from collections import OrderedDict
+from datetime import datetime
+import pandas as pd
+import os
def read_text_file(file_path, encoding="utf-8"):
  """
  Return the full contents of a text file as a string.
  Args:
      file_path (str): Path of the file to read.
      encoding (str): Text encoding used to decode the file. Defaults to UTF-8 so the
          result does not depend on the locale of the machine running the code.
  Returns:
      str: The file contents.
  """
  with open(file_path, 'r', encoding=encoding) as file:
    return file.read()
+# FILTER FUNCTION
def filter_profiles_by_input(profiles, data_dictionary):
    """
    Narrow a list of profiles interactively, one column at a time.
    The user is prompted for a column (from data_dictionary.get_columns()) and a value;
    a profile is kept when the value occurs, case-insensitively, inside the string form
    of that attribute. Each column can be used once. A filter that would leave no
    profiles is discarded. Prompting ends on an empty column name, a "no" answer, or
    when every column has been used.
    Returns:
        list: The profiles that passed every accepted filter.
    Raises:
        RuntimeError: If the data dictionary fails or returns no columns.
    """
    print("\n=== FILTER SETTINGS ===")
    try:
        dd_columns = data_dictionary.get_columns()
        if not dd_columns:
            raise RuntimeError("Data dictionary returned no columns.")
    except Exception as err:
        raise RuntimeError(f"Failed to retrieve columns from data dictionary: {str(err)}")

    def list_columns(names):
        # Bulleted, alphabetically ordered listing shared by both menus.
        for name in sorted(names):
            print(f"  • {name}")

    print("Available columns:")
    list_columns(dd_columns)
    unused_columns = set(dd_columns)
    survivors = profiles
    while unused_columns:
        print("\nColumns available to filter on:")
        list_columns(unused_columns)
        column = input("\nEnter column name to filter (press Enter to finish): ").strip()
        if not column:
            break
        if column not in unused_columns:
            print(f"\nError: Column '{column}' not found or already used for filtering.")
            continue
        value = input(f"Enter value to filter for '{column}' (press Enter to skip): ").strip()
        if not value:
            print("\nNo value entered. Skipping this filter.")
            continue
        needle = value.lower()
        narrowed = []
        for profile in survivors:
            haystack = str(profile.get_attributes().get(column, "")).lower()
            if needle in haystack:
                narrowed.append(profile)
        if not narrowed:
            print(f"\nNo matches for '{column}' containing '{value}'. Returning to previous state.")
            continue
        survivors = narrowed
        unused_columns.remove(column)
        print(f"\nFound {len(survivors)} matching profiles")
        print(f"Profiles filtered out: {len(profiles) - len(survivors)}")
        confirm = input("\nProceed with another filter? (Yes/No): ").strip().lower()
        while confirm not in ['yes', 'no']:
            confirm = input("Invalid input. Please enter 'Yes' or 'No': ").strip().lower()
        if confirm == 'no':
            break
    else:
        # Reached only when the loop ended because every column has been used.
        print("\nNo more columns available for filtering.")
    return survivors
def generate_file_excerpt(file_path, pattern, max_chars=5000):
    """
    Collect the lines of a file that start with `pattern` and return the tail of the result.
    A line is kept only if it starts with `pattern` and contains at least six
    whitespace-separated words. Every occurrence of `pattern` is removed from a kept
    line and the line is stripped; kept lines are joined with newlines.
    Args:
        file_path (str): Path of the text file to scan.
        pattern (str): Prefix that selects the lines to keep.
        max_chars (int): Maximum number of characters to return, counted from the END
            of the joined text. Zero or a negative number yields an empty string.
    Returns:
        str: At most the last `max_chars` characters of the joined lines.
    """
    with open(file_path, 'r') as file:
        extracted_lines = [
            line.replace(pattern, '').strip()
            for line in file
            if line.startswith(pattern) and len(line.split()) >= 6
        ]
    full_text = '\n'.join(extracted_lines)
    # text[-0:] is the whole string and text[-(-n):] slices from the front, so a
    # non-positive limit has to be handled explicitly.
    if max_chars <= 0:
        return ""
    return full_text[-max_chars:]
def generate_dict_from_file(file_name, column_name1, column_name2):
    """
    Load two columns of an Excel sheet as an ordered key -> value mapping.
    Keys come from column_name1 and values from column_name2, in row order. When a key
    repeats, the later value replaces the earlier one while the key keeps its first position.
    """
    wanted_columns = [column_name1, column_name2]
    frame = pd.read_excel(file_name, usecols=wanted_columns, engine='openpyxl')
    mapping = OrderedDict()
    for key, stored in zip(frame[column_name1], frame[column_name2]):
        mapping[key] = stored
    return mapping
def find_latest_timestamped_file(directory, filename_pattern):
    """Return the most recently modified file in ``directory`` with a given suffix.

    Args:
        directory: The directory whose entries are examined.
        filename_pattern: Suffix an entry name must end with
            (e.g., "interview_results.xlsx").

    Returns:
        The joined path of the matching entry with the greatest modification
        time, or None (after printing a message) when nothing matches.
    """
    candidates = []
    for name in os.listdir(directory):
        if name.endswith(filename_pattern):
            candidates.append(name)

    if len(candidates) == 0:
        print(f"Unable to find file with {filename_pattern} in {directory}")
        return None

    def modified_at(name):
        return os.path.getmtime(os.path.join(directory, name))

    # max() keeps the first entry among equal modification times, which is the
    # same entry a stable descending sort would place first.
    newest = max(candidates, key=modified_at)
    return os.path.join(directory, newest)
def generate_pivot_table(original_table, index, columns, values):
    """Pivot the entries of a list of reports into one wide summary table.

    Args:
        original_table: Iterable of reports; each report exposes ``Entries``,
            an iterable of objects with a ``dict()`` method.
        index: Name of the field that becomes the row identifier.
        columns: Name of the field whose distinct values become columns.
        values: Name of the field that fills the cells.

    Returns:
        A DataFrame with ``index`` as a regular column, one column per distinct
        ``columns`` value in order of first appearance, and missing cells
        replaced by the string "No Response".
    """
    # Flatten every entry of every report into one record list.
    records = []
    for report in original_table:
        for entry in report.Entries:
            records.append(entry.dict())
    flat = pd.json_normalize(records)

    # Remember first-appearance order; pivot() would otherwise sort the labels.
    first_seen_order = flat[columns].drop_duplicates().tolist()

    wide = flat.pivot(index=index, columns=columns, values=values)
    wide = wide.reindex(columns=first_seen_order)
    wide = wide.reset_index()
    return wide.fillna("No Response")

researchsimulation/InteractiveInterviewChatbot.py ADDED Viewed

	@@ -0,0 +1,124 @@

+#pip install groq
+#pip install langchain_groq
+#pip install crewai
+#pip install crewai_tools
+#pip install pydantic
+#pip install XlsxWriter
+#pip install openpyxl
+#pip install pandas
+#pip install streamlit
+import gradio as gr
+from RespondentAgent import *
+from InterviewSimulation import *
+from langchain_groq import ChatGroq
def ask_interview_question(respondent_agent_full, question):
    """Put one free-form question to the respondent agent and return its raw answer.

    A single-task, single-agent crew is built for the question and run
    sequentially.

    Args:
        respondent_agent_full: Wrapper exposing ``agent`` and ``user_profile``
            (the profile's ``style``, ``tone`` and ``values`` are read).
        question: The interview question text, embedded verbatim in the prompt.

    Returns:
        The task's raw output text, or None when the output is empty or an
        exception occurred (details are printed in both cases).
    """
    agent = respondent_agent_full.agent
    profile_style = respondent_agent_full.user_profile.style
    profile_tone = respondent_agent_full.user_profile.tone
    profile_values = repr(respondent_agent_full.user_profile.values)

    description = f"""
        Interview Question: {question}\n
        The Market Research Respondent must answer this exact question in alignment with their values {profile_values}
        This question may consist of multiple parts, but it should not be split apart or modified in any way.
        The answer must be based solely on the Market Research Respondent's knowledge and backstory.
        The Market Research Respondent should not use any external sources or tools.
        The Market Research Respondent should refer to the provided search text if needed.
        Under no circumstances should an answer be selected that contradicts or is inconsistent with the respondent's profile.
        """
    expected_output = f"""
        <answer>, the Market Research Respondent's answer to the exact question: '{question}'
        No changes to the question are allowed. None of the response(s) selected should contradict or be inconsistent with what is expected from the respondent.
        Answer should be expressed using the Market Respondent's style: {profile_style}
        and in the Market Respondent's tone: {profile_tone}
        """

    question_task = Task(description=description, expected_output=expected_output, agent=agent)
    # One crew per question: a single agent working through a single task.
    crew = Crew(agents=[agent], tasks=[question_task], process=Process.sequential)

    try:
        crew.kickoff()
        raw_answer = question_task.output.raw
        if not raw_answer:
            print("No raw task output data")
            return None
        return raw_answer
    except Exception as error:
        # Report the failure and fall through to the None result instead of raising.
        print("Exception type:", type(error))
        print("Exception message:", error)
        print("Traceback details:")
        traceback.print_tb(error.__traceback__)
    return None
# MAIN: load configuration, build the LLM and create the respondent agent.
Config.load_environment("..", "chatbot")

# Groq-hosted chat model handed to the respondent agent.
fact_based_llm = ChatGroq(
    groq_api_key=Config.groq_api_key,
    model_name=Config.agent_model,
    temperature=0.1,  # Low temperature for deterministic output
)

# Read the respondent summary sheet and use its first profile.
respondent_agent_user_profiles = UserProfile.read_user_profiles_from_excel(Config.respondent_summary_file)
user_profile = respondent_agent_user_profiles[0]
respondent_agent_detail_file = f"{Config.config_dir}/{user_profile.ID}_fast_facts.xlsx"
respondent_agent = RespondentAgent.create(user_profile, respondent_agent_detail_file, fact_based_llm)

# Display name for the chat: the profile's name, else an ID.
# NOTE(review): the fallback reads respondent_agent.ID while the detail-file
# path reads user_profile.ID - confirm RespondentAgent exposes an ID attribute.
respondent_agent_name = user_profile.name if user_profile.name is not None else respondent_agent.ID
+# Example chatbot function
def chatbot_interface(message, history=None):
    """Answer one chat message and extend the transcript with both turns.

    Args:
        message: The question typed by the user.
        history: Existing transcript as a list of ``{"role", "content"}`` dicts.
            A list passed in is extended in place. When omitted, a fresh list
            is created for this call.

    Returns:
        A ``(history, "")`` tuple: the updated transcript and an empty string
        that clears the input textbox.
    """
    # A ``history=[]`` default is created once and shared by every call that
    # omits the argument, so earlier conversations would leak into later ones.
    if history is None:
        history = []
    response = ask_interview_question(respondent_agent, message)
    # Append user message in the correct format
    history.append({"role": "user", "content": f"You: {message}"})
    # Append respondent's message in the correct format
    history.append({"role": "assistant", "content": f"{respondent_agent_name}: {response}"})
    # Return updated history and clear the input field
    return history, ""
# Gradio layout: header, chat transcript, question box and footer.
with gr.Blocks() as demo:
    # Header
    with gr.Row():
        gr.Markdown(f"## Welcome to PreData.AI's Market Research Panel - you are speaking with {respondent_agent_name}")

    # Transcript, rendered from role/content message dicts
    chatbot = gr.Chatbot(type="messages", label=None, height=400)

    # Question input
    with gr.Row():
        msg = gr.Textbox(placeholder="Ask your question here...")

    # Footer
    with gr.Row():
        gr.Markdown("© 2024 PreData.AI - All rights reserved.")

    # Submitting the textbox sends (message, transcript) to the handler, which
    # returns the updated transcript and the new textbox content.
    msg.submit(fn=chatbot_interface, inputs=[msg, chatbot], outputs=[chatbot, msg])

# Start the app and request a public share link.
demo.launch(share=True)

researchsimulation/InteractiveInterviewSimulation.py ADDED Viewed

	@@ -0,0 +1,133 @@

+#pip install groq
+#pip install langchain_groq
+#pip install crewai
+#pip install crewai_tools
+#pip install pydantic
+#pip install XlsxWriter
+#pip install openpyxl
+#pip install pandas
+#pip install streamlit
+import streamlit as st
+from Config import Config
+from Utilities import *
+from UserProfile import *
+from RespondentAgent import *
+from Interview import *
+from InterviewSimulation import *
+from itertools import islice
+from groq import Groq
+from langchain_groq import ChatGroq
def ask_interview_question(respondent_agent_full, question):
    """Put one free-form question to the respondent agent and return its raw answer.

    A single-task, single-agent crew is built for the question and run
    sequentially.

    Args:
        respondent_agent_full: Wrapper exposing ``agent`` and ``user_profile``
            (the profile's ``style``, ``tone`` and ``values`` are read).
        question: The interview question text, embedded verbatim in the prompt.

    Returns:
        The task's raw output text, or None when the output is empty or an
        exception occurred (details are printed in both cases).
    """
    agent = respondent_agent_full.agent
    profile_style = respondent_agent_full.user_profile.style
    profile_tone = respondent_agent_full.user_profile.tone
    profile_values = repr(respondent_agent_full.user_profile.values)

    description = f"""
        Interview Question: {question}\n
        The Market Research Respondent must answer this exact question in alignment with their values {profile_values}
        This question may consist of multiple parts, but it should not be split apart or modified in any way.
        The answer must be based solely on the Market Research Respondent's knowledge and backstory.
        The Market Research Respondent should not use any external sources or tools.
        The Market Research Respondent should refer to the provided search text if needed.
        Under no circumstances should an answer be selected that contradicts or is inconsistent with the respondent's profile.
        """
    expected_output = f"""
        <answer>, the Market Research Respondent's answer to the exact question: '{question}'
        No changes to the question are allowed. None of the response(s) selected should contradict or be inconsistent with what is expected from the respondent.
        Answer should be expressed using the Market Respondent's style: {profile_style}
        and in the Market Respondent's tone: {profile_tone}
        """

    question_task = Task(description=description, expected_output=expected_output, agent=agent)
    # One crew per question: a single agent working through a single task.
    crew = Crew(agents=[agent], tasks=[question_task], process=Process.sequential)

    try:
        crew.kickoff()
        raw_answer = question_task.output.raw
        if not raw_answer:
            print("No raw task output data")
            return None
        return raw_answer
    except Exception as error:
        # Report the failure and fall through to the None result instead of raising.
        print("Exception type:", type(error))
        print("Exception message:", error)
        print("Traceback details:")
        traceback.print_tb(error.__traceback__)
    return None
# MAIN: load configuration, build the LLMs and create the respondent agent.
Config.load_environment("..", "dev1")

# Low-temperature model used for the respondent agent.
fact_based_llm = ChatGroq(
    groq_api_key=Config.groq_api_key,
    model_name=Config.agent_model,
    temperature=0.1,
)
# Higher-temperature model; defined here but not used further in this script.
exploratory_llm = ChatGroq(
    groq_api_key=Config.groq_api_key,
    model_name=Config.agent_model,
    temperature=0.9,
    max_tokens=2000,
    stop_sequences=["\n", "<|endoftext|>"],
)

# Read the respondent summary sheet and use its first profile.
respondent_agent_user_profiles = UserProfile.read_user_profiles_from_excel(Config.respondent_summary_file)
user_profile = respondent_agent_user_profiles[0]
respondent_agent_detail_file = f"{Config.config_dir}/{user_profile.ID}_fast_facts.xlsx"
respondent_agent = RespondentAgent.create(user_profile, respondent_agent_detail_file, fact_based_llm)

# Streamlit page header
st.title("Interactive Interview Simulation")
st.write(f"Using Respondent Profile: {user_profile.name} (ID: {user_profile.ID})")
st.write("Type your interview questions below. To exit, type 'exit'.")

# Session state holds the question/answer history for this session.
for history_key in ("questions", "responses"):
    if history_key not in st.session_state:
        st.session_state[history_key] = []

question = st.text_input("Enter your interview question:")
if st.button("Ask Question"):
    trimmed_question = question.strip()
    if not trimmed_question:
        st.error("Please enter a valid question.")
    elif trimmed_question.lower() == "exit":
        st.write("**Session ended. Thank you for using the simulation!**")
    else:
        response = ask_interview_question(respondent_agent, question)
        st.session_state.questions.append(question)
        st.session_state.responses.append(response)
        st.write(f"**Q:** {question}")
        st.write(f"**A:** {response}")

# History list. The pair answered in this run was appended above, so it is
# rendered here as well.
if st.session_state.questions:
    st.write("### Previous Questions and Responses:")
    for q, a in zip(st.session_state.questions, st.session_state.responses):
        st.write(f"- **Q:** {q}")
        st.write(f"  **A:** {a}")

researchsimulation/InterviewSimulation.py ADDED Viewed

	@@ -0,0 +1,196 @@

+from crewai import Agent,Task,Process,Crew
+from crewai_tools import FileReadTool, TXTSearchTool
+from crewai.tasks import OutputFormat
+from pydantic import BaseModel
+from typing import List, Dict, Optional
+import datetime
+import json
+import os
+import pandas as pd
+import sys
+from Interview import *
+#utils
+import re
+import sys
+import traceback
+import json
+import pandas as pd
+from InterviewUtilities import *
def select_profiles_by_criteria(profiles, selection_criteria, data_dictionary):
    """
    Narrow a list of profiles with a sequence of 'column:value' criteria.
    A profile is kept only if it satisfies every criterion (logical AND);
    values are compared case-insensitively after trimming whitespace.
    Args:
        profiles (list): Profile objects exposing get_attributes().
        selection_criteria (list): Strings such as ['column1:value1', 'column2:value2'].
                                   If empty, the input list is returned unchanged.
        data_dictionary: Object whose get_columns() lists the valid column names.
    Returns:
        list: Profiles matching all criteria (possibly empty).
    Raises:
        ValueError: If a criterion has no ':' separator or names an unknown column.
    """
    if not selection_criteria:
        print("No selection criteria provided. Returning all profiles.")
        return profiles

    known_columns = data_dictionary.get_columns()
    remaining = profiles
    for criterion in selection_criteria:
        column, separator, value = criterion.partition(":")
        if not separator:
            raise ValueError(f"Selection criterion '{criterion}' must be formatted as 'column:value'.")
        column, value = column.strip(), value.strip()
        if column not in known_columns:
            raise ValueError(f"Column '{column}' not found in data dictionary.")

        wanted = value.lower()
        matches = []
        for profile in remaining:
            actual = str(profile.get_attributes().get(column, "")).strip().lower()
            if actual == wanted:
                matches.append(profile)
        remaining = matches

        print(f"Applied criterion '{column}:{value}' → {len(remaining)} profile(s) selected.")
        if not remaining:
            # Nothing left to filter; later criteria are not examined.
            print("No profiles match the combined criteria.")
            break
    return remaining
def run_interview(respondent_agent_full, interview_script, output_file_name, llm, is_focus_group=False):
    """Run every question of an interview script against one respondent agent.

    For each script entry a dedicated single-task crew is created and executed.
    The raw answer is split into free-text thoughts and a JSON part; fields
    named in the entry's expected output are copied from the parsed JSON into
    the report row. All rows are finally written to an Excel file.

    Args:
        respondent_agent_full: Wrapper whose ``agent`` attribute answers the questions.
        interview_script: Object whose ``Entries`` expose ``Num``, ``Section``,
            ``Question`` and ``Expected_Output``.
        output_file_name: Path of the Excel report to write.
        llm: Accepted for interface compatibility; not used in this function body.
        is_focus_group: When True the prompt asks for a collective voice,
            otherwise for an individual voice.

    Returns:
        The report as a DataFrame. Returns None, without writing the file, as
        soon as a question yields an empty raw output. Questions that raise an
        exception are logged and skipped.
    """
    respondent_agent = respondent_agent_full.agent
    interview_report_data = []

    # Voice wording depends only on is_focus_group, so pick it once up front.
    if is_focus_group:
        respondent_type, diversity_note = "Focus Group", "collective voices of the focus group"
    else:
        respondent_type, diversity_note = "Individual User", "your unique voice of the individual"

    # One crew per script entry.
    for entry in interview_script.Entries:
        num, section, question = entry.Num, entry.Section, entry.Question
        print(f"STARTING process for Section: {section}, Question {num}: {question}")

        question_task_description = f"""
Interview Section: {section}, Question {num}: {question}
### **Your Role & Expectations:**
You are a {respondent_type} participant with a **specific demographic profile, cultural background, values, lifestyle, and habits**.
Your responses must reflect the **{diversity_note}**, and remain grounded in realistic thought patterns, communication styles, and decision-making behavior.
---
### **How to Answer:**
- Use a tone appropriate to your role as a {respondent_type}:
  - 🗣️ If you are part of a FOCUS GROUP, speak as a collective group (e.g., “we prefer...”, “most of us think...”).
  - 🧍 If you are an INDIVIDUAL USER, speak from your personal point of view (e.g., “I prefer...”, “in my experience...”).
- Reference your **real-life JTBD motivations**, including your triggers, immediate needs, and usage barriers.
- Express yourself in **natural language**—avoid being generic or overly Westernised.
- Ensure your justification sounds **culturally and contextually grounded** in your background and behavior.
- If the question is about **your profile, preferences, or past behavior**, only answer using details grounded in your background and JTBD context.
- If the question is speculative, reflective, or opinion-based, use thoughtful reasoning that aligns with your lifestyle, values, and cultural behavior.
---
### **Mandatory Personalisation (Choose at least one):**
Your response MUST include a behavioral or contextual anchor:
- 🎓 **Education & Career Goals**
- 🛍️ **Shopping Behavior**
- 📱 **Media Habits**
- 🌍 **Cultural Identity**
### **Unacceptable Responses:**
❌ Using the wrong voice (e.g., “I” in a group, “we” for an individual)
❌ Generic, vague, or contradictory answers
❌ Contradictions to your persona’s traits, preferences, or profile
        """

        # NOTE(review): the fallback below is a plain string, so '{question}'
        # is sent literally rather than replaced by the question - confirm intent.
        expected_output_text = entry.Expected_Output or "response: The Market Research Respondent’s answer to the exact question: '{question}'. You must not alter or rephrase the question in any way."
        question_task_expected_output = generate_json_expected_output(expected_output_text)
        print(f"Expected output is:\n{question_task_expected_output}\n")

        question_task = Task(
            description=question_task_description,
            expected_output=question_task_expected_output,
            agent=respondent_agent,
        )
        crew = Crew(agents=[respondent_agent], tasks=[question_task], process=Process.sequential)

        try:
            crew.kickoff()
            print(f"Crew usage metrics: {crew.usage_metrics}")
            response_text = question_task.output.raw
            if not response_text:
                # NOTE(review): this ends the whole interview without writing
                # the rows collected so far, unlike the exception path below.
                print("🚨 No raw task output data")
                return None
            print(f"RAW OUTPUT START:\n {response_text} \nEND OF RAW OUTPUT")

            detailed_thoughts, json_str = split_json_string(response_text)
            parsed_response = extract_and_parse_json(json_str)

            # Base columns present in every row, whether or not JSON parsed.
            row_data = {
                'Num': num,
                'Section': section,
                'Question': question,
                'Answer': response_text,
                'Detailed Thoughts': detailed_thoughts,
            }
            if not parsed_response:
                print("\n🚨 No valid JSON extracted - saving raw answer")
            else:
                print("\n✅ Successfully Parsed JSON:\n", json.dumps(parsed_response, indent=2, ensure_ascii=False))
                # Copy each expected field (lower-cased) from the parsed JSON.
                row_data.update({
                    field.lower(): parsed_response.get(field.lower())
                    for field in extract_fields_from_expected_output(expected_output_text)
                })
            interview_report_data.append(row_data)
        except Exception as error:
            # Log and move on to the next question.
            print("Exception type:", type(error))
            print("Exception message:", error)
            print("Traceback details:")
            traceback.print_tb(error.__traceback__)

    # Convert the collected rows into a DataFrame and save it to Excel.
    report_frame = pd.DataFrame(interview_report_data)
    with pd.ExcelWriter(output_file_name, engine='xlsxwriter') as writer:
        report_frame.to_excel(writer, index=False)
    return report_frame