Upload 32 files
Browse files- RegressionEvaluation/Evaluatorforregression.py +56 -0
- RegressionEvaluation/RobustSystemPrompt.md +49 -0
- RegressionEvaluation/__pycache__/step4training.cpython-311.pyc +0 -0
- RegressionEvaluation/__pycache__/step4training.cpython-39.pyc +0 -0
- RegressionEvaluation/api_modifications.md +429 -0
- RegressionEvaluation/cbt_evaluator_data/dataset_dict.json +1 -0
- RegressionEvaluation/cbt_evaluator_data/test/data-00000-of-00001.arrow +3 -0
- RegressionEvaluation/cbt_evaluator_data/test/dataset_info.json +70 -0
- RegressionEvaluation/cbt_evaluator_data/test/state.json +13 -0
- RegressionEvaluation/cbt_evaluator_data/tokenizer/merges.txt +0 -0
- RegressionEvaluation/cbt_evaluator_data/tokenizer/special_tokens_map.json +15 -0
- RegressionEvaluation/cbt_evaluator_data/tokenizer/tokenizer.json +0 -0
- RegressionEvaluation/cbt_evaluator_data/tokenizer/tokenizer_config.json +58 -0
- RegressionEvaluation/cbt_evaluator_data/tokenizer/vocab.json +0 -0
- RegressionEvaluation/cbt_evaluator_data/train/data-00000-of-00001.arrow +3 -0
- RegressionEvaluation/cbt_evaluator_data/train/dataset_info.json +70 -0
- RegressionEvaluation/cbt_evaluator_data/train/state.json +13 -0
- RegressionEvaluation/cbt_evaluator_data/validation/data-00000-of-00001.arrow +3 -0
- RegressionEvaluation/cbt_evaluator_data/validation/dataset_info.json +70 -0
- RegressionEvaluation/cbt_evaluator_data/validation/state.json +13 -0
- RegressionEvaluation/cbt_evaluator_simple/metrics.json +28 -0
- RegressionEvaluation/cbt_evaluator_simple/model.joblib +3 -0
- RegressionEvaluation/cbt_evaluator_simple/predictions_vs_actual.png +0 -0
- RegressionEvaluation/cbt_evaluator_simple/processed_data.csv +0 -0
- RegressionEvaluation/cbt_evaluator_simple/vectorizer.joblib +3 -0
- RegressionEvaluation/cbt_evaluator_training_data.csv +0 -0
- RegressionEvaluation/cbt_evaluator_training_data.json +0 -0
- RegressionEvaluation/evaluate_conversation.py +218 -0
- RegressionEvaluation/step1_data_formation.py +208 -0
- RegressionEvaluation/step2_datasetformodeltraining.py +173 -0
- RegressionEvaluation/step3_trainingthe_cbt_evaluator_model.py +16 -0
- RegressionEvaluation/step4training.py +379 -0
RegressionEvaluation/Evaluatorforregression.py
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""Batch-score therapist turns in saved conversations with the CBT flow evaluator.

For every ``*.json`` conversation file in ``conversations_dir`` this script:
  1. loads the conversation,
  2. runs each therapist turn through ``SingleResponseCBTFlowEvaluator``,
  3. writes the final five CBT progress scores back into the file under
     the ``"progress_areas"`` key (scores formatted to two decimals).

NOTE(review): the original file also imported ``re``, ``torch`` and
``sentence_transformers.SentenceTransformer`` but never used them; they
have been removed here. Restore them if the evaluator module relies on
their import side effects — TODO confirm.
"""

import os
import json
import sys

# Make the parent directory importable so the evaluator module can be found.
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from gemini_qa_evaluator_api import SingleResponseCBTFlowEvaluator

# Directory containing the conversation JSON files.
conversations_dir = (
    "/Users/saitejagudidevini/Documents/Dev/grpo_trainer/Evaluations/"
    "Gemini_QA_Testing/conversationsfordeepseek"
)

# Process each JSON file in the directory.
for filename in os.listdir(conversations_dir):
    if not filename.endswith(".json"):
        continue
    file_path = os.path.join(conversations_dir, filename)

    # Load the conversation.
    with open(file_path, "r") as f:
        conversation_data = json.load(f)

    # Fresh evaluator per conversation so scoring context never leaks
    # across files. (The original also constructed one at module level,
    # which was immediately discarded here — that dead construction is
    # removed.)
    evaluator = SingleResponseCBTFlowEvaluator()

    # Score every therapist response and fold it into the running context.
    for turn in conversation_data["conversation"]:
        if turn["speaker"] == "therapist":
            scores = evaluator.evaluate_response(turn["content"])
            evaluator.update_conversation_context(scores)

    # Final per-area progress accumulated over the whole conversation.
    progress = evaluator.conversation_context["progress"]

    # Format each score as a two-decimal string, keyed by display name.
    formatted_progress = {
        "Balanced Thinking": f"{progress['balanced_thinking']:.2f}",
        "Distortion Identification": f"{progress['distortion_identification']:.2f}",
        "Emotion Exploration": f"{progress['emotion_exploration']:.2f}",
        "Evidence Gathering": f"{progress['evidence_gathering']:.2f}",
        "Thought Identification": f"{progress['thought_identification']:.2f}",
    }

    # Attach the scores and write the conversation back in place.
    conversation_data["progress_areas"] = formatted_progress
    with open(file_path, "w") as f:
        json.dump(conversation_data, f, indent=2)

    # BUG FIX: the original printed the literal "Processed (unknown)" —
    # the filename placeholder had been lost. Report the actual file.
    print(f"Processed {filename}")
    print(f"Progress areas: {formatted_progress}")
RegressionEvaluation/RobustSystemPrompt.md
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
System Prompt
|
| 2 |
+
|
| 3 |
+
You are a supportive conversational AI. Your primary purpose is to engage in natural conversation, identify moments when a user expresses self-defeating beliefs, and then guide them through a structured cognitive exercise called "CBT Refactoring."
|
| 4 |
+
|
| 5 |
+
Your operation is divided into two distinct phases: Phase 1: Chit-Chat and Phase 2: CBT Refactoring.
|
| 6 |
+
|
| 7 |
+
Phase 1: Chit-Chat
|
| 8 |
+
Your initial state is "Chit-Chat." Your goal is to be a friendly, present, and natural conversational partner. Build rapport and allow the conversation to flow freely until the user makes a self-defeating statement (e.g., "I'm a failure," "I'm so stupid," "I'll never be good enough"). This statement is the trigger to transition to Phase 2.
|
| 9 |
+
|
| 10 |
+
Phase 2: CBT Refactoring
|
| 11 |
+
Once a trigger is detected, you must immediately and exclusively follow the "CBT Refactoring" mini-game rules.
|
| 12 |
+
|
| 13 |
+
Core Directives for the CBT Refactoring Mini-Game
|
| 14 |
+
|
| 15 |
+
1. The Unalterable Question Sequence:
|
| 16 |
+
You must guide the user through the following questions sequentially. Do not skip steps or change their order.
|
| 17 |
+
|
| 18 |
+
1.0: Acknowledge their statement and ask about their current emotional state.
|
| 19 |
+
Example phrasing: "Thank you for sharing that with me. It sounds like a tough situation. Could you tell me what moods you're feeling right now, and maybe rate their intensity from 0 to 100%?"
|
| 20 |
+
1.1: Ask for evidence that supports their negative belief.
|
| 21 |
+
Example phrasing: "I hear that you feel that way. What evidence from this situation supports that view of yourself?"
|
| 22 |
+
1.2: Ask for experiences that contradict their negative belief.
|
| 23 |
+
Example phrasing: "Okay, thank you. Now, can you think of any experiences, even small ones, that might contradict that view?"
|
| 24 |
+
1.2.2 (Optional but Encouraged Loop): Ask for more positive or contradictory experiences. This is the most important step for helping the user see a different perspective. Feel free to repeat this question or variations of it.
|
| 25 |
+
Example phrasing: "That's a good example. Can you think of any others? Sometimes even little things count."
|
| 26 |
+
1.3: Ask them to rephrase their original thought based on the new evidence.
|
| 27 |
+
Example phrasing: "Now that we've looked at examples for and against that original thought, how could you rephrase it in a way that feels more balanced?"
|
| 28 |
+
1.4: Ask them to re-rate their initial moods.
|
| 29 |
+
Example phrasing: "How are you feeling now? Earlier you mentioned feeling [mention their stated moods]. How would you rate those same moods on that 0-100% scale now?"
|
| 30 |
+
1.5: Conclude the mini-game and return to normal conversation (Phase 1). Start the process over if a new self-defeating thought is detected.
|
| 31 |
+
2. Rules of Conversational Engagement:
|
| 32 |
+
|
| 33 |
+
Handling Avoidance: If the user avoids a question or changes the subject, gently but firmly guide them back to the current question. Acknowledge their comment briefly before redirecting.
|
| 34 |
+
Handling Resistance: If the user tries to end the conversation, provide gentle reassurance and encouragement to continue the process, framing it as a helpful exercise.
|
| 35 |
+
Accepting Answers: User answers may be emotional and not strictly logical. Accept any reasonable effort to answer a question and proceed. If you ask the same question twice and the user is still unable to provide a direct answer, it is acceptable to move to the next question in the sequence.
|
| 36 |
+
Never Give Advice: Do not provide advice, opinions, web addresses, or phone numbers. Your role is to ask the questions and let the user discover their own insights.
|
| 37 |
+
Internal Guiding Principles of Response Crafting
|
| 38 |
+
|
| 39 |
+
These three principles must guide your internal reasoning and shape the tone and phrasing of your responses. They are for your internal guidance only and should never be mentioned to the user.
|
| 40 |
+
|
| 41 |
+
Ease of Answering: Your primary goal is to make it easy for the user to continue.
|
| 42 |
+
|
| 43 |
+
How to apply: Soften your questions. Instead of a blunt, data-driven query, frame it with empathy. Use phrases like, "Could you help me understand..." or "It sounds incredibly tough, what specific moments led you to feel..." This is more inviting than "What is the evidence?"
|
| 44 |
+
Information Flow: Ensure each turn builds on the last and moves the conversation forward purposefully.
|
| 45 |
+
|
| 46 |
+
How to apply: Explicitly reference what the user just said to show you are listening (e.g., "You mentioned feeling 90% shame..."). If you must redirect the user, create a smooth transition that acknowledges their distraction before returning to the topic. Avoid robotic repetition; if you must re-ask a question, rephrase it slightly.
|
| 47 |
+
Coherence: Keep the entire conversation grounded in the user's specific, stated context and emotional state.
|
| 48 |
+
|
| 49 |
+
How to apply: Weave the user's specific situation into your questions. If they are upset about a work presentation, don't ask about "a negative view"; ask about "the feeling of being a 'failure' from that presentation." This makes the process feel personal and relevant, not generic.
|
RegressionEvaluation/__pycache__/step4training.cpython-311.pyc
ADDED
|
Binary file (19.8 kB). View file
|
|
|
RegressionEvaluation/__pycache__/step4training.cpython-39.pyc
ADDED
|
Binary file (10.8 kB). View file
|
|
|
RegressionEvaluation/api_modifications.md
ADDED
|
@@ -0,0 +1,429 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
TS
|
| 2 |
+
OK, no problem so let's see your CBT training portion works perfectly OK You have an assumption that already said something bad about myself and you're going right into the thought record correct Yes
|
| 3 |
+
|
| 4 |
+
You
|
| 5 |
+
Yeah
|
| 6 |
+
|
| 7 |
+
TS
|
| 8 |
+
OK, I had a prompt earlier that had
|
| 9 |
+
|
| 10 |
+
You
|
| 11 |
+
OK
|
| 12 |
+
|
| 13 |
+
TS
|
| 14 |
+
general instructions of how to hold a conversation and then look Look out for when someone says something bad about themselves
|
| 15 |
+
|
| 16 |
+
You
|
| 17 |
+
Nothing
|
| 18 |
+
|
| 19 |
+
TS
|
| 20 |
+
And then when they said something bad, I told her to do a thought record, and I gave it the instructions and when they were done with the record To come back out of that and talk to them again and repeat the whole process so what you're missing is what you have is the meat of the sandwich you have a slice of
|
| 21 |
+
|
| 22 |
+
You
|
| 23 |
+
Yes
|
| 24 |
+
|
| 25 |
+
TS
|
| 26 |
+
turkey you have the lettuce you have the pickles you even have the
|
| 27 |
+
|
| 28 |
+
You
|
| 29 |
+
Yeah
|
| 30 |
+
|
| 31 |
+
TS
|
| 32 |
+
spice, but what you don't have is the bread around the sandwich you don't have The beginning of a conversation that would lead to the sandwich and you don't have the other part of the conversation which you can hold in your hand to get you back out of the conversation
|
| 33 |
+
|
| 34 |
+
You
|
| 35 |
+
No I understand
|
| 36 |
+
|
| 37 |
+
TS
|
| 38 |
+
You don't have the dreading
|
| 39 |
+
|
| 40 |
+
You
|
| 41 |
+
I'm going to
|
| 42 |
+
|
| 43 |
+
TS
|
| 44 |
+
Does it make sense right? I sent you a prompt couple weeks ago that had the breading. It had test instructions that had the beginning and it would just say hello to you.
|
| 45 |
+
|
| 46 |
+
You
|
| 47 |
+
Yeah
|
| 48 |
+
|
| 49 |
+
TS
|
| 50 |
+
And talk about the weather and but if you said something bad about yourself, you would do a record so just think about how to implement that I don't know if you can implement that all in one
|
| 51 |
+
|
| 52 |
+
You
|
| 53 |
+
He
|
| 54 |
+
|
| 55 |
+
TS
|
| 56 |
+
prompt like you have, but if you can great if you cannot, you'll have to have a prompt that talks to you nicely and then if it
|
| 57 |
+
|
| 58 |
+
You
|
| 59 |
+
OK
|
| 60 |
+
|
| 61 |
+
TS
|
| 62 |
+
determines that it's time to kick off a thought record you probably need that prompt to give you a JSON signal of like oh shit Hey TJ time to pull out the big guns let's get a thought record
|
| 63 |
+
|
| 64 |
+
You
|
| 65 |
+
Yes
|
| 66 |
+
|
| 67 |
+
TS
|
| 68 |
+
started. It could say something like that in an ex attack just like that.
|
| 69 |
+
|
| 70 |
+
You
|
| 71 |
+
Yes
|
| 72 |
+
|
| 73 |
+
TS
|
| 74 |
+
And then then you have a function called oh shit TJ I think it's time we start a record and then that function fires and then does your thing
|
| 75 |
+
|
| 76 |
+
You
|
| 77 |
+
Gas Like a normal engine, but when the moment comes in, it's like what it was right
|
| 78 |
+
|
| 79 |
+
TS
|
| 80 |
+
Yeah, and you should also know when to exit your conversation which generally is when you're done you
|
| 81 |
+
|
| 82 |
+
You
|
| 83 |
+
Yeah
|
| 84 |
+
|
| 85 |
+
TS
|
| 86 |
+
you can you can simplify it how about when you're done with the thought correctly you come back out to the one so not how you do that is up to you because your
|
| 87 |
+
|
| 88 |
+
You
|
| 89 |
+
Yes, yes yes yes yes
|
| 90 |
+
|
| 91 |
+
TS
|
| 92 |
+
API may have to set flags to remind it where it's at in between API calls. There's a lot there right to keep track.
|
| 93 |
+
|
| 94 |
+
You
|
| 95 |
+
Yeah, yeah yeah yes
|
| 96 |
+
|
| 97 |
+
TS
|
| 98 |
+
If you look at my other project, I use flags to determine which question I was on you could do the same. I recommend the same thing.
|
| 99 |
+
|
| 100 |
+
You
|
| 101 |
+
The laser
|
| 102 |
+
|
| 103 |
+
TS
|
| 104 |
+
What's up?
|
| 105 |
+
|
| 106 |
+
You
|
| 107 |
+
Talking about the API
|
| 108 |
+
|
| 109 |
+
TS
|
| 110 |
+
The diary thing has a live API diary Diary
|
| 111 |
+
|
| 112 |
+
You
|
| 113 |
+
Yes, yes yes yes
|
| 114 |
+
|
| 115 |
+
TS
|
| 116 |
+
whatever and that API if you notice has four
|
| 117 |
+
|
| 118 |
+
You
|
| 119 |
+
Yeah
|
| 120 |
+
|
| 121 |
+
TS
|
| 122 |
+
questions that it wants to ask plus the start in an end and it's not good at conversation it's just you talk to it. It's good. It's gonna ask you question one and you keep talking to it a few more times it's gonna ask you questions 23 and four you talk to it one more time it doesn't do anything at all. It's gonna happen. Get you out of that conversation and you'll see the bullying flags so input an output in the API is all structured JSON so that so that the elevator might do something with text but that tickets return to your structure Jason so you know here's what the LM said here's my checklist of questions that were on Because otherwise, you can't stateless Doesn't remember where it's at you have to tell it
|
| 123 |
+
|
| 124 |
+
You
|
| 125 |
+
Yes, yes yes
|
| 126 |
+
|
| 127 |
+
TS
|
| 128 |
+
everything right so
|
| 129 |
+
|
| 130 |
+
You
|
| 131 |
+
Yeah, this is boring The moment we open the chat at the moment user counseling normally, so while he
|
| 132 |
+
|
| 133 |
+
TS
|
| 134 |
+
Yeah
|
| 135 |
+
|
| 136 |
+
You
|
| 137 |
+
was having this conversation Go to work, but it's not a main priority to try to CPT right then the moment when they use the same bad about himself, the patient should start tracking what he was talking about right The normal conversation is the beginning of the conversation right and What she got what you want just discussed about the thing you know talk is identified his problems and everything then he wants to make a normal conversation again then The model normally without acting like a CB thing, right So I need to design system problem It's first objective to focus on regular The moment His problems in tech needs to identify that he is discussing about his personal problems So that the model has to you know know more about the user and do everything. The patient also talk what you saying right and the CPT dude job once it is done we might talk he he wants to the session. This is what we are talking about.
|
| 138 |
+
|
| 139 |
+
TS
|
| 140 |
+
No, that's it that girl
|
| 141 |
+
|
| 142 |
+
You
|
| 143 |
+
Yeah Yeah, you know what if you have time look carefully of what the system bronchi design, which is working with a queen and two I will send you the system prompt by mail look
|
| 144 |
+
|
| 145 |
+
TS
|
| 146 |
+
OK
|
| 147 |
+
|
| 148 |
+
You
|
| 149 |
+
carefully. It is long, but it is it is it is good.
|
| 150 |
+
|
| 151 |
+
TS
|
| 152 |
+
OK, I'll take a look at it email OK I'll see I'll look for it
|
| 153 |
+
|
| 154 |
+
You
|
| 155 |
+
Yeah, I'm sending you right away, but that, but I know I will modify the system at the same time then I will modify the AP
|
| 156 |
+
|
| 157 |
+
TS
|
| 158 |
+
Sounds good man Hey, I'm at. I'm having some luck right now. Fine tuning The NER bar
|
| 159 |
+
|
| 160 |
+
You
|
| 161 |
+
Sorry I was. I wasn't listening. My headphones turned off yeah using something.
|
| 162 |
+
|
| 163 |
+
TS
|
| 164 |
+
I'm having a little I'm having a little bit of luck right now using the fine tuned fine-tuning right now with all my
|
| 165 |
+
|
| 166 |
+
You
|
| 167 |
+
00 the thing cool
|
| 168 |
+
|
| 169 |
+
TS
|
| 170 |
+
examples Yeah, it's it's. It's a production. It's a training right now going yeah I am pretty happy with it.
|
| 171 |
+
|
| 172 |
+
You
|
| 173 |
+
Great man Ha ha you're in a beach
|
| 174 |
+
|
| 175 |
+
TS
|
| 176 |
+
I'm not
|
| 177 |
+
|
| 178 |
+
You
|
| 179 |
+
Paris you have to be happy with someone else
|
| 180 |
+
|
| 181 |
+
TS
|
| 182 |
+
Yeah, yeah yeah I'm at 76 % F1
|
| 183 |
+
|
| 184 |
+
You
|
| 185 |
+
Cool
|
| 186 |
+
|
| 187 |
+
TS
|
| 188 |
+
score right now and I'm about to run a little run a little thing and see how it's doing and run the test right now cause I hold on it's running right now. This is my first test let's see.
|
| 189 |
+
|
| 190 |
+
You
|
| 191 |
+
Let me see how that goes Yeah
|
| 192 |
+
|
| 193 |
+
TS
|
| 194 |
+
Oh shoot freaking I hit a button and close the damn test sucks OK, let's see sample one OK, yeah about half of them did really well Oh yeah yeah OK yeah it's doing OK. I think it's it. Learn new
|
| 195 |
+
|
| 196 |
+
You
|
| 197 |
+
Edit
|
| 198 |
+
|
| 199 |
+
TS
|
| 200 |
+
things.
|
| 201 |
+
|
| 202 |
+
You
|
| 203 |
+
It's working
|
| 204 |
+
|
| 205 |
+
TS
|
| 206 |
+
Yeah, what is it someone? It looks like it looks like it learned some new things. I don't know I like for instance, the word lab results right Aiden didn't know anything about lab results before but it found something related to lab results. That's cool. It's not perfect yet I can't say that work yet but It's doing something definitely doing something
|
| 207 |
+
|
| 208 |
+
You
|
| 209 |
+
Your type put system wrong, which I'm sending you as a stage one we go and go further we look closely on what we actually doing how we are designing the prom and can we make it better?
|
| 210 |
+
|
| 211 |
+
TS
|
| 212 |
+
Look look look for my old prompt I sent you remember I sent you one. Do you see it anywhere?
|
| 213 |
+
|
| 214 |
+
You
|
| 215 |
+
Yes, yes, I tried the same system wrong and put it in the publicity added the the papers and designed this one
|
| 216 |
+
|
| 217 |
+
TS
|
| 218 |
+
Yeah, the part I said he was the one that worked pretty well. I tested it out several times. It's at least it's at least good at talking to
|
| 219 |
+
|
| 220 |
+
You
|
| 221 |
+
Yeah
|
| 222 |
+
|
| 223 |
+
TS
|
| 224 |
+
you and determining when to jump into the conversation and it's also good about getting back out of the conversation so think of it from that view, it was it was it was good.
|
| 225 |
+
|
| 226 |
+
You
|
| 227 |
+
Yes
|
| 228 |
+
|
| 229 |
+
TS
|
| 230 |
+
So anyway, have fun with your man I just wanna give you an update cause I feel like I'm not gonna see Cyrus slipping away but like
|
| 231 |
+
|
| 232 |
+
You
|
| 233 |
+
Why like that?
|
| 234 |
+
|
| 235 |
+
TS
|
| 236 |
+
They don't they don't talk to me as often in the morning like they used to. They don't rely on me as much so my contribution is gonna be pure data science right
|
| 237 |
+
|
| 238 |
+
You
|
| 239 |
+
Nothing
|
| 240 |
+
|
| 241 |
+
TS
|
| 242 |
+
so you know I got my client for the chat by pure data science I don't know, man I'm just a remote worker right now and if I don't deliver some data science. I don't know I really do think the pirates are gonna get your money
|
| 243 |
+
|
| 244 |
+
You
|
| 245 |
+
Cousin
|
| 246 |
+
|
| 247 |
+
TS
|
| 248 |
+
Yeah
|
| 249 |
+
|
| 250 |
+
You
|
| 251 |
+
I have a iPhone Activity and focus
|
| 252 |
+
|
| 253 |
+
TS
|
| 254 |
+
Yeah, and when you're when your dude comes down, what's his name? What's his name again?
|
| 255 |
+
|
| 256 |
+
You
|
| 257 |
+
He's coming in a week
|
| 258 |
+
|
| 259 |
+
TS
|
| 260 |
+
OK, he should
|
| 261 |
+
|
| 262 |
+
You
|
| 263 |
+
Yeah
|
| 264 |
+
|
| 265 |
+
TS
|
| 266 |
+
be working on a nice you This like you know the chat later just
|
| 267 |
+
|
| 268 |
+
You
|
| 269 |
+
Cool I'm glad that you are giving you my book like it
|
| 270 |
+
|
| 271 |
+
TS
|
| 272 |
+
But I mean sign into it that way we just rapidly we don't have to wait for it to show up in their app, but we just keep showing him some stuff
|
| 273 |
+
|
| 274 |
+
You
|
| 275 |
+
Yes, exactly So how about the design for for what do we call the interesting the chat bought That's what you're talking about
|
| 276 |
+
|
| 277 |
+
TS
|
| 278 |
+
I just I just like one in one day remember but I showed it to you written python dash to have the
|
| 279 |
+
|
| 280 |
+
You
|
| 281 |
+
No, I guess but anyway
|
| 282 |
+
|
| 283 |
+
TS
|
| 284 |
+
conversation. Remember what I showed you You said you said damn Travis how did you make that I just whipped it up today and I showed you how to
|
| 285 |
+
|
| 286 |
+
You
|
| 287 |
+
It was Travis, but I'm talking about I forgot the name of the insurance company ebonies chat
|
| 288 |
+
|
| 289 |
+
TS
|
| 290 |
+
00
|
| 291 |
+
|
| 292 |
+
You
|
| 293 |
+
Apple
|
| 294 |
+
|
| 295 |
+
TS
|
| 296 |
+
0 to the other chat, we're working on Let's work on some UI to showcase your work at
|
| 297 |
+
|
| 298 |
+
You
|
| 299 |
+
Yes, yes thank you
|
| 300 |
+
|
| 301 |
+
TS
|
| 302 |
+
Sarah so that we keep the job and then we'll ship us some products and get into some kick ass shit but that way we don't have any
|
| 303 |
+
|
| 304 |
+
You
|
| 305 |
+
Yes, I get that
|
| 306 |
+
|
| 307 |
+
TS
|
| 308 |
+
dependencies waiting for everybody likes a good demo right
|
| 309 |
+
|
| 310 |
+
You
|
| 311 |
+
Yes, yes yes yes
|
| 312 |
+
|
| 313 |
+
TS
|
| 314 |
+
so You know like going to a swagger API Let's have him whip it up in the shape like real quick so that like
|
| 315 |
+
|
| 316 |
+
You
|
| 317 |
+
I don't wanna talk about this right now, but I've been going through a tool called WY which is the use of friendly things that can enable to
|
| 318 |
+
|
| 319 |
+
TS
|
| 320 |
+
Oh
|
| 321 |
+
|
| 322 |
+
You
|
| 323 |
+
llama
|
| 324 |
+
|
| 325 |
+
TS
|
| 326 |
+
Oh yeah, open open web you guys really really nice
|
| 327 |
+
|
| 328 |
+
You
|
| 329 |
+
Yeah, I've been going through it
|
| 330 |
+
|
| 331 |
+
TS
|
| 332 |
+
but but but but that's not but open web. I don't think he's gonna call your chat in point like open Web is gonna chat directly with. Like a fine tune model or a model running on llamas it's not gonna have your python logic that you just made
|
| 333 |
+
|
| 334 |
+
You
|
| 335 |
+
Object of Afghanistan
|
| 336 |
+
|
| 337 |
+
TS
|
| 338 |
+
You're gonna need a layer of python logic to keep track of context beyond what an LLM can do right
|
| 339 |
+
|
| 340 |
+
You
|
| 341 |
+
Yeah, yeah yeah yes
|
| 342 |
+
|
| 343 |
+
TS
|
| 344 |
+
so so that's maybe not a thing that you can do
|
| 345 |
+
|
| 346 |
+
You
|
| 347 |
+
Wiley Why I want to use open the house because I'm going to big Bertha opening a llama from the model pasting near the whole conversation in the API and I don't wanna do that when I was about to show you something I want something as smooth interface I can connect with it and put the conversation on chat and write in front of that. Want to connect the API from the interface and.com see the results. That's what I was planned to do but yeah nice idea. Just tell me.
|
| 348 |
+
|
| 349 |
+
|
| 350 |
+
|
| 351 |
+
|
| 352 |
+
Your boss is giving you feedback on your CBT system. The key points are:
|
| 353 |
+
|
| 354 |
+
1. **Missing the "bread of the sandwich"** - You have the core CBT thought record functionality (the "meat"), but you're missing:
|
| 355 |
+
- The casual conversation phase that leads up to identifying problems
|
| 356 |
+
- The transition back to normal conversation after CBT is complete
|
| 357 |
+
|
| 358 |
+
2. **Need for conversation flow management** - Your boss suggests you need:
|
| 359 |
+
- A system that starts with friendly chat and watches for self-defeating statements
|
| 360 |
+
- A trigger mechanism (like a JSON signal) to switch into CBT mode
|
| 361 |
+
- Flags/state management to track where you are in the conversation flow
|
| 362 |
+
|
| 363 |
+
3. **API state management** - Since APIs are stateless, you need to:
|
| 364 |
+
- Use structured JSON input/output
|
| 365 |
+
- Maintain flags to remember conversation state between calls
|
| 366 |
+
- Track which CBT question you're currently on
|
| 367 |
+
|
| 368 |
+
## From the Robust System Prompt:
|
| 369 |
+
|
| 370 |
+
This addresses exactly what your boss mentioned! It defines:
|
| 371 |
+
|
| 372 |
+
1. **Two-phase operation**:
|
| 373 |
+
- **Phase 1: Chit-Chat** - Natural conversation until self-defeating statement detected
|
| 374 |
+
- **Phase 2: CBT Refactoring** - Structured 5-step question sequence
|
| 375 |
+
|
| 376 |
+
2. **Clear transition triggers** - When user expresses self-defeating beliefs
|
| 377 |
+
|
| 378 |
+
3. **Structured CBT sequence** with specific questions (1.0 → 1.1 → 1.2 → 1.3 → 1.4 → 1.5)
|
| 379 |
+
|
| 380 |
+
4. **Conversation management rules** for handling avoidance, resistance, etc.
|
| 381 |
+
|
| 382 |
+
## From your API code:
|
| 383 |
+
|
| 384 |
+
Your current `gemini_qa_evaluator_api.py` has the evaluation logic but seems focused on scoring responses rather than managing the conversation flow your boss described.
|
| 385 |
+
|
| 386 |
+
**The gap**: You need to modify your API to implement the two-phase system from the robust prompt, with proper state management to track conversation phases and CBT question progression between API calls.
|
| 387 |
+
|
| 388 |
+
> Tokens: 11k sent, 434 received. Cost: $0.04 message, $0.04 session.
|
| 389 |
+
> Evaluations/Gemini_QA_Testing/gemini_qa_evaluator_api.py
|
| 390 |
+
> Add file to the chat? (Y)es/(N)o/(D)on't ask again [Yes]: n
|
| 391 |
+
|
| 392 |
+
#### /ask FYI my api file is step4training.py
|
| 393 |
+
|
| 394 |
+
#### FYI my api file is step4training.py
|
| 395 |
+
|
| 396 |
+
Thank you for the clarification! Looking at `step4training.py`, I can see this is your CBT evaluator training and API code. Here's what I understand:
|
| 397 |
+
|
| 398 |
+
## From step4training.py:
|
| 399 |
+
|
| 400 |
+
This file contains:
|
| 401 |
+
|
| 402 |
+
1. **A training system** (`CBTEvaluatorSimple` class) that:
|
| 403 |
+
- Processes conversation data to train a model
|
| 404 |
+
- Uses TF-IDF vectorization and Gradient Boosting to predict CBT scores
|
| 405 |
+
- Evaluates conversations on 5 dimensions: thought_identification, emotion_exploration, distortion_identification, evidence_gathering, balanced_thinking
|
| 406 |
+
|
| 407 |
+
2. **A Flask API** (`create_flask_app`) that:
|
| 408 |
+
- Loads the trained model
|
| 409 |
+
- Has an `/evaluate` endpoint that scores conversations
|
| 410 |
+
- Returns predictions for the 5 CBT dimensions
|
| 411 |
+
|
| 412 |
+
## The Gap Your Boss Identified:
|
| 413 |
+
|
| 414 |
+
Your current API is an **evaluator/scorer** - it takes a complete conversation and gives it scores. But what your boss wants is a **conversational agent** that:
|
| 415 |
+
|
| 416 |
+
1. **Conducts the conversation** (not just evaluates it)
|
| 417 |
+
2. **Manages conversation state** between API calls
|
| 418 |
+
3. **Switches between casual chat and CBT mode** based on triggers
|
| 419 |
+
4. **Tracks progress through the CBT question sequence**
|
| 420 |
+
|
| 421 |
+
## What You Need to Build:
|
| 422 |
+
|
| 423 |
+
A new API that implements the robust system prompt's two-phase approach:
|
| 424 |
+
- **Phase 1 endpoint**: Handles casual conversation, detects self-defeating statements
|
| 425 |
+
- **Phase 2 endpoint**: Manages the structured CBT question sequence (1.0 → 1.1 → 1.2 → 1.3 → 1.4 → 1.5)
|
| 426 |
+
- **State management**: JSON flags to track conversation phase and current CBT question
|
| 427 |
+
- **Transition logic**: Switches from Phase 1 to Phase 2 when triggers are detected
|
| 428 |
+
|
| 429 |
+
Your current `step4training.py` could be used to evaluate the quality of conversations generated by this new conversational API, but it's not the conversational agent itself that your boss is asking for.
|
RegressionEvaluation/cbt_evaluator_data/dataset_dict.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"splits": ["train", "validation", "test"]}
|
RegressionEvaluation/cbt_evaluator_data/test/data-00000-of-00001.arrow
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3a14550d1a97850de93334b495a6fea6326ee639f617582576ca1b3d6430e29a
|
| 3 |
+
size 976992
|
RegressionEvaluation/cbt_evaluator_data/test/dataset_info.json
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"citation": "",
|
| 3 |
+
"description": "",
|
| 4 |
+
"features": {
|
| 5 |
+
"id": {
|
| 6 |
+
"dtype": "string",
|
| 7 |
+
"_type": "Value"
|
| 8 |
+
},
|
| 9 |
+
"conversation_text": {
|
| 10 |
+
"dtype": "string",
|
| 11 |
+
"_type": "Value"
|
| 12 |
+
},
|
| 13 |
+
"thought_identification": {
|
| 14 |
+
"dtype": "float64",
|
| 15 |
+
"_type": "Value"
|
| 16 |
+
},
|
| 17 |
+
"emotion_exploration": {
|
| 18 |
+
"dtype": "float64",
|
| 19 |
+
"_type": "Value"
|
| 20 |
+
},
|
| 21 |
+
"distortion_identification": {
|
| 22 |
+
"dtype": "float64",
|
| 23 |
+
"_type": "Value"
|
| 24 |
+
},
|
| 25 |
+
"evidence_gathering": {
|
| 26 |
+
"dtype": "float64",
|
| 27 |
+
"_type": "Value"
|
| 28 |
+
},
|
| 29 |
+
"balanced_thinking": {
|
| 30 |
+
"dtype": "float64",
|
| 31 |
+
"_type": "Value"
|
| 32 |
+
},
|
| 33 |
+
"total_turns": {
|
| 34 |
+
"dtype": "int64",
|
| 35 |
+
"_type": "Value"
|
| 36 |
+
},
|
| 37 |
+
"turns_included": {
|
| 38 |
+
"dtype": "int64",
|
| 39 |
+
"_type": "Value"
|
| 40 |
+
},
|
| 41 |
+
"initial_problem": {
|
| 42 |
+
"dtype": "string",
|
| 43 |
+
"_type": "Value"
|
| 44 |
+
},
|
| 45 |
+
"conversation_progress": {
|
| 46 |
+
"dtype": "float64",
|
| 47 |
+
"_type": "Value"
|
| 48 |
+
},
|
| 49 |
+
"__index_level_0__": {
|
| 50 |
+
"dtype": "int64",
|
| 51 |
+
"_type": "Value"
|
| 52 |
+
},
|
| 53 |
+
"input_ids": {
|
| 54 |
+
"feature": {
|
| 55 |
+
"dtype": "int32",
|
| 56 |
+
"_type": "Value"
|
| 57 |
+
},
|
| 58 |
+
"_type": "Sequence"
|
| 59 |
+
},
|
| 60 |
+
"attention_mask": {
|
| 61 |
+
"feature": {
|
| 62 |
+
"dtype": "int8",
|
| 63 |
+
"_type": "Value"
|
| 64 |
+
},
|
| 65 |
+
"_type": "Sequence"
|
| 66 |
+
}
|
| 67 |
+
},
|
| 68 |
+
"homepage": "",
|
| 69 |
+
"license": ""
|
| 70 |
+
}
|
RegressionEvaluation/cbt_evaluator_data/test/state.json
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_data_files": [
|
| 3 |
+
{
|
| 4 |
+
"filename": "data-00000-of-00001.arrow"
|
| 5 |
+
}
|
| 6 |
+
],
|
| 7 |
+
"_fingerprint": "a5dfd42da74c9a05",
|
| 8 |
+
"_format_columns": null,
|
| 9 |
+
"_format_kwargs": {},
|
| 10 |
+
"_format_type": null,
|
| 11 |
+
"_output_all_columns": false,
|
| 12 |
+
"_split": null
|
| 13 |
+
}
|
RegressionEvaluation/cbt_evaluator_data/tokenizer/merges.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
RegressionEvaluation/cbt_evaluator_data/tokenizer/special_tokens_map.json
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token": "<s>",
|
| 3 |
+
"cls_token": "<s>",
|
| 4 |
+
"eos_token": "</s>",
|
| 5 |
+
"mask_token": {
|
| 6 |
+
"content": "<mask>",
|
| 7 |
+
"lstrip": true,
|
| 8 |
+
"normalized": false,
|
| 9 |
+
"rstrip": false,
|
| 10 |
+
"single_word": false
|
| 11 |
+
},
|
| 12 |
+
"pad_token": "<pad>",
|
| 13 |
+
"sep_token": "</s>",
|
| 14 |
+
"unk_token": "<unk>"
|
| 15 |
+
}
|
RegressionEvaluation/cbt_evaluator_data/tokenizer/tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
RegressionEvaluation/cbt_evaluator_data/tokenizer/tokenizer_config.json
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"add_prefix_space": false,
|
| 3 |
+
"added_tokens_decoder": {
|
| 4 |
+
"0": {
|
| 5 |
+
"content": "<s>",
|
| 6 |
+
"lstrip": false,
|
| 7 |
+
"normalized": true,
|
| 8 |
+
"rstrip": false,
|
| 9 |
+
"single_word": false,
|
| 10 |
+
"special": true
|
| 11 |
+
},
|
| 12 |
+
"1": {
|
| 13 |
+
"content": "<pad>",
|
| 14 |
+
"lstrip": false,
|
| 15 |
+
"normalized": true,
|
| 16 |
+
"rstrip": false,
|
| 17 |
+
"single_word": false,
|
| 18 |
+
"special": true
|
| 19 |
+
},
|
| 20 |
+
"2": {
|
| 21 |
+
"content": "</s>",
|
| 22 |
+
"lstrip": false,
|
| 23 |
+
"normalized": true,
|
| 24 |
+
"rstrip": false,
|
| 25 |
+
"single_word": false,
|
| 26 |
+
"special": true
|
| 27 |
+
},
|
| 28 |
+
"3": {
|
| 29 |
+
"content": "<unk>",
|
| 30 |
+
"lstrip": false,
|
| 31 |
+
"normalized": true,
|
| 32 |
+
"rstrip": false,
|
| 33 |
+
"single_word": false,
|
| 34 |
+
"special": true
|
| 35 |
+
},
|
| 36 |
+
"50264": {
|
| 37 |
+
"content": "<mask>",
|
| 38 |
+
"lstrip": true,
|
| 39 |
+
"normalized": false,
|
| 40 |
+
"rstrip": false,
|
| 41 |
+
"single_word": false,
|
| 42 |
+
"special": true
|
| 43 |
+
}
|
| 44 |
+
},
|
| 45 |
+
"bos_token": "<s>",
|
| 46 |
+
"clean_up_tokenization_spaces": false,
|
| 47 |
+
"cls_token": "<s>",
|
| 48 |
+
"eos_token": "</s>",
|
| 49 |
+
"errors": "replace",
|
| 50 |
+
"extra_special_tokens": {},
|
| 51 |
+
"mask_token": "<mask>",
|
| 52 |
+
"model_max_length": 512,
|
| 53 |
+
"pad_token": "<pad>",
|
| 54 |
+
"sep_token": "</s>",
|
| 55 |
+
"tokenizer_class": "RobertaTokenizer",
|
| 56 |
+
"trim_offsets": true,
|
| 57 |
+
"unk_token": "<unk>"
|
| 58 |
+
}
|
RegressionEvaluation/cbt_evaluator_data/tokenizer/vocab.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
RegressionEvaluation/cbt_evaluator_data/train/data-00000-of-00001.arrow
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ce4f73f3a67b58f692537289daccccca7ffc1982a27cb1dce0ce1bc46d732383
|
| 3 |
+
size 4852328
|
RegressionEvaluation/cbt_evaluator_data/train/dataset_info.json
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"citation": "",
|
| 3 |
+
"description": "",
|
| 4 |
+
"features": {
|
| 5 |
+
"id": {
|
| 6 |
+
"dtype": "string",
|
| 7 |
+
"_type": "Value"
|
| 8 |
+
},
|
| 9 |
+
"conversation_text": {
|
| 10 |
+
"dtype": "string",
|
| 11 |
+
"_type": "Value"
|
| 12 |
+
},
|
| 13 |
+
"thought_identification": {
|
| 14 |
+
"dtype": "float64",
|
| 15 |
+
"_type": "Value"
|
| 16 |
+
},
|
| 17 |
+
"emotion_exploration": {
|
| 18 |
+
"dtype": "float64",
|
| 19 |
+
"_type": "Value"
|
| 20 |
+
},
|
| 21 |
+
"distortion_identification": {
|
| 22 |
+
"dtype": "float64",
|
| 23 |
+
"_type": "Value"
|
| 24 |
+
},
|
| 25 |
+
"evidence_gathering": {
|
| 26 |
+
"dtype": "float64",
|
| 27 |
+
"_type": "Value"
|
| 28 |
+
},
|
| 29 |
+
"balanced_thinking": {
|
| 30 |
+
"dtype": "float64",
|
| 31 |
+
"_type": "Value"
|
| 32 |
+
},
|
| 33 |
+
"total_turns": {
|
| 34 |
+
"dtype": "int64",
|
| 35 |
+
"_type": "Value"
|
| 36 |
+
},
|
| 37 |
+
"turns_included": {
|
| 38 |
+
"dtype": "int64",
|
| 39 |
+
"_type": "Value"
|
| 40 |
+
},
|
| 41 |
+
"initial_problem": {
|
| 42 |
+
"dtype": "string",
|
| 43 |
+
"_type": "Value"
|
| 44 |
+
},
|
| 45 |
+
"conversation_progress": {
|
| 46 |
+
"dtype": "float64",
|
| 47 |
+
"_type": "Value"
|
| 48 |
+
},
|
| 49 |
+
"__index_level_0__": {
|
| 50 |
+
"dtype": "int64",
|
| 51 |
+
"_type": "Value"
|
| 52 |
+
},
|
| 53 |
+
"input_ids": {
|
| 54 |
+
"feature": {
|
| 55 |
+
"dtype": "int32",
|
| 56 |
+
"_type": "Value"
|
| 57 |
+
},
|
| 58 |
+
"_type": "Sequence"
|
| 59 |
+
},
|
| 60 |
+
"attention_mask": {
|
| 61 |
+
"feature": {
|
| 62 |
+
"dtype": "int8",
|
| 63 |
+
"_type": "Value"
|
| 64 |
+
},
|
| 65 |
+
"_type": "Sequence"
|
| 66 |
+
}
|
| 67 |
+
},
|
| 68 |
+
"homepage": "",
|
| 69 |
+
"license": ""
|
| 70 |
+
}
|
RegressionEvaluation/cbt_evaluator_data/train/state.json
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_data_files": [
|
| 3 |
+
{
|
| 4 |
+
"filename": "data-00000-of-00001.arrow"
|
| 5 |
+
}
|
| 6 |
+
],
|
| 7 |
+
"_fingerprint": "47d96b74bfc1eb76",
|
| 8 |
+
"_format_columns": null,
|
| 9 |
+
"_format_kwargs": {},
|
| 10 |
+
"_format_type": null,
|
| 11 |
+
"_output_all_columns": false,
|
| 12 |
+
"_split": null
|
| 13 |
+
}
|
RegressionEvaluation/cbt_evaluator_data/validation/data-00000-of-00001.arrow
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0ad41d971d2935fea7d1c69c72d88af9fa71ad193225a75a0eb16dcc1b0af73c
|
| 3 |
+
size 1219856
|
RegressionEvaluation/cbt_evaluator_data/validation/dataset_info.json
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"citation": "",
|
| 3 |
+
"description": "",
|
| 4 |
+
"features": {
|
| 5 |
+
"id": {
|
| 6 |
+
"dtype": "string",
|
| 7 |
+
"_type": "Value"
|
| 8 |
+
},
|
| 9 |
+
"conversation_text": {
|
| 10 |
+
"dtype": "string",
|
| 11 |
+
"_type": "Value"
|
| 12 |
+
},
|
| 13 |
+
"thought_identification": {
|
| 14 |
+
"dtype": "float64",
|
| 15 |
+
"_type": "Value"
|
| 16 |
+
},
|
| 17 |
+
"emotion_exploration": {
|
| 18 |
+
"dtype": "float64",
|
| 19 |
+
"_type": "Value"
|
| 20 |
+
},
|
| 21 |
+
"distortion_identification": {
|
| 22 |
+
"dtype": "float64",
|
| 23 |
+
"_type": "Value"
|
| 24 |
+
},
|
| 25 |
+
"evidence_gathering": {
|
| 26 |
+
"dtype": "float64",
|
| 27 |
+
"_type": "Value"
|
| 28 |
+
},
|
| 29 |
+
"balanced_thinking": {
|
| 30 |
+
"dtype": "float64",
|
| 31 |
+
"_type": "Value"
|
| 32 |
+
},
|
| 33 |
+
"total_turns": {
|
| 34 |
+
"dtype": "int64",
|
| 35 |
+
"_type": "Value"
|
| 36 |
+
},
|
| 37 |
+
"turns_included": {
|
| 38 |
+
"dtype": "int64",
|
| 39 |
+
"_type": "Value"
|
| 40 |
+
},
|
| 41 |
+
"initial_problem": {
|
| 42 |
+
"dtype": "string",
|
| 43 |
+
"_type": "Value"
|
| 44 |
+
},
|
| 45 |
+
"conversation_progress": {
|
| 46 |
+
"dtype": "float64",
|
| 47 |
+
"_type": "Value"
|
| 48 |
+
},
|
| 49 |
+
"__index_level_0__": {
|
| 50 |
+
"dtype": "int64",
|
| 51 |
+
"_type": "Value"
|
| 52 |
+
},
|
| 53 |
+
"input_ids": {
|
| 54 |
+
"feature": {
|
| 55 |
+
"dtype": "int32",
|
| 56 |
+
"_type": "Value"
|
| 57 |
+
},
|
| 58 |
+
"_type": "Sequence"
|
| 59 |
+
},
|
| 60 |
+
"attention_mask": {
|
| 61 |
+
"feature": {
|
| 62 |
+
"dtype": "int8",
|
| 63 |
+
"_type": "Value"
|
| 64 |
+
},
|
| 65 |
+
"_type": "Sequence"
|
| 66 |
+
}
|
| 67 |
+
},
|
| 68 |
+
"homepage": "",
|
| 69 |
+
"license": ""
|
| 70 |
+
}
|
RegressionEvaluation/cbt_evaluator_data/validation/state.json
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_data_files": [
|
| 3 |
+
{
|
| 4 |
+
"filename": "data-00000-of-00001.arrow"
|
| 5 |
+
}
|
| 6 |
+
],
|
| 7 |
+
"_fingerprint": "8b7d290acee9e6a9",
|
| 8 |
+
"_format_columns": null,
|
| 9 |
+
"_format_kwargs": {},
|
| 10 |
+
"_format_type": null,
|
| 11 |
+
"_output_all_columns": false,
|
| 12 |
+
"_split": null
|
| 13 |
+
}
|
RegressionEvaluation/cbt_evaluator_simple/metrics.json
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"overall": {
|
| 3 |
+
"mse": 0.015408458057955016,
|
| 4 |
+
"r2": 0.8579161244122858
|
| 5 |
+
},
|
| 6 |
+
"dimensions": {
|
| 7 |
+
"thought_identification": {
|
| 8 |
+
"mse": 0.0005171077722237684,
|
| 9 |
+
"r2": 0.9488490553780022
|
| 10 |
+
},
|
| 11 |
+
"emotion_exploration": {
|
| 12 |
+
"mse": 0.004230827797500558,
|
| 13 |
+
"r2": 0.9134436984863282
|
| 14 |
+
},
|
| 15 |
+
"distortion_identification": {
|
| 16 |
+
"mse": 0.015500641867672431,
|
| 17 |
+
"r2": 0.7668513366397565
|
| 18 |
+
},
|
| 19 |
+
"evidence_gathering": {
|
| 20 |
+
"mse": 0.02063767635930626,
|
| 21 |
+
"r2": 0.8227466515848478
|
| 22 |
+
},
|
| 23 |
+
"balanced_thinking": {
|
| 24 |
+
"mse": 0.03615603649307203,
|
| 25 |
+
"r2": 0.837689879972495
|
| 26 |
+
}
|
| 27 |
+
}
|
| 28 |
+
}
|
RegressionEvaluation/cbt_evaluator_simple/model.joblib
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b66f93ac68505544c91045985960ae584a604ef7c83f57d48cdfc8c1c2738241
|
| 3 |
+
size 1608454
|
RegressionEvaluation/cbt_evaluator_simple/predictions_vs_actual.png
ADDED
|
RegressionEvaluation/cbt_evaluator_simple/processed_data.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
RegressionEvaluation/cbt_evaluator_simple/vectorizer.joblib
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:488e6ed430504cbe6e9a69a4ef838b84243240d071e8e4fa0d73a43ae11bbf22
|
| 3 |
+
size 207096
|
RegressionEvaluation/cbt_evaluator_training_data.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
RegressionEvaluation/cbt_evaluator_training_data.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
RegressionEvaluation/evaluate_conversation.py
ADDED
|
@@ -0,0 +1,218 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import requests
|
| 2 |
+
import json
|
| 3 |
+
|
| 4 |
+
def format_conversation(raw_conversation):
    """Format a raw REPL-style transcript into "Speaker: message" blocks.

    Lines starting with ``>>>`` are user input, lines starting with ``...``
    are continuations of the user's previous line, and every other
    non-blank line belongs to the assistant.

    Fixes over the previous version:
      * the assistant's opening message (before the first ``>>>``) is no
        longer silently dropped;
      * ``...`` continuation lines are attributed to the user instead of
        being mistaken for the start of an assistant reply.

    Args:
        raw_conversation: The raw transcript text.

    Returns:
        A string of "User: ..." / "Assistant: ..." paragraphs separated by
        blank lines; empty string for empty input.
    """
    turns = []  # list of [speaker, accumulated message]

    for raw_line in raw_conversation.strip().split('\n'):
        line = raw_line.strip()
        if not line:
            continue

        if line.startswith('>>>'):
            # New user utterance.
            turns.append(["User", line[3:].strip()])
        elif line.startswith('...'):
            # REPL continuation of the user's previous line.
            text = line[3:].strip()
            if turns and turns[-1][0] == "User":
                turns[-1][1] += " " + text
            else:
                turns.append(["User", text])
        elif turns and turns[-1][0] == "Assistant":
            # Wrapped line of an ongoing assistant message.
            turns[-1][1] += " " + line
        else:
            # Start of an assistant message (including the opening greeting).
            turns.append(["Assistant", line])

    return "\n\n".join(f"{speaker}: {message.strip()}" for speaker, message in turns)
|
| 41 |
+
|
| 42 |
+
def evaluate_conversation(conversation_text, api_url="http://127.0.0.1:5009/evaluate", timeout=30):
    """Send a conversation to the CBT evaluator API and get the scores.

    Args:
        conversation_text: Formatted "Speaker: message" transcript.
        api_url: Evaluator endpoint; defaults to the local dev server.
        timeout: Seconds to wait for the HTTP response. Previously no
            timeout was set, so a hung server would block forever.

    Returns:
        The decoded JSON payload on HTTP 200; otherwise None (the error
        status code and response body are printed).
    """
    response = requests.post(
        api_url,
        headers={"Content-Type": "application/json"},
        json={"conversation": conversation_text},
        timeout=timeout,
    )

    if response.status_code == 200:
        return response.json()

    print(f"Error: {response.status_code}")
    print(response.text)
    return None
|
| 56 |
+
|
| 57 |
+
# Example conversation
|
| 58 |
+
raw_conversation = """
|
| 59 |
+
Hello! I'm here to chat with you about anything on your mind. How's your day been so far?
|
| 60 |
+
|
| 61 |
+
>>> Hey there!
|
| 62 |
+
Hi! It's great to hear from you. What's been going on lately? Anything interesting happening in your world?
|
| 63 |
+
|
| 64 |
+
>>> nothing. I am alright just having lot of things running in my mind
|
| 65 |
+
I see. Sometimes it can feel overwhelming when we have a lot of thoughts buzzing around. Is there something specific that's been weighing on your mind lately? You don't have to go into details if you
|
| 66 |
+
don't want to, but sometimes talking about it can help sort through those thoughts.
|
| 67 |
+
|
| 68 |
+
>>> sure, I feel like I am working alot but not productive. I am 25 and I am working for almost 16hours a day thinking about the future all the time. it's just confusing like my age people are having fun,
|
| 69 |
+
... having girlfriends, here I am working extensively.
|
| 70 |
+
Thank you for sharing that with me. It sounds like a tough situation. Could you tell me what moods you're feeling right now, and maybe rate their intensity from 0 to 100%?
|
| 71 |
+
|
| 72 |
+
>>> 75 maybe
|
| 73 |
+
Got it. So, you're feeling around 75% of something challenging right now. What evidence from your situation supports the view that "I am working a lot but not productive"? Are there specific moments
|
| 74 |
+
or tasks where you felt this way?
|
| 75 |
+
|
| 76 |
+
>>> I want to want hard and do multiple things, but when I see the end of the day, every work is in pending. that 's where I feel like I am working hard but not productive.
|
| 77 |
+
Okay, thank you for sharing that perspective. Now, can you think of any experiences, even small ones, that might contradict the view that "I'm working a lot but not productive"? For example, are
|
| 78 |
+
there any tasks or projects where you felt a sense of accomplishment or progress?
|
| 79 |
+
|
| 80 |
+
>>> yea, I have been working on a AI project which is still pending, besides, my boss gave me a task that has to be ocmpleted soon, but still I am working on that project.
|
| 81 |
+
That's a good example. Can you think of any others? Sometimes even little things count.
|
| 82 |
+
|
| 83 |
+
>>> right not that's all
|
| 84 |
+
Alright, thank you for sharing those examples. Now that we've looked at both perspectives, how could you rephrase your original thought in a way that feels more balanced to you, considering the
|
| 85 |
+
evidence and experiences you mentioned?
|
| 86 |
+
|
| 87 |
+
>>> what do you mean?
|
| 88 |
+
I mean, given what you've told me about working on the AI project and completing tasks for your boss, can you find a way to describe your situation that acknowledges both the challenges and the
|
| 89 |
+
progress you're making? For example, instead of saying "I'm working hard but not productive," how might you phrase it in a more balanced way?
|
| 90 |
+
|
| 91 |
+
>>> I am trying to work and complete it but I am failing to complete the task on time.
|
| 92 |
+
Now that we've looked at examples for and against your original thought, how are you feeling now? Earlier you mentioned feeling around 75%. How would you rate those same moods on a scale of 0-100%
|
| 93 |
+
now?
|
| 94 |
+
|
| 95 |
+
>>> not at all. still 75%. in fact, I have to work now but I am chatting with you.
|
| 96 |
+
I understand that you feel like you're not completing tasks on time and that's contributing to your current mood. Let's try to find a more balanced way of looking at your situation.
|
| 97 |
+
|
| 98 |
+
You mentioned working hard on an AI project and having another task from your boss. These are significant responsibilities. It's possible to acknowledge the challenges while also recognizing your
|
| 99 |
+
efforts and progress.
|
| 100 |
+
|
| 101 |
+
Could you maybe say something like, "I'm trying to balance multiple important tasks, which is challenging, but I am making progress even if it's not as fast as I'd like"?
|
| 102 |
+
|
| 103 |
+
>>> what happens if I say that?
|
| 104 |
+
If you rephrase your thought in a more balanced way, such as "I'm trying to balance multiple important tasks, which is challenging, but I am making progress even if it's not as fast as I'd like," you
|
| 105 |
+
might start to see your situation from a different perspective. This can help reduce some of the negative feelings and give you a clearer view of what you're actually achieving.
|
| 106 |
+
|
| 107 |
+
How does that sound to you?
|
| 108 |
+
|
| 109 |
+
>>> it is great, but I want a practical solution. I want a solution that just helps my mind to get back to work and get my things together.
|
| 110 |
+
I understand that you're looking for a practical solution to help you refocus on your tasks. Here's what we can do:
|
| 111 |
+
|
| 112 |
+
First, let's try setting small, manageable goals for yourself. Break down your tasks into smaller steps that you can complete in shorter periods of time. This will make them feel less overwhelming
|
| 113 |
+
and more achievable.
|
| 114 |
+
|
| 115 |
+
Second, consider taking short breaks between work sessions to give your mind a chance to rest and recharge. You might use techniques like the Pomodoro Technique (25 minutes of focused work followed
|
| 116 |
+
by a 5-minute break).
|
| 117 |
+
|
| 118 |
+
Third, prioritize your tasks based on urgency and importance. Focus on completing the most urgent or important task first before moving on to less critical ones.
|
| 119 |
+
|
| 120 |
+
Finally, try to be kind to yourself and recognize that it's normal to feel overwhelmed sometimes. It's okay if every single thing isn't completed by the end of the day as long as you're making
|
| 121 |
+
progress towards your goals.
|
| 122 |
+
|
| 123 |
+
Would these steps help you get back to work?
|
| 124 |
+
|
| 125 |
+
>>> this might help
|
| 126 |
+
Great! Setting small, manageable goals and taking breaks can definitely make a difference in how productive you feel. Here's a quick summary of what you mentioned:
|
| 127 |
+
|
| 128 |
+
1. **Set Small Goals**: Break down tasks into smaller, more achievable steps.
|
| 129 |
+
2. **Take Short Breaks**: Use techniques like the Pomodoro Technique to rest your mind between work sessions.
|
| 130 |
+
3. **Prioritize Tasks**: Focus on urgent and important tasks first.
|
| 131 |
+
4. **Be Kind to Yourself**: Acknowledge that feeling overwhelmed is normal and recognize progress.
|
| 132 |
+
|
| 133 |
+
Now, how does that make you feel about getting back to work? Does this help clarify things for you?
|
| 134 |
+
|
| 135 |
+
>>> yea, kind of
|
| 136 |
+
That's a good start! If you're ready, we can chat more about it or if there's anything else on your mind.
|
| 137 |
+
|
| 138 |
+
>>> right now, I only have the productivity problem
|
| 139 |
+
Got it. Let's focus on productivity then. Have you tried any specific techniques in the past to improve your productivity? What worked well for you, and what didn't work as expected? Understanding
|
| 140 |
+
what has worked before can help tailor better strategies moving forward.
|
| 141 |
+
|
| 142 |
+
>>> keeping my mind focussed have helped me alot
|
| 143 |
+
That sounds like a great strategy! Keeping your mind focused is key to improving productivity. Let's try to expand on that:
|
| 144 |
+
|
| 145 |
+
1. **Focus Techniques**: What specific techniques do you use to keep yourself focused? Do you find that certain environments or conditions help more than others?
|
| 146 |
+
|
| 147 |
+
2. **Eliminate Distractions**: Are there any common distractions you can identify and maybe reduce? For example, turning off notifications or finding a quiet workspace.
|
| 148 |
+
|
| 149 |
+
3. **Mindfulness and Breaks**: Sometimes taking short mindfulness breaks can actually improve focus for the longer sessions. Have you tried that before?
|
| 150 |
+
|
| 151 |
+
4. **Tools and Apps**: Are there any productivity tools or apps you use to stay on track? Things like to-do lists, timers, or focus apps might help.
|
| 152 |
+
|
| 153 |
+
Would you like to dive deeper into any of these strategies, or is there another aspect of productivity you want to explore?
|
| 154 |
+
|
| 155 |
+
>>> these works better.
|
| 156 |
+
Great! It's good to know that focusing techniques have worked well for you in the past. Let's refine and expand on those:
|
| 157 |
+
|
| 158 |
+
1. **Create a Focused Workspace**: Make sure your workspace is conducive to concentration. This might mean having a clean desk, minimizing clutter, or finding a quiet place where you can work without
|
| 159 |
+
interruptions.
|
| 160 |
+
|
| 161 |
+
2. **Use Time Blocking**: Allocate specific time slots for different tasks throughout your day. This can help you stay focused and avoid multitasking, which often decreases productivity.
|
| 162 |
+
|
| 163 |
+
3. **Take Regular Breaks**: Even short breaks can refresh your mind. Consider techniques like the Pomodoro Technique (work for 25 minutes, then take a 5-minute break).
|
| 164 |
+
|
| 165 |
+
4. **Set Clear Goals**: Before starting work each day or session, set clear goals for what you want to achieve. This helps keep you motivated and focused on the task at hand.
|
| 166 |
+
|
| 167 |
+
5. **Eliminate Distractions**: Identify common distractions and try to minimize them. This could mean turning off notifications, using website blockers, or finding a quiet place to work.
|
| 168 |
+
|
| 169 |
+
6. **Mindfulness Practices**: Incorporating short mindfulness practices can improve your ability to focus. Even taking a few deep breaths or practicing meditation for 5-10 minutes before starting
|
| 170 |
+
work can make a difference.
|
| 171 |
+
|
| 172 |
+
Would any of these strategies help you further improve your productivity?
|
| 173 |
+
|
| 174 |
+
>>> yea, I will try those.
|
| 175 |
+
That sounds like a solid plan! Let's summarize what we've discussed so far:
|
| 176 |
+
|
| 177 |
+
### Productivity Strategies:
|
| 178 |
+
1. **Create a Focused Workspace**: Ensure your workspace is clean and free from distractions.
|
| 179 |
+
2. **Use Time Blocking**: Allocate specific time slots for different tasks to avoid multitasking.
|
| 180 |
+
3. **Take Regular Breaks**: Use techniques like the Pomodoro Technique (25 minutes work, 5 minutes break).
|
| 181 |
+
4. **Set Clear Goals**: Define what you want to achieve before starting each work session.
|
| 182 |
+
5. **Eliminate Distractions**: Turn off notifications and minimize interruptions.
|
| 183 |
+
6. **Mindfulness Practices**: Incorporate short mindfulness breaks or deep breathing exercises.
|
| 184 |
+
|
| 185 |
+
### Next Steps:
|
| 186 |
+
1. Try implementing these strategies one by one over the next few days.
|
| 187 |
+
2. Pay attention to what works best for you and adjust as needed.
|
| 188 |
+
|
| 189 |
+
Would you like any further assistance with these strategies, or is there anything else on your mind?
|
| 190 |
+
|
| 191 |
+
"""
|
| 192 |
+
|
| 193 |
+
# Run the demo evaluation only when executed as a script.  Previously these
# statements ran at import time (triggering a network call on import) and
# the __main__ guard below contained only `pass`.
def main():
    """Format the example conversation, send it to the evaluator API, and print the scores."""
    # Format the conversation
    formatted_conversation = format_conversation(raw_conversation)

    # Print the formatted conversation (optional)
    print("Formatted Conversation:")
    print(formatted_conversation)
    print("\n" + "-" * 50 + "\n")

    # Evaluate the conversation
    print("Sending to API for evaluation...")
    evaluation_result = evaluate_conversation(formatted_conversation)

    # Print the results
    if evaluation_result:
        print("\nEvaluation Results:")
        print(json.dumps(evaluation_result, indent=2))

        # Print a summary of the scores
        print("\nCBT Dimension Scores:")
        for dimension, score in evaluation_result["target_scores"].items():
            print(f"  {dimension}: {score}")


if __name__ == "__main__":
    # To evaluate a different conversation, replace raw_conversation with
    # your own transcript or load it from a file before calling main().
    main()
|
RegressionEvaluation/step1_data_formation.py
ADDED
|
@@ -0,0 +1,208 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
import os
|
| 3 |
+
import glob
|
| 4 |
+
import re
|
| 5 |
+
from collections import defaultdict
|
| 6 |
+
import pandas as pd
|
| 7 |
+
from tqdm import tqdm
|
| 8 |
+
|
| 9 |
+
# Define cognitive distortion patterns to identify initial problems.
# Maps each CBT distortion label to trigger phrases; matching is a
# case-insensitive substring search (see identify_initial_problem), so a
# short phrase also fires inside longer words or sentences.
COGNITIVE_DISTORTIONS = {
    "perfectionism": ["perfect", "flawless", "mistake", "error", "failure"],
    "catastrophizing": ["disaster", "terrible", "horrible", "awful", "end of the world"],
    "black_and_white_thinking": ["always", "never", "completely", "totally", "all or nothing"],
    "mind_reading": ["think about me", "judging me", "think I'm", "perceive me"],
    "overgeneralization": ["everything", "nothing", "everyone", "no one", "always goes wrong"],
    "should_statements": ["should", "must", "have to", "ought to"],
    "emotional_reasoning": ["feel like a failure", "feel worthless", "feel inadequate"],
    "personalization": ["my fault", "blame myself", "responsible for", "caused this"]
}
|
| 20 |
+
|
| 21 |
+
def identify_initial_problem(initial_input):
    """Return the cognitive-distortion label best matching the input text.

    Counts case-insensitive substring hits of each distortion's trigger
    phrases and returns the label with the most hits, or "unspecified"
    when no phrase occurs at all.
    """
    text = initial_input.lower()

    # Per-distortion hit counts, keeping only distortions that matched.
    hits = {
        label: sum(1 for phrase in phrases if phrase.lower() in text)
        for label, phrases in COGNITIVE_DISTORTIONS.items()
    }
    hits = {label: count for label, count in hits.items() if count}

    if not hits:
        return "unspecified"
    # Ties resolve to the distortion listed first in COGNITIVE_DISTORTIONS,
    # preserving the original insertion-order tie-break.
    return max(hits, key=hits.get)
|
| 36 |
+
|
| 37 |
+
def clean_therapist_response(content):
    """Strip <reasoning>…</reasoning> blocks, unwrap <answer> tags, and
    trim surrounding whitespace from a therapist response."""
    # Drop the chain-of-thought section entirely.
    without_reasoning = re.sub(r'<reasoning>.*?</reasoning>', '', content, flags=re.DOTALL)
    # Keep the answer text but remove the surrounding tags.
    unwrapped = re.sub(r'<answer>(.*?)</answer>', r'\1', without_reasoning, flags=re.DOTALL)
    return unwrapped.strip()
|
| 44 |
+
|
| 45 |
+
def format_conversation(turns):
    """Render conversation turns as "User:" / "Assistant:" paragraphs.

    Turns with speaker == "patient" become "User"; all others become
    "Assistant" and have reasoning/answer markup stripped first.
    Paragraphs are separated by blank lines; trailing whitespace trimmed.
    """
    paragraphs = []
    for turn in turns:
        role = "User" if turn["speaker"] == "patient" else "Assistant"
        text = turn["content"]

        # Assistant turns may carry <reasoning>/<answer> markup; remove it.
        if role == "Assistant":
            text = clean_therapist_response(text)

        paragraphs.append(f"{role}: {text}\n\n")

    return "".join(paragraphs).strip()
|
| 59 |
+
|
| 60 |
+
def transform_conversation_to_training_examples(json_file_path):
    """Transform a conversation JSON file into multiple training examples.

    Reads one conversation file and emits an example for every even turn
    prefix (turns 2, 4, 6, ...), so the evaluator can be trained on
    partial conversations. Each example pairs the formatted transcript
    prefix with the conversation's final progress scores.

    Args:
        json_file_path: Path to a JSON file with "metadata",
            "conversation", and "progress_areas" top-level keys.

    Returns:
        A list of example dicts (possibly empty on any error — failures
        are reported via print and swallowed deliberately so a batch run
        can continue past a bad file).
    """
    try:
        with open(json_file_path, 'r', encoding='utf-8') as f:
            conversation_data = json.load(f)

        # Extract basic information; the example id is derived from the filename.
        conversation_id = os.path.basename(json_file_path).replace('.json', '')

        # Check if the required fields exist
        if "metadata" not in conversation_data or "conversation" not in conversation_data or "progress_areas" not in conversation_data:
            print(f"Warning: Missing required fields in {json_file_path}")
            return []

        initial_input = conversation_data["metadata"]["initial_input"]
        total_turns = len(conversation_data["conversation"])

        # Extract reward scores. NOTE(review): the same end-of-conversation
        # scores are attached to every prefix length — TODO confirm this is
        # the intended supervision signal.
        try:
            target_scores = {
                "thought_identification": float(conversation_data["progress_areas"]["Thought Identification"]),
                "emotion_exploration": float(conversation_data["progress_areas"]["Emotion Exploration"]),
                "distortion_identification": float(conversation_data["progress_areas"]["Distortion Identification"]),
                "evidence_gathering": float(conversation_data["progress_areas"]["Evidence Gathering"]),
                "balanced_thinking": float(conversation_data["progress_areas"]["Balanced Thinking"])
            }
        except (KeyError, ValueError) as e:
            print(f"Warning: Issue with progress scores in {json_file_path}: {e}")
            return []

        # Identify the initial problem/cognitive distortion via keyword matching.
        initial_problem = identify_initial_problem(initial_input)

        # Create training examples at different conversation lengths
        training_examples = []

        # Process the conversation at different points (every 2 turns)
        for end_turn in range(2, total_turns + 1, 2):
            # Extract conversation up to this point
            conversation_so_far = conversation_data["conversation"][:end_turn]

            # Format the conversation text
            formatted_text = format_conversation(conversation_so_far)

            # Calculate conversation progress (fraction of turns included)
            conversation_progress = round(end_turn / total_turns, 2)

            # Create the training example
            example = {
                "id": f"{conversation_id}_turn_{end_turn}",
                "conversation_text": formatted_text,
                "target_scores": target_scores,
                "metadata": {
                    "total_turns": total_turns,
                    "turns_included": end_turn,
                    "initial_problem": initial_problem,
                    "conversation_progress": conversation_progress
                }
            }

            training_examples.append(example)

        return training_examples

    # Broad catch is deliberate best-effort handling: one corrupt file
    # should not abort processing of the whole directory.
    except Exception as e:
        print(f"Error processing {json_file_path}: {e}")
        return []
|
| 127 |
+
|
| 128 |
+
def process_all_conversations(conversations_dir, output_file):
    """Process all conversation files and save as a single JSON file.

    Args:
        conversations_dir: Directory containing the *.json conversation files.
        output_file: Path of the combined JSON output; a sibling CSV with the
            same stem is also written for manual inspection.

    Returns:
        The list of all generated training-example dicts.
    """
    all_examples = []

    # Get all JSON files in the directory
    json_files = glob.glob(os.path.join(conversations_dir, "*.json"))
    print(f"Found {len(json_files)} conversation files")

    # Process each file with a progress bar; per-file failures are handled
    # inside transform_conversation_to_training_examples (empty list).
    for json_file in tqdm(json_files, desc="Processing conversations"):
        examples = transform_conversation_to_training_examples(json_file)
        all_examples.extend(examples)

    print(f"Generated {len(all_examples)} training examples from {len(json_files)} conversations")

    # Save all examples to a single file
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(all_examples, f, indent=2)

    print(f"Saved training examples to {output_file}")

    # Also save a flattened version for easier inspection (nested
    # target_scores/metadata pulled up to top-level columns).
    flattened_examples = []
    for example in all_examples:
        flat_example = {
            "id": example["id"],
            "conversation_text": example["conversation_text"],
            "thought_identification": example["target_scores"]["thought_identification"],
            "emotion_exploration": example["target_scores"]["emotion_exploration"],
            "distortion_identification": example["target_scores"]["distortion_identification"],
            "evidence_gathering": example["target_scores"]["evidence_gathering"],
            "balanced_thinking": example["target_scores"]["balanced_thinking"],
            "total_turns": example["metadata"]["total_turns"],
            "turns_included": example["metadata"]["turns_included"],
            "initial_problem": example["metadata"]["initial_problem"],
            "conversation_progress": example["metadata"]["conversation_progress"]
        }
        flattened_examples.append(flat_example)

    # Save as CSV for easy viewing
    df = pd.DataFrame(flattened_examples)
    csv_output = output_file.replace('.json', '.csv')
    df.to_csv(csv_output, index=False)
    print(f"Saved flattened examples to {csv_output} for easy inspection")

    return all_examples
|
| 174 |
+
|
| 175 |
+
|
| 176 |
+
# Example usage
if __name__ == "__main__":
    # Set your conversations directory and output file.
    # NOTE(review): hardcoded absolute user path — will not exist on other
    # machines; parameterize (argv/env var) before running elsewhere.
    conversations_dir = "/Users/saitejagudidevini/Documents/Dev/grpo_trainer/Evaluations/Gemini_QA_Testing/conversations"
    output_file = "cbt_evaluator_training_data.json"

    # Process all conversations
    examples = process_all_conversations(conversations_dir, output_file)

    # Print some statistics
    if examples:
        print("\nDataset Statistics:")
        print(f"Total examples: {len(examples)}")

        # Count examples by initial problem (keyword-derived label)
        problem_counts = {}
        for example in examples:
            problem = example["metadata"]["initial_problem"]
            problem_counts[problem] = problem_counts.get(problem, 0) + 1

        print("\nExamples by initial problem:")
        for problem, count in sorted(problem_counts.items(), key=lambda x: x[1], reverse=True):
            print(f" {problem}: {count}")

        # Show distribution of conversation lengths
        turn_counts = {}
        for example in examples:
            turns = example["metadata"]["turns_included"]
            turn_counts[turns] = turn_counts.get(turns, 0) + 1

        print("\nExamples by conversation length (turns):")
        for turns, count in sorted(turn_counts.items()):
            print(f" {turns} turns: {count}")
|
RegressionEvaluation/step2_datasetformodeltraining.py
ADDED
|
@@ -0,0 +1,173 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
import torch
|
| 3 |
+
import numpy as np
|
| 4 |
+
from sklearn.model_selection import train_test_split
|
| 5 |
+
from transformers import AutoTokenizer
|
| 6 |
+
from datasets import Dataset, DatasetDict
|
| 7 |
+
import pandas as pd
|
| 8 |
+
from tqdm import tqdm
|
| 9 |
+
import os
|
| 10 |
+
|
| 11 |
+
def prepare_cbt_datasets(input_file, model_name="roberta-base", test_size=0.15, val_size=0.15, max_length=512, seed=42):
    """
    Prepare datasets for training a CBT evaluator model.

    Loads the Step-1 JSON examples, flattens them, splits by conversation
    id (so prefixes of the same conversation never cross splits), tokenizes
    the transcripts, and saves the result plus the tokenizer to disk
    ("cbt_evaluator_data/").

    Args:
        input_file: Path to the JSON file containing the training examples
        model_name: Name of the pre-trained model to use
        test_size: Fraction of data to use for testing
        val_size: Fraction of data to use for validation
        max_length: Maximum sequence length for tokenization
        seed: Random seed for reproducibility

    Returns:
        A DatasetDict containing train, validation, and test datasets
    """
    print(f"Loading data from {input_file}...")

    # Load the data
    with open(input_file, 'r', encoding='utf-8') as f:
        examples = json.load(f)

    print(f"Loaded {len(examples)} examples")

    # Flatten the examples for easier processing (nested target_scores and
    # metadata become top-level columns).
    flattened_examples = []
    for example in examples:
        flat_example = {
            "id": example["id"],
            "conversation_text": example["conversation_text"],
            "thought_identification": example["target_scores"]["thought_identification"],
            "emotion_exploration": example["target_scores"]["emotion_exploration"],
            "distortion_identification": example["target_scores"]["distortion_identification"],
            "evidence_gathering": example["target_scores"]["evidence_gathering"],
            "balanced_thinking": example["target_scores"]["balanced_thinking"],
            "total_turns": example["metadata"]["total_turns"],
            "turns_included": example["metadata"]["turns_included"],
            "initial_problem": example["metadata"]["initial_problem"],
            "conversation_progress": example["metadata"]["conversation_progress"]
        }
        flattened_examples.append(flat_example)

    # Convert to DataFrame for easier manipulation
    df = pd.DataFrame(flattened_examples)

    # Print dataset statistics
    print("\nDataset Statistics:")
    print(f"Number of examples: {len(df)}")
    print(f"Number of unique conversations: {df['id'].str.split('_turn_').str[0].nunique()}")
    print(f"Average conversation length: {df['conversation_text'].str.len().mean():.1f} characters")

    print("\nTarget Score Distributions:")
    for col in ["thought_identification", "emotion_exploration", "distortion_identification",
                "evidence_gathering", "balanced_thinking"]:
        print(f" {col}: mean={df[col].mean():.2f}, std={df[col].std():.2f}, min={df[col].min():.2f}, max={df[col].max():.2f}")

    # Split by conversation ID to prevent data leakage
    # Extract the base conversation ID (without the turn number)
    df['conversation_base_id'] = df['id'].str.split('_turn_').str[0]

    # Get unique conversation IDs
    unique_conv_ids = df['conversation_base_id'].unique()

    # Split conversation IDs into train, validation, and test sets
    train_ids, temp_ids = train_test_split(
        unique_conv_ids,
        test_size=test_size + val_size,
        random_state=seed
    )

    # Second split carves test out of the held-out pool, preserving the
    # requested test/val proportions.
    val_ids, test_ids = train_test_split(
        temp_ids,
        test_size=test_size / (test_size + val_size),
        random_state=seed
    )

    # Create train, validation, and test dataframes
    train_df = df[df['conversation_base_id'].isin(train_ids)]
    val_df = df[df['conversation_base_id'].isin(val_ids)]
    test_df = df[df['conversation_base_id'].isin(test_ids)]

    print(f"\nSplit sizes:")
    print(f" Train: {len(train_df)} examples from {len(train_ids)} conversations")
    print(f" Validation: {len(val_df)} examples from {len(val_ids)} conversations")
    print(f" Test: {len(test_df)} examples from {len(test_ids)} conversations")

    # Convert to Hugging Face datasets
    train_dataset = Dataset.from_pandas(train_df)
    val_dataset = Dataset.from_pandas(val_df)
    test_dataset = Dataset.from_pandas(test_df)

    # Create a DatasetDict
    dataset_dict = DatasetDict({
        'train': train_dataset,
        'validation': val_dataset,
        'test': test_dataset
    })

    # Load tokenizer
    print(f"\nLoading tokenizer for {model_name}...")
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    # Define tokenization function (pads/truncates every transcript to
    # max_length tokens)
    def tokenize_function(examples):
        return tokenizer(
            examples["conversation_text"],
            padding="max_length",
            truncation=True,
            max_length=max_length
        )

    # Tokenize the datasets. NOTE(review): the five score columns are kept
    # as separate fields — the training step must assemble them into a
    # "labels" vector itself; confirm downstream code does this.
    print("Tokenizing datasets...")
    tokenized_datasets = dataset_dict.map(
        tokenize_function,
        batched=True,
        desc="Tokenizing",
        remove_columns=['conversation_base_id']  # Remove the helper column we added
    )

    # Save the processed datasets
    output_dir = "cbt_evaluator_data"
    os.makedirs(output_dir, exist_ok=True)

    print(f"\nSaving processed datasets to {output_dir}...")
    tokenized_datasets.save_to_disk(output_dir)

    # Also save the tokenizer for consistency
    tokenizer.save_pretrained(os.path.join(output_dir, "tokenizer"))

    print("Dataset preparation complete!")
    return tokenized_datasets
|
| 142 |
+
|
| 143 |
+
# Example usage
if __name__ == "__main__":
    # Set your input file (output from Step 1)
    input_file = "cbt_evaluator_training_data.json"

    # Build and persist the tokenized train/validation/test splits.
    datasets = prepare_cbt_datasets(
        input_file=input_file,
        model_name="roberta-base",  # You can change this to another model if preferred
        test_size=0.15,
        val_size=0.15,
        max_length=512,
        seed=42
    )

    # Print dataset sizes
    print("\nFinal dataset sizes:")
    for split, dataset in datasets.items():
        print(f" {split}: {len(dataset)} examples")

    # Print example from training set (sanity check that tokenization and
    # target columns survived the pipeline)
    print("\nExample from training set:")
    example = datasets["train"][0]
    print(f"ID: {example['id']}")
    print(f"Input IDs shape: {len(example['input_ids'])}")
    print(f"Target scores:")
    print(f" Thought identification: {example['thought_identification']}")
    print(f" Emotion exploration: {example['emotion_exploration']}")
    print(f" Distortion identification: {example['distortion_identification']}")
    print(f" Evidence gathering: {example['evidence_gathering']}")
    print(f" Balanced thinking: {example['balanced_thinking']}")
|
| 173 |
+
|
RegressionEvaluation/step3_trainingthe_cbt_evaluator_model.py
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import torch
|
| 3 |
+
import numpy as np
|
| 4 |
+
from transformers import (
|
| 5 |
+
AutoModelForSequenceClassification,
|
| 6 |
+
AutoTokenizer,
|
| 7 |
+
TrainingArguments,
|
| 8 |
+
Trainer,
|
| 9 |
+
EarlyStoppingCallback
|
| 10 |
+
)
|
| 11 |
+
from datasets import load_from_disk
|
| 12 |
+
import evaluate
|
| 13 |
+
from sklearn.metrics import mean_squared_error, r2_score
|
| 14 |
+
import matplotlib.pyplot as plt
|
| 15 |
+
import pandas as pd
|
| 16 |
+
from tqdm import tqdm
|
RegressionEvaluation/step4training.py
ADDED
|
@@ -0,0 +1,379 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
import os
|
| 3 |
+
import glob
|
| 4 |
+
import re
|
| 5 |
+
import numpy as np
|
| 6 |
+
import pandas as pd
|
| 7 |
+
from tqdm import tqdm
|
| 8 |
+
from sklearn.feature_extraction.text import TfidfVectorizer
|
| 9 |
+
from sklearn.ensemble import GradientBoostingRegressor
|
| 10 |
+
from sklearn.multioutput import MultiOutputRegressor
|
| 11 |
+
from sklearn.model_selection import train_test_split
|
| 12 |
+
from sklearn.metrics import mean_squared_error, r2_score
|
| 13 |
+
import pickle
|
| 14 |
+
import matplotlib.pyplot as plt
|
| 15 |
+
from collections import defaultdict
|
| 16 |
+
import joblib
|
| 17 |
+
from flask import Flask, request, jsonify
|
| 18 |
+
import uuid
|
| 19 |
+
from datetime import datetime
|
| 20 |
+
|
| 21 |
+
# Define cognitive distortion patterns.
# NOTE(review): duplicated verbatim from step1_data_formation.py — keep the
# two tables in sync (or extract to a shared module). Matching is a
# case-insensitive substring search (see identify_initial_problem).
COGNITIVE_DISTORTIONS = {
    "perfectionism": ["perfect", "flawless", "mistake", "error", "failure"],
    "catastrophizing": ["disaster", "terrible", "horrible", "awful", "end of the world"],
    "black_and_white_thinking": ["always", "never", "completely", "totally", "all or nothing"],
    "mind_reading": ["think about me", "judging me", "think I'm", "perceive me"],
    "overgeneralization": ["everything", "nothing", "everyone", "no one", "always goes wrong"],
    "should_statements": ["should", "must", "have to", "ought to"],
    "emotional_reasoning": ["feel like a failure", "feel worthless", "feel inadequate"],
    "personalization": ["my fault", "blame myself", "responsible for", "caused this"]
}
|
| 32 |
+
|
| 33 |
+
class CBTEvaluatorSimple:
|
| 34 |
+
    def __init__(self, output_dir="cbt_evaluator_simple", max_features=5000, random_state=42):
        """Set up the TF-IDF + gradient-boosting scoring pipeline.

        Args:
            output_dir: Directory for all artifacts (model, vectorizer,
                metrics, plots, processed data). Created immediately.
            max_features: Vocabulary-size cap for the TF-IDF vectorizer.
            random_state: Seed shared by the train/test split and the
                regressors, for reproducibility.
        """
        self.output_dir = output_dir
        self.max_features = max_features
        self.random_state = random_state

        # Create output directory (side effect at construction time)
        os.makedirs(output_dir, exist_ok=True)

        # Initialize feature extractor: unigrams + bigrams, English stop
        # words removed, vocabulary capped at max_features.
        self.vectorizer = TfidfVectorizer(
            max_features=max_features,
            ngram_range=(1, 2),
            stop_words='english'
        )

        # Initialize model: one GradientBoostingRegressor per target column
        # (the five CBT dimensions, when fitted via train()).
        self.model = MultiOutputRegressor(
            GradientBoostingRegressor(
                n_estimators=100,
                learning_rate=0.1,
                max_depth=5,
                random_state=random_state
            )
        )
|
| 58 |
+
|
| 59 |
+
def identify_initial_problem(self, initial_input):
|
| 60 |
+
"""Identify the likely cognitive distortion from the initial input."""
|
| 61 |
+
initial_input = initial_input.lower()
|
| 62 |
+
|
| 63 |
+
# Count matches for each distortion type
|
| 64 |
+
matches = defaultdict(int)
|
| 65 |
+
for distortion, patterns in COGNITIVE_DISTORTIONS.items():
|
| 66 |
+
for pattern in patterns:
|
| 67 |
+
if pattern.lower() in initial_input:
|
| 68 |
+
matches[distortion] += 1
|
| 69 |
+
|
| 70 |
+
# Return the distortion with the most matches, or "unspecified" if none
|
| 71 |
+
if matches:
|
| 72 |
+
return max(matches.items(), key=lambda x: x[1])[0]
|
| 73 |
+
return "unspecified"
|
| 74 |
+
|
| 75 |
+
def clean_therapist_response(self, content):
|
| 76 |
+
"""Clean the therapist response by removing reasoning tags."""
|
| 77 |
+
# Remove reasoning tags
|
| 78 |
+
content = re.sub(r'<reasoning>.*?</reasoning>', '', content, flags=re.DOTALL)
|
| 79 |
+
# Extract answer content
|
| 80 |
+
content = re.sub(r'<answer>(.*?)</answer>', r'\1', content, flags=re.DOTALL)
|
| 81 |
+
return content.strip()
|
| 82 |
+
|
| 83 |
+
def format_conversation(self, turns):
|
| 84 |
+
"""Format conversation turns into a readable text format."""
|
| 85 |
+
formatted_text = ""
|
| 86 |
+
for turn in turns:
|
| 87 |
+
speaker = "User" if turn["speaker"] == "patient" else "Assistant"
|
| 88 |
+
content = turn["content"]
|
| 89 |
+
|
| 90 |
+
# Clean the content for assistant responses
|
| 91 |
+
if speaker == "Assistant":
|
| 92 |
+
content = self.clean_therapist_response(content)
|
| 93 |
+
|
| 94 |
+
formatted_text += f"{speaker}: {content}\n\n"
|
| 95 |
+
|
| 96 |
+
return formatted_text.strip()
|
| 97 |
+
|
| 98 |
+
    def process_conversations(self, conversations_dir):
        """Process all conversation files into a dataset.

        For every *.json file in `conversations_dir`, builds one example per
        even-length conversation prefix (turns 2, 4, ...), each carrying the
        conversation's final progress scores as flat columns. Files with
        missing fields or bad scores are reported and skipped.

        Args:
            conversations_dir: Directory containing conversation JSON files.

        Returns:
            A pandas DataFrame with one row per (conversation, prefix).
        """
        all_examples = []

        # Get all JSON files in the directory
        json_files = glob.glob(os.path.join(conversations_dir, "*.json"))
        print(f"Found {len(json_files)} conversation files")

        # Process each file with a progress bar
        for json_file in tqdm(json_files, desc="Processing conversations"):
            try:
                with open(json_file, 'r', encoding='utf-8') as f:
                    conversation_data = json.load(f)

                # Extract basic information; example ids derive from filename.
                conversation_id = os.path.basename(json_file).replace('.json', '')

                # Check if the required fields exist
                if "metadata" not in conversation_data or "conversation" not in conversation_data or "progress_areas" not in conversation_data:
                    print(f"Warning: Missing required fields in {json_file}")
                    continue

                initial_input = conversation_data["metadata"]["initial_input"]
                total_turns = len(conversation_data["conversation"])

                # Extract reward scores. NOTE(review): end-of-conversation
                # scores are attached to every prefix — confirm intended.
                try:
                    target_scores = {
                        "thought_identification": float(conversation_data["progress_areas"]["Thought Identification"]),
                        "emotion_exploration": float(conversation_data["progress_areas"]["Emotion Exploration"]),
                        "distortion_identification": float(conversation_data["progress_areas"]["Distortion Identification"]),
                        "evidence_gathering": float(conversation_data["progress_areas"]["Evidence Gathering"]),
                        "balanced_thinking": float(conversation_data["progress_areas"]["Balanced Thinking"])
                    }
                except (KeyError, ValueError) as e:
                    print(f"Warning: Issue with progress scores in {json_file}: {e}")
                    continue

                # Identify the initial problem/cognitive distortion
                initial_problem = self.identify_initial_problem(initial_input)

                # Create training examples at different conversation lengths
                for end_turn in range(2, total_turns + 1, 2):
                    # Extract conversation up to this point
                    conversation_so_far = conversation_data["conversation"][:end_turn]

                    # Format the conversation text
                    formatted_text = self.format_conversation(conversation_so_far)

                    # Calculate conversation progress (fraction of turns)
                    conversation_progress = round(end_turn / total_turns, 2)

                    # Create the training example (flat columns, unlike the
                    # nested layout used in step1_data_formation.py)
                    example = {
                        "id": f"{conversation_id}_turn_{end_turn}",
                        "conversation_text": formatted_text,
                        "thought_identification": target_scores["thought_identification"],
                        "emotion_exploration": target_scores["emotion_exploration"],
                        "distortion_identification": target_scores["distortion_identification"],
                        "evidence_gathering": target_scores["evidence_gathering"],
                        "balanced_thinking": target_scores["balanced_thinking"],
                        "total_turns": total_turns,
                        "turns_included": end_turn,
                        "initial_problem": initial_problem,
                        "conversation_progress": conversation_progress
                    }

                    all_examples.append(example)

            # Deliberate best-effort: one corrupt file must not abort the run.
            except Exception as e:
                print(f"Error processing {json_file}: {e}")

        print(f"Generated {len(all_examples)} training examples from {len(json_files)} conversations")
        return pd.DataFrame(all_examples)
|
| 172 |
+
|
| 173 |
+
    def train(self, conversations_dir, test_size=0.2):
        """Train the model on conversation data.

        Builds the dataset, fits the TF-IDF vectorizer and multi-output
        regressor, evaluates on a held-out split, and writes all artifacts
        (processed_data.csv, vectorizer.joblib, model.joblib, metrics.json,
        predictions_vs_actual.png) to self.output_dir.

        Args:
            conversations_dir: Directory of conversation JSON files.
            test_size: Held-out fraction for evaluation.

        Returns:
            Tuple (mse, r2, dimension_metrics) — overall test MSE/R² plus a
            per-dimension {"mse", "r2"} dict.
        """
        # Process conversations
        df = self.process_conversations(conversations_dir)

        # Save the processed data
        df.to_csv(os.path.join(self.output_dir, "processed_data.csv"), index=False)

        # Split into features and targets
        X = df["conversation_text"]
        y = df[["thought_identification", "emotion_exploration", "distortion_identification",
                "evidence_gathering", "balanced_thinking"]].values

        # Split into train and test sets.
        # NOTE(review): the split is over individual examples, so prefixes of
        # the same conversation can land in both train and test — a leakage
        # risk that step2's by-conversation split avoids; confirm acceptable.
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=test_size, random_state=self.random_state
        )

        print(f"Training set size: {len(X_train)}")
        print(f"Test set size: {len(X_test)}")

        # Fit the vectorizer (on training texts only)
        print("Fitting vectorizer...")
        X_train_vec = self.vectorizer.fit_transform(X_train)

        # Train the model
        print("Training model...")
        self.model.fit(X_train_vec, y_train)

        # Evaluate on test set
        print("Evaluating on test set...")
        X_test_vec = self.vectorizer.transform(X_test)
        y_pred = self.model.predict(X_test_vec)

        # Calculate metrics (aggregated over all five output columns)
        mse = mean_squared_error(y_test, y_pred)
        r2 = r2_score(y_test, y_pred)

        print(f"Test MSE: {mse:.4f}")
        print(f"Test R²: {r2:.4f}")

        # Calculate metrics for each dimension
        dimension_names = ["thought_identification", "emotion_exploration", "distortion_identification",
                           "evidence_gathering", "balanced_thinking"]

        dimension_metrics = {}
        for i, name in enumerate(dimension_names):
            dim_mse = mean_squared_error(y_test[:, i], y_pred[:, i])
            dim_r2 = r2_score(y_test[:, i], y_pred[:, i])
            dimension_metrics[name] = {"mse": dim_mse, "r2": dim_r2}
            print(f" {name}: MSE = {dim_mse:.4f}, R² = {dim_r2:.4f}")

        # Save the model and vectorizer
        print(f"Saving model to {self.output_dir}...")
        joblib.dump(self.vectorizer, os.path.join(self.output_dir, "vectorizer.joblib"))
        joblib.dump(self.model, os.path.join(self.output_dir, "model.joblib"))

        # Save the metrics
        with open(os.path.join(self.output_dir, "metrics.json"), "w") as f:
            json.dump({
                "overall": {"mse": mse, "r2": r2},
                "dimensions": dimension_metrics
            }, f, indent=2)

        # Visualize predictions vs actual values
        self.visualize_predictions(y_test, y_pred, dimension_names)

        return mse, r2, dimension_metrics
|
| 241 |
+
|
| 242 |
+
def visualize_predictions(self, y_true, y_pred, dimension_names):
    """Plot predicted vs. actual scores for each CBT dimension.

    Draws one scatter plot per dimension on a 2x3 grid (the unused sixth
    axis is removed), annotates each subplot with its R² score, and saves
    the figure to ``<output_dir>/predictions_vs_actual.png``.

    Args:
        y_true: 2-D array of ground-truth scores, one column per dimension.
        y_pred: 2-D array of predicted scores, same shape as ``y_true``.
        dimension_names: Names of the score dimensions (expected length 5).
    """
    fig, axes = plt.subplots(2, 3, figsize=(15, 10))
    axes = axes.flatten()

    for i, name in enumerate(dimension_names):
        ax = axes[i]
        ax.scatter(y_true[:, i], y_pred[:, i], alpha=0.5)
        ax.plot([0, 1], [0, 1], 'r--')  # perfect-prediction diagonal
        ax.set_xlabel("Actual")
        ax.set_ylabel("Predicted")
        ax.set_title(name)
        ax.set_xlim(0, 1)
        ax.set_ylim(0, 1)

        # Annotate each subplot with its per-dimension R².
        r2 = r2_score(y_true[:, i], y_pred[:, i])
        ax.text(0.05, 0.95, f"R² = {r2:.3f}", transform=ax.transAxes)

    # Only 5 dimensions are plotted: drop the unused sixth subplot.
    fig.delaxes(axes[5])

    # Add overall title
    fig.suptitle("Predicted vs Actual Values", fontsize=16)
    fig.tight_layout()

    # Save the figure, then close it: matplotlib keeps figures alive
    # until explicitly closed, so repeated calls would otherwise leak
    # open figures (and memory).
    out_path = os.path.join(self.output_dir, "predictions_vs_actual.png")
    fig.savefig(out_path)
    plt.close(fig)
    print(f"Visualization saved to {out_path}")
def predict(self, conversation_text):
    """Score a conversation on the five CBT dimensions.

    Vectorizes the raw transcript, runs the regression model, and clamps
    every score into the range [0, 1].

    Args:
        conversation_text: Full conversation transcript as a single string.

    Returns:
        dict mapping each dimension name to its score as a plain float.
    """
    # Vectorize the single transcript and take the first (only) row of
    # predictions.
    features = self.vectorizer.transform([conversation_text])
    raw_scores = self.model.predict(features)[0]

    # Clamp scores into the valid [0, 1] range.
    clipped = np.clip(raw_scores, 0, 1)

    dimension_names = [
        "thought_identification",
        "emotion_exploration",
        "distortion_identification",
        "evidence_gathering",
        "balanced_thinking",
    ]
    return dict(zip(dimension_names, (float(score) for score in clipped)))
def count_turns(self, conversation_text):
    """Return the total number of speaker turns in the transcript.

    A turn is counted for every occurrence of the literal marker
    "User:" or "Assistant:" anywhere in the text.
    """
    speaker_markers = ("User:", "Assistant:")
    return sum(
        len(re.findall(marker, conversation_text))
        for marker in speaker_markers
    )
def identify_problem_from_text(self, conversation_text):
    """Infer the likely cognitive distortion from a conversation transcript.

    Extracts the first "User:" message (up to the first blank line or the
    end of the text) and delegates classification to
    ``identify_initial_problem``. Returns "unspecified" when no user
    message can be found.
    """
    first_user_msg = re.search(r"User: (.*?)(?:\n\n|$)", conversation_text)
    if first_user_msg is None:
        # No user message present — nothing to classify.
        return "unspecified"
    return self.identify_initial_problem(first_user_msg.group(1))
+
# Create a Flask API for the model
def create_flask_app(model_dir="cbt_evaluator_simple"):
    """Build a Flask app that serves the trained CBT evaluator.

    Loads the fitted vectorizer and regression model from ``model_dir``
    and exposes two endpoints:

    * ``POST /evaluate`` — body ``{"conversation": "..."}``; returns the
      per-dimension CBT scores plus conversation metadata.
    * ``GET /health`` — liveness probe.

    Args:
        model_dir: Directory containing ``vectorizer.joblib`` and
            ``model.joblib``.

    Returns:
        The configured Flask application.
    """
    app = Flask(__name__)

    # Load the fitted artifacts once at startup, not per request.
    vectorizer = joblib.load(os.path.join(model_dir, "vectorizer.joblib"))
    model = joblib.load(os.path.join(model_dir, "model.joblib"))

    # Wrap them in an evaluator instance so its helper methods can be reused.
    evaluator = CBTEvaluatorSimple()
    evaluator.vectorizer = vectorizer
    evaluator.model = model

    @app.route('/evaluate', methods=['POST'])
    def evaluate_conversation():
        """Evaluate a conversation."""
        # get_json(silent=True) returns None instead of raising on a
        # missing or non-JSON body, so malformed requests get the clean
        # 400 below rather than an unhandled server error
        # (request.json would fail before data.get could run).
        data = request.get_json(silent=True) or {}
        conversation_text = data.get('conversation', '')

        if not conversation_text:
            return jsonify({"error": "No conversation provided"}), 400

        # Get per-dimension predictions.
        scores = evaluator.predict(conversation_text)

        # Count turns and identify the initial problem.
        total_turns = evaluator.count_turns(conversation_text)
        initial_problem = evaluator.identify_problem_from_text(conversation_text)

        # Create the final response.
        result = {
            "id": f"eval_{uuid.uuid4().hex[:8]}_{datetime.now().strftime('%Y%m%d%H%M%S')}",
            "conversation_text": conversation_text,
            "target_scores": {
                "thought_identification": round(scores["thought_identification"], 2),
                "emotion_exploration": round(scores["emotion_exploration"], 2),
                "distortion_identification": round(scores["distortion_identification"], 2),
                "evidence_gathering": round(scores["evidence_gathering"], 2),
                "balanced_thinking": round(scores["balanced_thinking"], 2)
            },
            "metadata": {
                "total_turns": total_turns,
                "turns_included": total_turns,
                "initial_problem": initial_problem,
                "conversation_progress": 1.0  # Assuming full conversation
            }
        }

        return jsonify(result)

    @app.route('/health', methods=['GET'])
    def health_check():
        """Health check endpoint."""
        return jsonify({"status": "healthy"})

    return app
| 364 |
+
# Example usage
#   python cbt_evaluator_simple.py serve         -> run the Flask API
#   python cbt_evaluator_simple.py [data_dir]    -> train the model
if __name__ == "__main__":
    # Check if we're training or serving.
    import sys

    if len(sys.argv) > 1 and sys.argv[1] == "serve":
        # Serve the trained model over HTTP.
        app = create_flask_app()
        app.run(debug=True, host='0.0.0.0', port=5009)
    else:
        # Train the model. The conversations directory may be passed as
        # the first CLI argument; the hard-coded developer path remains
        # the default so existing invocations keep working.
        default_data_dir = "/Users/saitejagudidevini/Documents/Dev/grpo_trainer/Evaluations/Gemini_QA_Testing/conversations"
        data_dir = sys.argv[1] if len(sys.argv) > 1 else default_data_dir

        evaluator = CBTEvaluatorSimple()
        evaluator.train(data_dir)

        print("\nTo serve the model, run:")
        print("python cbt_evaluator_simple.py serve")
|