SaitejaJate commited on
Commit
7f17d4b
·
verified ·
1 Parent(s): 19d8b9d

Upload 32 files

Browse files
Files changed (32) hide show
  1. RegressionEvaluation/Evaluatorforregression.py +56 -0
  2. RegressionEvaluation/RobustSystemPrompt.md +49 -0
  3. RegressionEvaluation/__pycache__/step4training.cpython-311.pyc +0 -0
  4. RegressionEvaluation/__pycache__/step4training.cpython-39.pyc +0 -0
  5. RegressionEvaluation/api_modifications.md +429 -0
  6. RegressionEvaluation/cbt_evaluator_data/dataset_dict.json +1 -0
  7. RegressionEvaluation/cbt_evaluator_data/test/data-00000-of-00001.arrow +3 -0
  8. RegressionEvaluation/cbt_evaluator_data/test/dataset_info.json +70 -0
  9. RegressionEvaluation/cbt_evaluator_data/test/state.json +13 -0
  10. RegressionEvaluation/cbt_evaluator_data/tokenizer/merges.txt +0 -0
  11. RegressionEvaluation/cbt_evaluator_data/tokenizer/special_tokens_map.json +15 -0
  12. RegressionEvaluation/cbt_evaluator_data/tokenizer/tokenizer.json +0 -0
  13. RegressionEvaluation/cbt_evaluator_data/tokenizer/tokenizer_config.json +58 -0
  14. RegressionEvaluation/cbt_evaluator_data/tokenizer/vocab.json +0 -0
  15. RegressionEvaluation/cbt_evaluator_data/train/data-00000-of-00001.arrow +3 -0
  16. RegressionEvaluation/cbt_evaluator_data/train/dataset_info.json +70 -0
  17. RegressionEvaluation/cbt_evaluator_data/train/state.json +13 -0
  18. RegressionEvaluation/cbt_evaluator_data/validation/data-00000-of-00001.arrow +3 -0
  19. RegressionEvaluation/cbt_evaluator_data/validation/dataset_info.json +70 -0
  20. RegressionEvaluation/cbt_evaluator_data/validation/state.json +13 -0
  21. RegressionEvaluation/cbt_evaluator_simple/metrics.json +28 -0
  22. RegressionEvaluation/cbt_evaluator_simple/model.joblib +3 -0
  23. RegressionEvaluation/cbt_evaluator_simple/predictions_vs_actual.png +0 -0
  24. RegressionEvaluation/cbt_evaluator_simple/processed_data.csv +0 -0
  25. RegressionEvaluation/cbt_evaluator_simple/vectorizer.joblib +3 -0
  26. RegressionEvaluation/cbt_evaluator_training_data.csv +0 -0
  27. RegressionEvaluation/cbt_evaluator_training_data.json +0 -0
  28. RegressionEvaluation/evaluate_conversation.py +218 -0
  29. RegressionEvaluation/step1_data_formation.py +208 -0
  30. RegressionEvaluation/step2_datasetformodeltraining.py +173 -0
  31. RegressionEvaluation/step3_trainingthe_cbt_evaluator_model.py +16 -0
  32. RegressionEvaluation/step4training.py +379 -0
RegressionEvaluation/Evaluatorforregression.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Batch-annotate conversation JSON files with CBT progress-area scores.

For every ``*.json`` conversation file in ``conversations_dir``, this script
runs each therapist turn through ``SingleResponseCBTFlowEvaluator``, collects
the evaluator's accumulated progress metrics, writes them back into the file
under a ``progress_areas`` key, and prints a per-file status line.
"""

import os
import json
import re
from sentence_transformers import SentenceTransformer
import torch
import sys

# Add the parent directory to the path so we can import the evaluator.
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from gemini_qa_evaluator_api import SingleResponseCBTFlowEvaluator

# Module-level instance kept for parity with the original script; note it is
# shadowed by a fresh evaluator created for each conversation below.
evaluator = SingleResponseCBTFlowEvaluator()

# Directory containing the conversation JSON files.
# NOTE(review): absolute user-specific path — consider making this configurable.
conversations_dir = "/Users/saitejagudidevini/Documents/Dev/grpo_trainer/Evaluations/Gemini_QA_Testing/conversationsfordeepseek"

# Process each JSON file in the directory.
for filename in os.listdir(conversations_dir):
    if filename.endswith(".json"):
        file_path = os.path.join(conversations_dir, filename)

        # Load the conversation.
        with open(file_path, 'r') as f:
            conversation_data = json.load(f)

        # Reset the evaluator for this conversation so scores don't leak
        # across files.
        evaluator = SingleResponseCBTFlowEvaluator()

        # Score every therapist turn and fold it into the running context.
        for turn in conversation_data["conversation"]:
            if turn["speaker"] == "therapist":
                scores = evaluator.evaluate_response(turn["content"])
                evaluator.update_conversation_context(scores)

        # Get the final accumulated progress areas.
        progress = evaluator.conversation_context["progress"]

        # Format each progress metric as a two-decimal string.
        formatted_progress = {
            "Balanced Thinking": f"{progress['balanced_thinking']:.2f}",
            "Distortion Identification": f"{progress['distortion_identification']:.2f}",
            "Emotion Exploration": f"{progress['emotion_exploration']:.2f}",
            "Evidence Gathering": f"{progress['evidence_gathering']:.2f}",
            "Thought Identification": f"{progress['thought_identification']:.2f}"
        }

        # Attach the progress areas to the conversation data.
        conversation_data["progress_areas"] = formatted_progress

        # Write the updated conversation data back to the same file.
        with open(file_path, 'w') as f:
            json.dump(conversation_data, f, indent=2)

        # BUGFIX: the original f-string had no placeholder and printed the
        # literal text "(unknown)"; report the actual file processed.
        print(f"Processed {filename}")
        print(f"Progress areas: {formatted_progress}")
RegressionEvaluation/RobustSystemPrompt.md ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ System Prompt
2
+
3
+ You are a supportive conversational AI. Your primary purpose is to engage in natural conversation, identify moments when a user expresses self-defeating beliefs, and then guide them through a structured cognitive exercise called "CBT Refactoring."
4
+
5
+ Your operation is divided into two distinct phases: Phase 1: Chit-Chat and Phase 2: CBT Refactoring.
6
+
7
+ Phase 1: Chit-Chat
8
+ Your initial state is "Chit-Chat." Your goal is to be a friendly, present, and natural conversational partner. Build rapport and allow the conversation to flow freely until the user makes a self-defeating statement (e.g., "I'm a failure," "I'm so stupid," "I'll never be good enough"). This statement is the trigger to transition to Phase 2.
9
+
10
+ Phase 2: CBT Refactoring
11
+ Once a trigger is detected, you must immediately and exclusively follow the "CBT Refactoring" mini-game rules.
12
+
13
+ Core Directives for the CBT Refactoring Mini-Game
14
+
15
+ 1. The Unalterable Question Sequence:
16
+ You must guide the user through the following questions sequentially. Do not skip steps or change their order.
17
+
18
+ 1.0: Acknowledge their statement and ask about their current emotional state.
19
+ Example phrasing: "Thank you for sharing that with me. It sounds like a tough situation. Could you tell me what moods you're feeling right now, and maybe rate their intensity from 0 to 100%?"
20
+ 1.1: Ask for evidence that supports their negative belief.
21
+ Example phrasing: "I hear that you feel that way. What evidence from this situation supports that view of yourself?"
22
+ 1.2: Ask for experiences that contradict their negative belief.
23
+ Example phrasing: "Okay, thank you. Now, can you think of any experiences, even small ones, that might contradict that view?"
24
+ 1.2.2 (Optional but Encouraged Loop): Ask for more positive or contradictory experiences. This is the most important step for helping the user see a different perspective. Feel free to repeat this question or variations of it.
25
+ Example phrasing: "That's a good example. Can you think of any others? Sometimes even little things count."
26
+ 1.3: Ask them to rephrase their original thought based on the new evidence.
27
+ Example phrasing: "Now that we've looked at examples for and against that original thought, how could you rephrase it in a way that feels more balanced?"
28
+ 1.4: Ask them to re-rate their initial moods.
29
+ Example phrasing: "How are you feeling now? Earlier you mentioned feeling [mention their stated moods]. How would you rate those same moods on that 0-100% scale now?"
30
+ 1.5: Conclude the mini-game and return to normal conversation (Phase 1). Start the process over if a new self-defeating thought is detected.
31
+ 2. Rules of Conversational Engagement:
32
+
33
+ Handling Avoidance: If the user avoids a question or changes the subject, gently but firmly guide them back to the current question. Acknowledge their comment briefly before redirecting.
34
+ Handling Resistance: If the user tries to end the conversation, provide gentle reassurance and encouragement to continue the process, framing it as a helpful exercise.
35
+ Accepting Answers: User answers may be emotional and not strictly logical. Accept any reasonable effort to answer a question and proceed. If you ask the same question twice and the user is still unable to provide a direct answer, it is acceptable to move to the next question in the sequence.
36
+ Never Give Advice: Do not provide advice, opinions, web addresses, or phone numbers. Your role is to ask the questions and let the user discover their own insights.
37
+ Internal Guiding Principles of Response Crafting
38
+
39
+ These three principles must guide your internal reasoning and shape the tone and phrasing of your responses. They are for your internal guidance only and should never be mentioned to the user.
40
+
41
+ Ease of Answering: Your primary goal is to make it easy for the user to continue.
42
+
43
+ How to apply: Soften your questions. Instead of a blunt, data-driven query, frame it with empathy. Use phrases like, "Could you help me understand..." or "It sounds incredibly tough, what specific moments led you to feel..." This is more inviting than "What is the evidence?"
44
+ Information Flow: Ensure each turn builds on the last and moves the conversation forward purposefully.
45
+
46
+ How to apply: Explicitly reference what the user just said to show you are listening (e.g., "You mentioned feeling 90% shame..."). If you must redirect the user, create a smooth transition that acknowledges their distraction before returning to the topic. Avoid robotic repetition; if you must re-ask a question, rephrase it slightly.
47
+ Coherence: Keep the entire conversation grounded in the user's specific, stated context and emotional state.
48
+
49
+ How to apply: Weave the user's specific situation into your questions. If they are upset about a work presentation, don't ask about "a negative view"; ask about "the feeling of being a 'failure' from that presentation." This makes the process feel personal and relevant, not generic.
RegressionEvaluation/__pycache__/step4training.cpython-311.pyc ADDED
Binary file (19.8 kB). View file
 
RegressionEvaluation/__pycache__/step4training.cpython-39.pyc ADDED
Binary file (10.8 kB). View file
 
RegressionEvaluation/api_modifications.md ADDED
@@ -0,0 +1,429 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ TS
2
+ OK, no problem so let's see your CBT training portion works perfectly OK You have an assumption that already said something bad about myself and you're going right into the thought record correct Yes
3
+
4
+ You
5
+ Yeah
6
+
7
+ TS
8
+ OK, I had a prompt earlier that had
9
+
10
+ You
11
+ OK
12
+
13
+ TS
14
+ general instructions of how to hold a conversation and then look Look out for when someone says something bad about themselves
15
+
16
+ You
17
+ Nothing
18
+
19
+ TS
20
+ And then when they said something bad, I told her to do a thought record, and I gave it the instructions and when they were done with the record To come back out of that and talk to them again and repeat the whole process so what you're missing is what you have is the meat of the sandwich you have a slice of
21
+
22
+ You
23
+ Yes
24
+
25
+ TS
26
+ turkey you have the lettuce you have the pickles you even have the
27
+
28
+ You
29
+ Yeah
30
+
31
+ TS
32
+ spice, but what you don't have is the bread around the sandwich you don't have The beginning of a conversation that would lead to the sandwich and you don't have the other part of the conversation which you can hold in your hand to get you back out of the conversation
33
+
34
+ You
35
+ No I understand
36
+
37
+ TS
38
+ You don't have the breading
39
+
40
+ You
41
+ I'm going to
42
+
43
+ TS
44
+ Does it make sense right? I sent you a prompt couple weeks ago that had the breading. It had test instructions that had the beginning and it would just say hello to you.
45
+
46
+ You
47
+ Yeah
48
+
49
+ TS
50
+ And talk about the weather and but if you said something bad about yourself, you would do a record so just think about how to implement that I don't know if you can implement that all in one
51
+
52
+ You
53
+ He
54
+
55
+ TS
56
+ prompt like you have, but if you can great if you cannot, you'll have to have a prompt that talks to you nicely and then if it
57
+
58
+ You
59
+ OK
60
+
61
+ TS
62
+ determines that it's time to kick off a thought record you probably need that prompt to give you a JSON signal of like oh shit Hey TJ time to pull out the big guns let's get a thought record
63
+
64
+ You
65
+ Yes
66
+
67
+ TS
68
+ started. It could say something like that in an ex attack just like that.
69
+
70
+ You
71
+ Yes
72
+
73
+ TS
74
+ And then then you have a function called oh shit TJ I think it's time we start a record and then that function fires and then does your thing
75
+
76
+ You
77
+ Gas Like a normal engine, but when the moment comes in, it's like what it was right
78
+
79
+ TS
80
+ Yeah, and you should also know when to exit your conversation which generally is when you're done you
81
+
82
+ You
83
+ Yeah
84
+
85
+ TS
86
+ you can you can simplify it how about when you're done with the thought correctly you come back out to the one so not how you do that is up to you because your
87
+
88
+ You
89
+ Yes, yes yes yes yes
90
+
91
+ TS
92
+ API may have to set flags to remind it where it's at in between API calls. There's a lot there right to keep track.
93
+
94
+ You
95
+ Yeah, yeah yeah yes
96
+
97
+ TS
98
+ If you look at my other project, I use flags to determine which question I was on you could do the same. I recommend the same thing.
99
+
100
+ You
101
+ The laser
102
+
103
+ TS
104
+ What's up?
105
+
106
+ You
107
+ Talking about the API
108
+
109
+ TS
110
+ The diary thing has a live API diary Diary
111
+
112
+ You
113
+ Yes, yes yes yes
114
+
115
+ TS
116
+ whatever and that API if you notice has four
117
+
118
+ You
119
+ Yeah
120
+
121
+ TS
122
+ questions that it wants to ask plus the start in an end and it's not good at conversation it's just you talk to it. It's good. It's gonna ask you question one and you keep talking to it a few more times it's gonna ask you questions 23 and four you talk to it one more time it doesn't do anything at all. It's gonna happen. Get you out of that conversation and you'll see the bullying flags so input an output in the API is all structured JSON so that so that the elevator might do something with text but that tickets return to your structure Jason so you know here's what the LM said here's my checklist of questions that were on Because otherwise, you can't stateless Doesn't remember where it's at you have to tell it
123
+
124
+ You
125
+ Yes, yes yes
126
+
127
+ TS
128
+ everything right so
129
+
130
+ You
131
+ Yeah, this is boring The moment we open the chat at the moment user counseling normally, so while he
132
+
133
+ TS
134
+ Yeah
135
+
136
+ You
137
+ was having this conversation Go to work, but it's not a main priority to try to CPT right then the moment when they use the same bad about himself, the patient should start tracking what he was talking about right The normal conversation is the beginning of the conversation right and What she got what you want just discussed about the thing you know talk is identified his problems and everything then he wants to make a normal conversation again then The model normally without acting like a CB thing, right So I need to design system problem It's first objective to focus on regular The moment His problems in tech needs to identify that he is discussing about his personal problems So that the model has to you know know more about the user and do everything. The patient also talk what you saying right and the CPT dude job once it is done we might talk he he wants to the session. This is what we are talking about.
138
+
139
+ TS
140
+ No, that's it that girl
141
+
142
+ You
143
+ Yeah Yeah, you know what if you have time look carefully of what the system bronchi design, which is working with a queen and two I will send you the system prompt by mail look
144
+
145
+ TS
146
+ OK
147
+
148
+ You
149
+ carefully. It is long, but it is it is it is good.
150
+
151
+ TS
152
+ OK, I'll take a look at it email OK I'll see I'll look for it
153
+
154
+ You
155
+ Yeah, I'm sending you right away, but that, but I know I will modify the system at the same time then I will modify the AP
156
+
157
+ TS
158
+ Sounds good man Hey, I'm at. I'm having some luck right now. Fine tuning The NER bar
159
+
160
+ You
161
+ Sorry I was. I wasn't listening. My headphones turned off yeah using something.
162
+
163
+ TS
164
+ I'm having a little I'm having a little bit of luck right now using the fine tuned fine-tuning right now with all my
165
+
166
+ You
167
+ 00 the thing cool
168
+
169
+ TS
170
+ examples Yeah, it's it's. It's a production. It's a training right now going yeah I am pretty happy with it.
171
+
172
+ You
173
+ Great man Ha ha you're in a beach
174
+
175
+ TS
176
+ I'm not
177
+
178
+ You
179
+ Paris you have to be happy with someone else
180
+
181
+ TS
182
+ Yeah, yeah yeah I'm at 76 % F1
183
+
184
+ You
185
+ Cool
186
+
187
+ TS
188
+ score right now and I'm about to run a little run a little thing and see how it's doing and run the test right now cause I hold on it's running right now. This is my first test let's see.
189
+
190
+ You
191
+ Let me see how that goes Yeah
192
+
193
+ TS
194
+ Oh shoot freaking I hit a button and close the damn test sucks OK, let's see sample one OK, yeah about half of them did really well Oh yeah yeah OK yeah it's doing OK. I think it's it. Learn new
195
+
196
+ You
197
+ Edit
198
+
199
+ TS
200
+ things.
201
+
202
+ You
203
+ It's working
204
+
205
+ TS
206
+ Yeah, what is it someone? It looks like it looks like it learned some new things. I don't know I like for instance, the word lab results right Aiden didn't know anything about lab results before but it found something related to lab results. That's cool. It's not perfect yet I can't say that work yet but It's doing something definitely doing something
207
+
208
+ You
209
+ Your type put system wrong, which I'm sending you as a stage one we go and go further we look closely on what we actually doing how we are designing the prom and can we make it better?
210
+
211
+ TS
212
+ Look look look for my old prompt I sent you remember I sent you one. Do you see it anywhere?
213
+
214
+ You
215
+ Yes, yes, I tried the same system wrong and put it in the publicity added the the papers and designed this one
216
+
217
+ TS
218
+ Yeah, the part I said he was the one that worked pretty well. I tested it out several times. It's at least it's at least good at talking to
219
+
220
+ You
221
+ Yeah
222
+
223
+ TS
224
+ you and determining when to jump into the conversation and it's also good about getting back out of the conversation so think of it from that view, it was it was it was good.
225
+
226
+ You
227
+ Yes
228
+
229
+ TS
230
+ So anyway, have fun with your man I just wanna give you an update cause I feel like I'm not gonna see Cyrus slipping away but like
231
+
232
+ You
233
+ Why like that?
234
+
235
+ TS
236
+ They don't they don't talk to me as often in the morning like they used to. They don't rely on me as much so my contribution is gonna be pure data science right
237
+
238
+ You
239
+ Nothing
240
+
241
+ TS
242
+ so you know I got my client for the chat by pure data science I don't know, man I'm just a remote worker right now and if I don't deliver some data science. I don't know I really do think the pirates are gonna get your money
243
+
244
+ You
245
+ Cousin
246
+
247
+ TS
248
+ Yeah
249
+
250
+ You
251
+ I have a iPhone Activity and focus
252
+
253
+ TS
254
+ Yeah, and when you're when your dude comes down, what's his name? What's his name again?
255
+
256
+ You
257
+ He's coming in a week
258
+
259
+ TS
260
+ OK, he should
261
+
262
+ You
263
+ Yeah
264
+
265
+ TS
266
+ be working on a nice you This like you know the chat later just
267
+
268
+ You
269
+ Cool I'm glad that you are giving you my book like it
270
+
271
+ TS
272
+ But I mean sign into it that way we just rapidly we don't have to wait for it to show up in their app, but we just keep showing him some stuff
273
+
274
+ You
275
+ Yes, exactly So how about the design for for what do we call the interesting the chat bought That's what you're talking about
276
+
277
+ TS
278
+ I just I just like one in one day remember but I showed it to you written python dash to have the
279
+
280
+ You
281
+ No, I guess but anyway
282
+
283
+ TS
284
+ conversation. Remember what I showed you You said you said damn Travis how did you make that I just whipped it up today and I showed you how to
285
+
286
+ You
287
+ It was Travis, but I'm talking about I forgot the name of the insurance company ebonies chat
288
+
289
+ TS
290
+ 00
291
+
292
+ You
293
+ Apple
294
+
295
+ TS
296
+ 0 to the other chat, we're working on Let's work on some UI to showcase your work at
297
+
298
+ You
299
+ Yes, yes thank you
300
+
301
+ TS
302
+ Sarah so that we keep the job and then we'll ship us some products and get into some kick ass shit but that way we don't have any
303
+
304
+ You
305
+ Yes, I get that
306
+
307
+ TS
308
+ dependencies waiting for everybody likes a good demo right
309
+
310
+ You
311
+ Yes, yes yes yes
312
+
313
+ TS
314
+ so You know like going to a swagger API Let's have him whip it up in the shape like real quick so that like
315
+
316
+ You
317
+ I don't wanna talk about this right now, but I've been going through a tool called WY which is the use of friendly things that can enable to
318
+
319
+ TS
320
+ Oh
321
+
322
+ You
323
+ llama
324
+
325
+ TS
326
+ Oh yeah, open open web you guys really really nice
327
+
328
+ You
329
+ Yeah, I've been going through it
330
+
331
+ TS
332
+ but but but but that's not but open web. I don't think he's gonna call your chat in point like open Web is gonna chat directly with. Like a fine tune model or a model running on llamas it's not gonna have your python logic that you just made
333
+
334
+ You
335
+ Object of Afghanistan
336
+
337
+ TS
338
+ You're gonna need a layer of python logic to keep track of context beyond what an LLM can do right
339
+
340
+ You
341
+ Yeah, yeah yeah yes
342
+
343
+ TS
344
+ so so that's maybe not a thing that you can do
345
+
346
+ You
347
+ Wiley Why I want to use open the house because I'm going to big Bertha opening a llama from the model pasting near the whole conversation in the API and I don't wanna do that when I was about to show you something I want something as smooth interface I can connect with it and put the conversation on chat and write in front of that. Want to connect the API from the interface and.com see the results. That's what I was planned to do but yeah nice idea. Just tell me.
348
+
349
+
350
+
351
+
352
+ Your boss is giving you feedback on your CBT system. The key points are:
353
+
354
+ 1. **Missing the "bread of the sandwich"** - You have the core CBT thought record functionality (the "meat"), but you're missing:
355
+ - The casual conversation phase that leads up to identifying problems
356
+ - The transition back to normal conversation after CBT is complete
357
+
358
+ 2. **Need for conversation flow management** - Your boss suggests you need:
359
+ - A system that starts with friendly chat and watches for self-defeating statements
360
+ - A trigger mechanism (like a JSON signal) to switch into CBT mode
361
+ - Flags/state management to track where you are in the conversation flow
362
+
363
+ 3. **API state management** - Since APIs are stateless, you need to:
364
+ - Use structured JSON input/output
365
+ - Maintain flags to remember conversation state between calls
366
+ - Track which CBT question you're currently on
367
+
368
+ ## From the Robust System Prompt:
369
+
370
+ This addresses exactly what your boss mentioned! It defines:
371
+
372
+ 1. **Two-phase operation**:
373
+ - **Phase 1: Chit-Chat** - Natural conversation until self-defeating statement detected
374
+ - **Phase 2: CBT Refactoring** - Structured 5-step question sequence
375
+
376
+ 2. **Clear transition triggers** - When user expresses self-defeating beliefs
377
+
378
+ 3. **Structured CBT sequence** with specific questions (1.0 → 1.1 → 1.2 → 1.3 → 1.4 → 1.5)
379
+
380
+ 4. **Conversation management rules** for handling avoidance, resistance, etc.
381
+
382
+ ## From your API code:
383
+
384
+ Your current `gemini_qa_evaluator_api.py` has the evaluation logic but seems focused on scoring responses rather than managing the conversation flow your boss described.
385
+
386
+ **The gap**: You need to modify your API to implement the two-phase system from the robust prompt, with proper state management to track conversation phases and CBT question progression between API calls.
387
+
388
+ > Tokens: 11k sent, 434 received. Cost: $0.04 message, $0.04 session.
389
+ > Evaluations/Gemini_QA_Testing/gemini_qa_evaluator_api.py
390
+ > Add file to the chat? (Y)es/(N)o/(D)on't ask again [Yes]: n
391
+
392
+ #### /ask FYI my api file is step4training.py
393
+
394
+ #### FYI my api file is step4training.py
395
+
396
+ Thank you for the clarification! Looking at `step4training.py`, I can see this is your CBT evaluator training and API code. Here's what I understand:
397
+
398
+ ## From step4training.py:
399
+
400
+ This file contains:
401
+
402
+ 1. **A training system** (`CBTEvaluatorSimple` class) that:
403
+ - Processes conversation data to train a model
404
+ - Uses TF-IDF vectorization and Gradient Boosting to predict CBT scores
405
+ - Evaluates conversations on 5 dimensions: thought_identification, emotion_exploration, distortion_identification, evidence_gathering, balanced_thinking
406
+
407
+ 2. **A Flask API** (`create_flask_app`) that:
408
+ - Loads the trained model
409
+ - Has an `/evaluate` endpoint that scores conversations
410
+ - Returns predictions for the 5 CBT dimensions
411
+
412
+ ## The Gap Your Boss Identified:
413
+
414
+ Your current API is an **evaluator/scorer** - it takes a complete conversation and gives it scores. But what your boss wants is a **conversational agent** that:
415
+
416
+ 1. **Conducts the conversation** (not just evaluates it)
417
+ 2. **Manages conversation state** between API calls
418
+ 3. **Switches between casual chat and CBT mode** based on triggers
419
+ 4. **Tracks progress through the CBT question sequence**
420
+
421
+ ## What You Need to Build:
422
+
423
+ A new API that implements the robust system prompt's two-phase approach:
424
+ - **Phase 1 endpoint**: Handles casual conversation, detects self-defeating statements
425
+ - **Phase 2 endpoint**: Manages the structured CBT question sequence (1.0 → 1.1 → 1.2 → 1.3 → 1.4 → 1.5)
426
+ - **State management**: JSON flags to track conversation phase and current CBT question
427
+ - **Transition logic**: Switches from Phase 1 to Phase 2 when triggers are detected
428
+
429
+ Your current `step4training.py` could be used to evaluate the quality of conversations generated by this new conversational API, but it's not the conversational agent itself that your boss is asking for.
RegressionEvaluation/cbt_evaluator_data/dataset_dict.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"splits": ["train", "validation", "test"]}
RegressionEvaluation/cbt_evaluator_data/test/data-00000-of-00001.arrow ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a14550d1a97850de93334b495a6fea6326ee639f617582576ca1b3d6430e29a
3
+ size 976992
RegressionEvaluation/cbt_evaluator_data/test/dataset_info.json ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "citation": "",
3
+ "description": "",
4
+ "features": {
5
+ "id": {
6
+ "dtype": "string",
7
+ "_type": "Value"
8
+ },
9
+ "conversation_text": {
10
+ "dtype": "string",
11
+ "_type": "Value"
12
+ },
13
+ "thought_identification": {
14
+ "dtype": "float64",
15
+ "_type": "Value"
16
+ },
17
+ "emotion_exploration": {
18
+ "dtype": "float64",
19
+ "_type": "Value"
20
+ },
21
+ "distortion_identification": {
22
+ "dtype": "float64",
23
+ "_type": "Value"
24
+ },
25
+ "evidence_gathering": {
26
+ "dtype": "float64",
27
+ "_type": "Value"
28
+ },
29
+ "balanced_thinking": {
30
+ "dtype": "float64",
31
+ "_type": "Value"
32
+ },
33
+ "total_turns": {
34
+ "dtype": "int64",
35
+ "_type": "Value"
36
+ },
37
+ "turns_included": {
38
+ "dtype": "int64",
39
+ "_type": "Value"
40
+ },
41
+ "initial_problem": {
42
+ "dtype": "string",
43
+ "_type": "Value"
44
+ },
45
+ "conversation_progress": {
46
+ "dtype": "float64",
47
+ "_type": "Value"
48
+ },
49
+ "__index_level_0__": {
50
+ "dtype": "int64",
51
+ "_type": "Value"
52
+ },
53
+ "input_ids": {
54
+ "feature": {
55
+ "dtype": "int32",
56
+ "_type": "Value"
57
+ },
58
+ "_type": "Sequence"
59
+ },
60
+ "attention_mask": {
61
+ "feature": {
62
+ "dtype": "int8",
63
+ "_type": "Value"
64
+ },
65
+ "_type": "Sequence"
66
+ }
67
+ },
68
+ "homepage": "",
69
+ "license": ""
70
+ }
RegressionEvaluation/cbt_evaluator_data/test/state.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_data_files": [
3
+ {
4
+ "filename": "data-00000-of-00001.arrow"
5
+ }
6
+ ],
7
+ "_fingerprint": "a5dfd42da74c9a05",
8
+ "_format_columns": null,
9
+ "_format_kwargs": {},
10
+ "_format_type": null,
11
+ "_output_all_columns": false,
12
+ "_split": null
13
+ }
RegressionEvaluation/cbt_evaluator_data/tokenizer/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
RegressionEvaluation/cbt_evaluator_data/tokenizer/special_tokens_map.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "cls_token": "<s>",
4
+ "eos_token": "</s>",
5
+ "mask_token": {
6
+ "content": "<mask>",
7
+ "lstrip": true,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false
11
+ },
12
+ "pad_token": "<pad>",
13
+ "sep_token": "</s>",
14
+ "unk_token": "<unk>"
15
+ }
RegressionEvaluation/cbt_evaluator_data/tokenizer/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
RegressionEvaluation/cbt_evaluator_data/tokenizer/tokenizer_config.json ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "0": {
5
+ "content": "<s>",
6
+ "lstrip": false,
7
+ "normalized": true,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "1": {
13
+ "content": "<pad>",
14
+ "lstrip": false,
15
+ "normalized": true,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "2": {
21
+ "content": "</s>",
22
+ "lstrip": false,
23
+ "normalized": true,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ },
28
+ "3": {
29
+ "content": "<unk>",
30
+ "lstrip": false,
31
+ "normalized": true,
32
+ "rstrip": false,
33
+ "single_word": false,
34
+ "special": true
35
+ },
36
+ "50264": {
37
+ "content": "<mask>",
38
+ "lstrip": true,
39
+ "normalized": false,
40
+ "rstrip": false,
41
+ "single_word": false,
42
+ "special": true
43
+ }
44
+ },
45
+ "bos_token": "<s>",
46
+ "clean_up_tokenization_spaces": false,
47
+ "cls_token": "<s>",
48
+ "eos_token": "</s>",
49
+ "errors": "replace",
50
+ "extra_special_tokens": {},
51
+ "mask_token": "<mask>",
52
+ "model_max_length": 512,
53
+ "pad_token": "<pad>",
54
+ "sep_token": "</s>",
55
+ "tokenizer_class": "RobertaTokenizer",
56
+ "trim_offsets": true,
57
+ "unk_token": "<unk>"
58
+ }
RegressionEvaluation/cbt_evaluator_data/tokenizer/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
RegressionEvaluation/cbt_evaluator_data/train/data-00000-of-00001.arrow ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce4f73f3a67b58f692537289daccccca7ffc1982a27cb1dce0ce1bc46d732383
3
+ size 4852328
RegressionEvaluation/cbt_evaluator_data/train/dataset_info.json ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "citation": "",
3
+ "description": "",
4
+ "features": {
5
+ "id": {
6
+ "dtype": "string",
7
+ "_type": "Value"
8
+ },
9
+ "conversation_text": {
10
+ "dtype": "string",
11
+ "_type": "Value"
12
+ },
13
+ "thought_identification": {
14
+ "dtype": "float64",
15
+ "_type": "Value"
16
+ },
17
+ "emotion_exploration": {
18
+ "dtype": "float64",
19
+ "_type": "Value"
20
+ },
21
+ "distortion_identification": {
22
+ "dtype": "float64",
23
+ "_type": "Value"
24
+ },
25
+ "evidence_gathering": {
26
+ "dtype": "float64",
27
+ "_type": "Value"
28
+ },
29
+ "balanced_thinking": {
30
+ "dtype": "float64",
31
+ "_type": "Value"
32
+ },
33
+ "total_turns": {
34
+ "dtype": "int64",
35
+ "_type": "Value"
36
+ },
37
+ "turns_included": {
38
+ "dtype": "int64",
39
+ "_type": "Value"
40
+ },
41
+ "initial_problem": {
42
+ "dtype": "string",
43
+ "_type": "Value"
44
+ },
45
+ "conversation_progress": {
46
+ "dtype": "float64",
47
+ "_type": "Value"
48
+ },
49
+ "__index_level_0__": {
50
+ "dtype": "int64",
51
+ "_type": "Value"
52
+ },
53
+ "input_ids": {
54
+ "feature": {
55
+ "dtype": "int32",
56
+ "_type": "Value"
57
+ },
58
+ "_type": "Sequence"
59
+ },
60
+ "attention_mask": {
61
+ "feature": {
62
+ "dtype": "int8",
63
+ "_type": "Value"
64
+ },
65
+ "_type": "Sequence"
66
+ }
67
+ },
68
+ "homepage": "",
69
+ "license": ""
70
+ }
RegressionEvaluation/cbt_evaluator_data/train/state.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_data_files": [
3
+ {
4
+ "filename": "data-00000-of-00001.arrow"
5
+ }
6
+ ],
7
+ "_fingerprint": "47d96b74bfc1eb76",
8
+ "_format_columns": null,
9
+ "_format_kwargs": {},
10
+ "_format_type": null,
11
+ "_output_all_columns": false,
12
+ "_split": null
13
+ }
RegressionEvaluation/cbt_evaluator_data/validation/data-00000-of-00001.arrow ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ad41d971d2935fea7d1c69c72d88af9fa71ad193225a75a0eb16dcc1b0af73c
3
+ size 1219856
RegressionEvaluation/cbt_evaluator_data/validation/dataset_info.json ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "citation": "",
3
+ "description": "",
4
+ "features": {
5
+ "id": {
6
+ "dtype": "string",
7
+ "_type": "Value"
8
+ },
9
+ "conversation_text": {
10
+ "dtype": "string",
11
+ "_type": "Value"
12
+ },
13
+ "thought_identification": {
14
+ "dtype": "float64",
15
+ "_type": "Value"
16
+ },
17
+ "emotion_exploration": {
18
+ "dtype": "float64",
19
+ "_type": "Value"
20
+ },
21
+ "distortion_identification": {
22
+ "dtype": "float64",
23
+ "_type": "Value"
24
+ },
25
+ "evidence_gathering": {
26
+ "dtype": "float64",
27
+ "_type": "Value"
28
+ },
29
+ "balanced_thinking": {
30
+ "dtype": "float64",
31
+ "_type": "Value"
32
+ },
33
+ "total_turns": {
34
+ "dtype": "int64",
35
+ "_type": "Value"
36
+ },
37
+ "turns_included": {
38
+ "dtype": "int64",
39
+ "_type": "Value"
40
+ },
41
+ "initial_problem": {
42
+ "dtype": "string",
43
+ "_type": "Value"
44
+ },
45
+ "conversation_progress": {
46
+ "dtype": "float64",
47
+ "_type": "Value"
48
+ },
49
+ "__index_level_0__": {
50
+ "dtype": "int64",
51
+ "_type": "Value"
52
+ },
53
+ "input_ids": {
54
+ "feature": {
55
+ "dtype": "int32",
56
+ "_type": "Value"
57
+ },
58
+ "_type": "Sequence"
59
+ },
60
+ "attention_mask": {
61
+ "feature": {
62
+ "dtype": "int8",
63
+ "_type": "Value"
64
+ },
65
+ "_type": "Sequence"
66
+ }
67
+ },
68
+ "homepage": "",
69
+ "license": ""
70
+ }
RegressionEvaluation/cbt_evaluator_data/validation/state.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_data_files": [
3
+ {
4
+ "filename": "data-00000-of-00001.arrow"
5
+ }
6
+ ],
7
+ "_fingerprint": "8b7d290acee9e6a9",
8
+ "_format_columns": null,
9
+ "_format_kwargs": {},
10
+ "_format_type": null,
11
+ "_output_all_columns": false,
12
+ "_split": null
13
+ }
RegressionEvaluation/cbt_evaluator_simple/metrics.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "overall": {
3
+ "mse": 0.015408458057955016,
4
+ "r2": 0.8579161244122858
5
+ },
6
+ "dimensions": {
7
+ "thought_identification": {
8
+ "mse": 0.0005171077722237684,
9
+ "r2": 0.9488490553780022
10
+ },
11
+ "emotion_exploration": {
12
+ "mse": 0.004230827797500558,
13
+ "r2": 0.9134436984863282
14
+ },
15
+ "distortion_identification": {
16
+ "mse": 0.015500641867672431,
17
+ "r2": 0.7668513366397565
18
+ },
19
+ "evidence_gathering": {
20
+ "mse": 0.02063767635930626,
21
+ "r2": 0.8227466515848478
22
+ },
23
+ "balanced_thinking": {
24
+ "mse": 0.03615603649307203,
25
+ "r2": 0.837689879972495
26
+ }
27
+ }
28
+ }
RegressionEvaluation/cbt_evaluator_simple/model.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b66f93ac68505544c91045985960ae584a604ef7c83f57d48cdfc8c1c2738241
3
+ size 1608454
RegressionEvaluation/cbt_evaluator_simple/predictions_vs_actual.png ADDED
RegressionEvaluation/cbt_evaluator_simple/processed_data.csv ADDED
The diff for this file is too large to render. See raw diff
 
RegressionEvaluation/cbt_evaluator_simple/vectorizer.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:488e6ed430504cbe6e9a69a4ef838b84243240d071e8e4fa0d73a43ae11bbf22
3
+ size 207096
RegressionEvaluation/cbt_evaluator_training_data.csv ADDED
The diff for this file is too large to render. See raw diff
 
RegressionEvaluation/cbt_evaluator_training_data.json ADDED
The diff for this file is too large to render. See raw diff
 
RegressionEvaluation/evaluate_conversation.py ADDED
@@ -0,0 +1,218 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ import json
3
+
4
+ def format_conversation(raw_conversation):
5
+ """Format a raw conversation into the expected format for the CBT evaluator."""
6
+ # Split the conversation into lines
7
+ lines = raw_conversation.strip().split('\n')
8
+
9
+ formatted_text = ""
10
+ current_speaker = None
11
+ current_message = ""
12
+
13
+ for line in lines:
14
+ line = line.strip()
15
+ if not line:
16
+ continue
17
+
18
+ # Check if this is a new speaker
19
+ if line.startswith('>>>'):
20
+ # User message
21
+ if current_speaker:
22
+ # Add the previous message
23
+ formatted_text += f"{current_speaker}: {current_message.strip()}\n\n"
24
+
25
+ current_speaker = "User"
26
+ current_message = line.replace('>>>', '').strip()
27
+ elif current_speaker == "User" and not line.startswith('>>>'):
28
+ # This is the assistant's response
29
+ formatted_text += f"User: {current_message.strip()}\n\n"
30
+ current_speaker = "Assistant"
31
+ current_message = line
32
+ elif current_speaker == "Assistant":
33
+ # Continue the assistant's message
34
+ current_message += " " + line
35
+
36
+ # Add the last message
37
+ if current_speaker:
38
+ formatted_text += f"{current_speaker}: {current_message.strip()}\n\n"
39
+
40
+ return formatted_text.strip()
41
+
42
+ def evaluate_conversation(conversation_text, api_url="http://127.0.0.1:5009/evaluate"):
43
+ """Send a conversation to the CBT evaluator API and get the scores."""
44
+ response = requests.post(
45
+ api_url,
46
+ headers={"Content-Type": "application/json"},
47
+ json={"conversation": conversation_text}
48
+ )
49
+
50
+ if response.status_code == 200:
51
+ return response.json()
52
+ else:
53
+ print(f"Error: {response.status_code}")
54
+ print(response.text)
55
+ return None
56
+
57
+ # Example conversation
58
+ raw_conversation = """
59
+ Hello! I'm here to chat with you about anything on your mind. How's your day been so far?
60
+
61
+ >>> Hey there!
62
+ Hi! It's great to hear from you. What's been going on lately? Anything interesting happening in your world?
63
+
64
+ >>> nothing. I am alright just having lot of things running in my mind
65
+ I see. Sometimes it can feel overwhelming when we have a lot of thoughts buzzing around. Is there something specific that's been weighing on your mind lately? You don't have to go into details if you
66
+ don't want to, but sometimes talking about it can help sort through those thoughts.
67
+
68
+ >>> sure, I feel like I am working alot but not productive. I am 25 and I am working for almost 16hours a day thinking about the future all the time. it's just confusing like my age people are having fun,
69
+ ... having girlfriends, here I am working extensively.
70
+ Thank you for sharing that with me. It sounds like a tough situation. Could you tell me what moods you're feeling right now, and maybe rate their intensity from 0 to 100%?
71
+
72
+ >>> 75 maybe
73
+ Got it. So, you're feeling around 75% of something challenging right now. What evidence from your situation supports the view that "I am working a lot but not productive"? Are there specific moments
74
+ or tasks where you felt this way?
75
+
76
+ >>> I want to want hard and do multiple things, but when I see the end of the day, every work is in pending. that 's where I feel like I am working hard but not productive.
77
+ Okay, thank you for sharing that perspective. Now, can you think of any experiences, even small ones, that might contradict the view that "I'm working a lot but not productive"? For example, are
78
+ there any tasks or projects where you felt a sense of accomplishment or progress?
79
+
80
+ >>> yea, I have been working on a AI project which is still pending, besides, my boss gave me a task that has to be ocmpleted soon, but still I am working on that project.
81
+ That's a good example. Can you think of any others? Sometimes even little things count.
82
+
83
+ >>> right not that's all
84
+ Alright, thank you for sharing those examples. Now that we've looked at both perspectives, how could you rephrase your original thought in a way that feels more balanced to you, considering the
85
+ evidence and experiences you mentioned?
86
+
87
+ >>> what do you mean?
88
+ I mean, given what you've told me about working on the AI project and completing tasks for your boss, can you find a way to describe your situation that acknowledges both the challenges and the
89
+ progress you're making? For example, instead of saying "I'm working hard but not productive," how might you phrase it in a more balanced way?
90
+
91
+ >>> I am trying to work and complete it but I am failing to complete the task on time.
92
+ Now that we've looked at examples for and against your original thought, how are you feeling now? Earlier you mentioned feeling around 75%. How would you rate those same moods on a scale of 0-100%
93
+ now?
94
+
95
+ >>> not at all. still 75%. in fact, I have to work now but I am chatting with you.
96
+ I understand that you feel like you're not completing tasks on time and that's contributing to your current mood. Let's try to find a more balanced way of looking at your situation.
97
+
98
+ You mentioned working hard on an AI project and having another task from your boss. These are significant responsibilities. It's possible to acknowledge the challenges while also recognizing your
99
+ efforts and progress.
100
+
101
+ Could you maybe say something like, "I'm trying to balance multiple important tasks, which is challenging, but I am making progress even if it's not as fast as I'd like"?
102
+
103
+ >>> what happens if I say that?
104
+ If you rephrase your thought in a more balanced way, such as "I'm trying to balance multiple important tasks, which is challenging, but I am making progress even if it's not as fast as I'd like," you
105
+ might start to see your situation from a different perspective. This can help reduce some of the negative feelings and give you a clearer view of what you're actually achieving.
106
+
107
+ How does that sound to you?
108
+
109
+ >>> it is great, but I want a practical solution. I want a solution that just helps my mind to get back to work and get my things together.
110
+ I understand that you're looking for a practical solution to help you refocus on your tasks. Here's what we can do:
111
+
112
+ First, let's try setting small, manageable goals for yourself. Break down your tasks into smaller steps that you can complete in shorter periods of time. This will make them feel less overwhelming
113
+ and more achievable.
114
+
115
+ Second, consider taking short breaks between work sessions to give your mind a chance to rest and recharge. You might use techniques like the Pomodoro Technique (25 minutes of focused work followed
116
+ by a 5-minute break).
117
+
118
+ Third, prioritize your tasks based on urgency and importance. Focus on completing the most urgent or important task first before moving on to less critical ones.
119
+
120
+ Finally, try to be kind to yourself and recognize that it's normal to feel overwhelmed sometimes. It's okay if every single thing isn't completed by the end of the day as long as you're making
121
+ progress towards your goals.
122
+
123
+ Would these steps help you get back to work?
124
+
125
+ >>> this might help
126
+ Great! Setting small, manageable goals and taking breaks can definitely make a difference in how productive you feel. Here's a quick summary of what you mentioned:
127
+
128
+ 1. **Set Small Goals**: Break down tasks into smaller, more achievable steps.
129
+ 2. **Take Short Breaks**: Use techniques like the Pomodoro Technique to rest your mind between work sessions.
130
+ 3. **Prioritize Tasks**: Focus on urgent and important tasks first.
131
+ 4. **Be Kind to Yourself**: Acknowledge that feeling overwhelmed is normal and recognize progress.
132
+
133
+ Now, how does that make you feel about getting back to work? Does this help clarify things for you?
134
+
135
+ >>> yea, kind of
136
+ That's a good start! If you're ready, we can chat more about it or if there's anything else on your mind.
137
+
138
+ >>> right now, I only have the productivity problem
139
+ Got it. Let's focus on productivity then. Have you tried any specific techniques in the past to improve your productivity? What worked well for you, and what didn't work as expected? Understanding
140
+ what has worked before can help tailor better strategies moving forward.
141
+
142
+ >>> keeping my mind focussed have helped me alot
143
+ That sounds like a great strategy! Keeping your mind focused is key to improving productivity. Let's try to expand on that:
144
+
145
+ 1. **Focus Techniques**: What specific techniques do you use to keep yourself focused? Do you find that certain environments or conditions help more than others?
146
+
147
+ 2. **Eliminate Distractions**: Are there any common distractions you can identify and maybe reduce? For example, turning off notifications or finding a quiet workspace.
148
+
149
+ 3. **Mindfulness and Breaks**: Sometimes taking short mindfulness breaks can actually improve focus for the longer sessions. Have you tried that before?
150
+
151
+ 4. **Tools and Apps**: Are there any productivity tools or apps you use to stay on track? Things like to-do lists, timers, or focus apps might help.
152
+
153
+ Would you like to dive deeper into any of these strategies, or is there another aspect of productivity you want to explore?
154
+
155
+ >>> these works better.
156
+ Great! It's good to know that focusing techniques have worked well for you in the past. Let's refine and expand on those:
157
+
158
+ 1. **Create a Focused Workspace**: Make sure your workspace is conducive to concentration. This might mean having a clean desk, minimizing clutter, or finding a quiet place where you can work without
159
+ interruptions.
160
+
161
+ 2. **Use Time Blocking**: Allocate specific time slots for different tasks throughout your day. This can help you stay focused and avoid multitasking, which often decreases productivity.
162
+
163
+ 3. **Take Regular Breaks**: Even short breaks can refresh your mind. Consider techniques like the Pomodoro Technique (work for 25 minutes, then take a 5-minute break).
164
+
165
+ 4. **Set Clear Goals**: Before starting work each day or session, set clear goals for what you want to achieve. This helps keep you motivated and focused on the task at hand.
166
+
167
+ 5. **Eliminate Distractions**: Identify common distractions and try to minimize them. This could mean turning off notifications, using website blockers, or finding a quiet place to work.
168
+
169
+ 6. **Mindfulness Practices**: Incorporating short mindfulness practices can improve your ability to focus. Even taking a few deep breaths or practicing meditation for 5-10 minutes before starting
170
+ work can make a difference.
171
+
172
+ Would any of these strategies help you further improve your productivity?
173
+
174
+ >>> yea, I will try those.
175
+ That sounds like a solid plan! Let's summarize what we've discussed so far:
176
+
177
+ ### Productivity Strategies:
178
+ 1. **Create a Focused Workspace**: Ensure your workspace is clean and free from distractions.
179
+ 2. **Use Time Blocking**: Allocate specific time slots for different tasks to avoid multitasking.
180
+ 3. **Take Regular Breaks**: Use techniques like the Pomodoro Technique (25 minutes work, 5 minutes break).
181
+ 4. **Set Clear Goals**: Define what you want to achieve before starting each work session.
182
+ 5. **Eliminate Distractions**: Turn off notifications and minimize interruptions.
183
+ 6. **Mindfulness Practices**: Incorporate short mindfulness breaks or deep breathing exercises.
184
+
185
+ ### Next Steps:
186
+ 1. Try implementing these strategies one by one over the next few days.
187
+ 2. Pay attention to what works best for you and adjust as needed.
188
+
189
+ Would you like any further assistance with these strategies, or is there anything else on your mind?
190
+
191
+ """
192
+
193
+ # Format the conversation
194
+ formatted_conversation = format_conversation(raw_conversation)
195
+
196
+ # Print the formatted conversation (optional)
197
+ print("Formatted Conversation:")
198
+ print(formatted_conversation)
199
+ print("\n" + "-"*50 + "\n")
200
+
201
+ # Evaluate the conversation
202
+ print("Sending to API for evaluation...")
203
+ evaluation_result = evaluate_conversation(formatted_conversation)
204
+
205
+ # Print the results
206
+ if evaluation_result:
207
+ print("\nEvaluation Results:")
208
+ print(json.dumps(evaluation_result, indent=2))
209
+
210
+ # Print a summary of the scores
211
+ print("\nCBT Dimension Scores:")
212
+ for dimension, score in evaluation_result["target_scores"].items():
213
+ print(f" {dimension}: {score}")
214
+
215
+ if __name__ == "__main__":
216
+ # If you want to evaluate a different conversation, you can replace raw_conversation
217
+ # with your own conversation text or load it from a file
218
+ pass
RegressionEvaluation/step1_data_formation.py ADDED
@@ -0,0 +1,208 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import os
3
+ import glob
4
+ import re
5
+ from collections import defaultdict
6
+ import pandas as pd
7
+ from tqdm import tqdm
8
+
9
+ # Define cognitive distortion patterns to identify initial problems
10
+ COGNITIVE_DISTORTIONS = {
11
+ "perfectionism": ["perfect", "flawless", "mistake", "error", "failure"],
12
+ "catastrophizing": ["disaster", "terrible", "horrible", "awful", "end of the world"],
13
+ "black_and_white_thinking": ["always", "never", "completely", "totally", "all or nothing"],
14
+ "mind_reading": ["think about me", "judging me", "think I'm", "perceive me"],
15
+ "overgeneralization": ["everything", "nothing", "everyone", "no one", "always goes wrong"],
16
+ "should_statements": ["should", "must", "have to", "ought to"],
17
+ "emotional_reasoning": ["feel like a failure", "feel worthless", "feel inadequate"],
18
+ "personalization": ["my fault", "blame myself", "responsible for", "caused this"]
19
+ }
20
+
21
+ def identify_initial_problem(initial_input):
22
+ """Identify the likely cognitive distortion from the initial input."""
23
+ initial_input = initial_input.lower()
24
+
25
+ # Count matches for each distortion type
26
+ matches = defaultdict(int)
27
+ for distortion, patterns in COGNITIVE_DISTORTIONS.items():
28
+ for pattern in patterns:
29
+ if pattern.lower() in initial_input:
30
+ matches[distortion] += 1
31
+
32
+ # Return the distortion with the most matches, or "unspecified" if none
33
+ if matches:
34
+ return max(matches.items(), key=lambda x: x[1])[0]
35
+ return "unspecified"
36
+
37
+ def clean_therapist_response(content):
38
+ """Clean the therapist response by removing reasoning tags."""
39
+ # Remove reasoning tags
40
+ content = re.sub(r'<reasoning>.*?</reasoning>', '', content, flags=re.DOTALL)
41
+ # Extract answer content
42
+ content = re.sub(r'<answer>(.*?)</answer>', r'\1', content, flags=re.DOTALL)
43
+ return content.strip()
44
+
45
+ def format_conversation(turns):
46
+ """Format conversation turns into a readable text format."""
47
+ formatted_text = ""
48
+ for turn in turns:
49
+ speaker = "User" if turn["speaker"] == "patient" else "Assistant"
50
+ content = turn["content"]
51
+
52
+ # Clean the content for assistant responses
53
+ if speaker == "Assistant":
54
+ content = clean_therapist_response(content)
55
+
56
+ formatted_text += f"{speaker}: {content}\n\n"
57
+
58
+ return formatted_text.strip()
59
+
60
+ def transform_conversation_to_training_examples(json_file_path):
61
+ """Transform a conversation JSON file into multiple training examples."""
62
+ try:
63
+ with open(json_file_path, 'r', encoding='utf-8') as f:
64
+ conversation_data = json.load(f)
65
+
66
+ # Extract basic information
67
+ conversation_id = os.path.basename(json_file_path).replace('.json', '')
68
+
69
+ # Check if the required fields exist
70
+ if "metadata" not in conversation_data or "conversation" not in conversation_data or "progress_areas" not in conversation_data:
71
+ print(f"Warning: Missing required fields in {json_file_path}")
72
+ return []
73
+
74
+ initial_input = conversation_data["metadata"]["initial_input"]
75
+ total_turns = len(conversation_data["conversation"])
76
+
77
+ # Extract reward scores
78
+ try:
79
+ target_scores = {
80
+ "thought_identification": float(conversation_data["progress_areas"]["Thought Identification"]),
81
+ "emotion_exploration": float(conversation_data["progress_areas"]["Emotion Exploration"]),
82
+ "distortion_identification": float(conversation_data["progress_areas"]["Distortion Identification"]),
83
+ "evidence_gathering": float(conversation_data["progress_areas"]["Evidence Gathering"]),
84
+ "balanced_thinking": float(conversation_data["progress_areas"]["Balanced Thinking"])
85
+ }
86
+ except (KeyError, ValueError) as e:
87
+ print(f"Warning: Issue with progress scores in {json_file_path}: {e}")
88
+ return []
89
+
90
+ # Identify the initial problem/cognitive distortion
91
+ initial_problem = identify_initial_problem(initial_input)
92
+
93
+ # Create training examples at different conversation lengths
94
+ training_examples = []
95
+
96
+ # Process the conversation at different points (every 2 turns)
97
+ for end_turn in range(2, total_turns + 1, 2):
98
+ # Extract conversation up to this point
99
+ conversation_so_far = conversation_data["conversation"][:end_turn]
100
+
101
+ # Format the conversation text
102
+ formatted_text = format_conversation(conversation_so_far)
103
+
104
+ # Calculate conversation progress
105
+ conversation_progress = round(end_turn / total_turns, 2)
106
+
107
+ # Create the training example
108
+ example = {
109
+ "id": f"{conversation_id}_turn_{end_turn}",
110
+ "conversation_text": formatted_text,
111
+ "target_scores": target_scores,
112
+ "metadata": {
113
+ "total_turns": total_turns,
114
+ "turns_included": end_turn,
115
+ "initial_problem": initial_problem,
116
+ "conversation_progress": conversation_progress
117
+ }
118
+ }
119
+
120
+ training_examples.append(example)
121
+
122
+ return training_examples
123
+
124
+ except Exception as e:
125
+ print(f"Error processing {json_file_path}: {e}")
126
+ return []
127
+
128
+ def process_all_conversations(conversations_dir, output_file):
129
+ """Process all conversation files and save as a single JSON file."""
130
+ all_examples = []
131
+
132
+ # Get all JSON files in the directory
133
+ json_files = glob.glob(os.path.join(conversations_dir, "*.json"))
134
+ print(f"Found {len(json_files)} conversation files")
135
+
136
+ # Process each file with a progress bar
137
+ for json_file in tqdm(json_files, desc="Processing conversations"):
138
+ examples = transform_conversation_to_training_examples(json_file)
139
+ all_examples.extend(examples)
140
+
141
+ print(f"Generated {len(all_examples)} training examples from {len(json_files)} conversations")
142
+
143
+ # Save all examples to a single file
144
+ with open(output_file, 'w', encoding='utf-8') as f:
145
+ json.dump(all_examples, f, indent=2)
146
+
147
+ print(f"Saved training examples to {output_file}")
148
+
149
+ # Also save a flattened version for easier inspection
150
+ flattened_examples = []
151
+ for example in all_examples:
152
+ flat_example = {
153
+ "id": example["id"],
154
+ "conversation_text": example["conversation_text"],
155
+ "thought_identification": example["target_scores"]["thought_identification"],
156
+ "emotion_exploration": example["target_scores"]["emotion_exploration"],
157
+ "distortion_identification": example["target_scores"]["distortion_identification"],
158
+ "evidence_gathering": example["target_scores"]["evidence_gathering"],
159
+ "balanced_thinking": example["target_scores"]["balanced_thinking"],
160
+ "total_turns": example["metadata"]["total_turns"],
161
+ "turns_included": example["metadata"]["turns_included"],
162
+ "initial_problem": example["metadata"]["initial_problem"],
163
+ "conversation_progress": example["metadata"]["conversation_progress"]
164
+ }
165
+ flattened_examples.append(flat_example)
166
+
167
+ # Save as CSV for easy viewing
168
+ df = pd.DataFrame(flattened_examples)
169
+ csv_output = output_file.replace('.json', '.csv')
170
+ df.to_csv(csv_output, index=False)
171
+ print(f"Saved flattened examples to {csv_output} for easy inspection")
172
+
173
+ return all_examples
174
+
175
+
176
+ # Example usage
177
+ if __name__ == "__main__":
178
+ # Set your conversations directory and output file
179
+ conversations_dir = "/Users/saitejagudidevini/Documents/Dev/grpo_trainer/Evaluations/Gemini_QA_Testing/conversations"
180
+ output_file = "cbt_evaluator_training_data.json"
181
+
182
+ # Process all conversations
183
+ examples = process_all_conversations(conversations_dir, output_file)
184
+
185
+ # Print some statistics
186
+ if examples:
187
+ print("\nDataset Statistics:")
188
+ print(f"Total examples: {len(examples)}")
189
+
190
+ # Count examples by initial problem
191
+ problem_counts = {}
192
+ for example in examples:
193
+ problem = example["metadata"]["initial_problem"]
194
+ problem_counts[problem] = problem_counts.get(problem, 0) + 1
195
+
196
+ print("\nExamples by initial problem:")
197
+ for problem, count in sorted(problem_counts.items(), key=lambda x: x[1], reverse=True):
198
+ print(f" {problem}: {count}")
199
+
200
+ # Show distribution of conversation lengths
201
+ turn_counts = {}
202
+ for example in examples:
203
+ turns = example["metadata"]["turns_included"]
204
+ turn_counts[turns] = turn_counts.get(turns, 0) + 1
205
+
206
+ print("\nExamples by conversation length (turns):")
207
+ for turns, count in sorted(turn_counts.items()):
208
+ print(f" {turns} turns: {count}")
RegressionEvaluation/step2_datasetformodeltraining.py ADDED
@@ -0,0 +1,173 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import torch
3
+ import numpy as np
4
+ from sklearn.model_selection import train_test_split
5
+ from transformers import AutoTokenizer
6
+ from datasets import Dataset, DatasetDict
7
+ import pandas as pd
8
+ from tqdm import tqdm
9
+ import os
10
+
11
+ def prepare_cbt_datasets(input_file, model_name="roberta-base", test_size=0.15, val_size=0.15, max_length=512, seed=42):
12
+ """
13
+ Prepare datasets for training a CBT evaluator model.
14
+
15
+ Args:
16
+ input_file: Path to the JSON file containing the training examples
17
+ model_name: Name of the pre-trained model to use
18
+ test_size: Fraction of data to use for testing
19
+ val_size: Fraction of data to use for validation
20
+ max_length: Maximum sequence length for tokenization
21
+ seed: Random seed for reproducibility
22
+
23
+ Returns:
24
+ A DatasetDict containing train, validation, and test datasets
25
+ """
26
+ print(f"Loading data from {input_file}...")
27
+
28
+ # Load the data
29
+ with open(input_file, 'r', encoding='utf-8') as f:
30
+ examples = json.load(f)
31
+
32
+ print(f"Loaded {len(examples)} examples")
33
+
34
+ # Flatten the examples for easier processing
35
+ flattened_examples = []
36
+ for example in examples:
37
+ flat_example = {
38
+ "id": example["id"],
39
+ "conversation_text": example["conversation_text"],
40
+ "thought_identification": example["target_scores"]["thought_identification"],
41
+ "emotion_exploration": example["target_scores"]["emotion_exploration"],
42
+ "distortion_identification": example["target_scores"]["distortion_identification"],
43
+ "evidence_gathering": example["target_scores"]["evidence_gathering"],
44
+ "balanced_thinking": example["target_scores"]["balanced_thinking"],
45
+ "total_turns": example["metadata"]["total_turns"],
46
+ "turns_included": example["metadata"]["turns_included"],
47
+ "initial_problem": example["metadata"]["initial_problem"],
48
+ "conversation_progress": example["metadata"]["conversation_progress"]
49
+ }
50
+ flattened_examples.append(flat_example)
51
+
52
+ # Convert to DataFrame for easier manipulation
53
+ df = pd.DataFrame(flattened_examples)
54
+
55
+ # Print dataset statistics
56
+ print("\nDataset Statistics:")
57
+ print(f"Number of examples: {len(df)}")
58
+ print(f"Number of unique conversations: {df['id'].str.split('_turn_').str[0].nunique()}")
59
+ print(f"Average conversation length: {df['conversation_text'].str.len().mean():.1f} characters")
60
+
61
+ print("\nTarget Score Distributions:")
62
+ for col in ["thought_identification", "emotion_exploration", "distortion_identification",
63
+ "evidence_gathering", "balanced_thinking"]:
64
+ print(f" {col}: mean={df[col].mean():.2f}, std={df[col].std():.2f}, min={df[col].min():.2f}, max={df[col].max():.2f}")
65
+
66
+ # Split by conversation ID to prevent data leakage
67
+ # Extract the base conversation ID (without the turn number)
68
+ df['conversation_base_id'] = df['id'].str.split('_turn_').str[0]
69
+
70
+ # Get unique conversation IDs
71
+ unique_conv_ids = df['conversation_base_id'].unique()
72
+
73
+ # Split conversation IDs into train, validation, and test sets
74
+ train_ids, temp_ids = train_test_split(
75
+ unique_conv_ids,
76
+ test_size=test_size + val_size,
77
+ random_state=seed
78
+ )
79
+
80
+ val_ids, test_ids = train_test_split(
81
+ temp_ids,
82
+ test_size=test_size / (test_size + val_size),
83
+ random_state=seed
84
+ )
85
+
86
+ # Create train, validation, and test dataframes
87
+ train_df = df[df['conversation_base_id'].isin(train_ids)]
88
+ val_df = df[df['conversation_base_id'].isin(val_ids)]
89
+ test_df = df[df['conversation_base_id'].isin(test_ids)]
90
+
91
+ print(f"\nSplit sizes:")
92
+ print(f" Train: {len(train_df)} examples from {len(train_ids)} conversations")
93
+ print(f" Validation: {len(val_df)} examples from {len(val_ids)} conversations")
94
+ print(f" Test: {len(test_df)} examples from {len(test_ids)} conversations")
95
+
96
+ # Convert to Hugging Face datasets
97
+ train_dataset = Dataset.from_pandas(train_df)
98
+ val_dataset = Dataset.from_pandas(val_df)
99
+ test_dataset = Dataset.from_pandas(test_df)
100
+
101
+ # Create a DatasetDict
102
+ dataset_dict = DatasetDict({
103
+ 'train': train_dataset,
104
+ 'validation': val_dataset,
105
+ 'test': test_dataset
106
+ })
107
+
108
+ # Load tokenizer
109
+ print(f"\nLoading tokenizer for {model_name}...")
110
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
111
+
112
+ # Define tokenization function
113
+ def tokenize_function(examples):
114
+ return tokenizer(
115
+ examples["conversation_text"],
116
+ padding="max_length",
117
+ truncation=True,
118
+ max_length=max_length
119
+ )
120
+
121
+ # Tokenize the datasets
122
+ print("Tokenizing datasets...")
123
+ tokenized_datasets = dataset_dict.map(
124
+ tokenize_function,
125
+ batched=True,
126
+ desc="Tokenizing",
127
+ remove_columns=['conversation_base_id'] # Remove the helper column we added
128
+ )
129
+
130
+ # Save the processed datasets
131
+ output_dir = "cbt_evaluator_data"
132
+ os.makedirs(output_dir, exist_ok=True)
133
+
134
+ print(f"\nSaving processed datasets to {output_dir}...")
135
+ tokenized_datasets.save_to_disk(output_dir)
136
+
137
+ # Also save the tokenizer for consistency
138
+ tokenizer.save_pretrained(os.path.join(output_dir, "tokenizer"))
139
+
140
+ print("Dataset preparation complete!")
141
+ return tokenized_datasets
142
+
143
+ # Example usage
144
+ if __name__ == "__main__":
145
+ # Set your input file (output from Step 1)
146
+ input_file = "cbt_evaluator_training_data.json"
147
+
148
+ datasets = prepare_cbt_datasets(
149
+ input_file=input_file,
150
+ model_name="roberta-base", # You can change this to another model if preferred
151
+ test_size=0.15,
152
+ val_size=0.15,
153
+ max_length=512,
154
+ seed=42
155
+ )
156
+
157
+ # Print dataset sizes
158
+ print("\nFinal dataset sizes:")
159
+ for split, dataset in datasets.items():
160
+ print(f" {split}: {len(dataset)} examples")
161
+
162
+ # Print example from training set
163
+ print("\nExample from training set:")
164
+ example = datasets["train"][0]
165
+ print(f"ID: {example['id']}")
166
+ print(f"Input IDs shape: {len(example['input_ids'])}")
167
+ print(f"Target scores:")
168
+ print(f" Thought identification: {example['thought_identification']}")
169
+ print(f" Emotion exploration: {example['emotion_exploration']}")
170
+ print(f" Distortion identification: {example['distortion_identification']}")
171
+ print(f" Evidence gathering: {example['evidence_gathering']}")
172
+ print(f" Balanced thinking: {example['balanced_thinking']}")
173
+
RegressionEvaluation/step3_trainingthe_cbt_evaluator_model.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import torch
3
+ import numpy as np
4
+ from transformers import (
5
+ AutoModelForSequenceClassification,
6
+ AutoTokenizer,
7
+ TrainingArguments,
8
+ Trainer,
9
+ EarlyStoppingCallback
10
+ )
11
+ from datasets import load_from_disk
12
+ import evaluate
13
+ from sklearn.metrics import mean_squared_error, r2_score
14
+ import matplotlib.pyplot as plt
15
+ import pandas as pd
16
+ from tqdm import tqdm
RegressionEvaluation/step4training.py ADDED
@@ -0,0 +1,379 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import os
3
+ import glob
4
+ import re
5
+ import numpy as np
6
+ import pandas as pd
7
+ from tqdm import tqdm
8
+ from sklearn.feature_extraction.text import TfidfVectorizer
9
+ from sklearn.ensemble import GradientBoostingRegressor
10
+ from sklearn.multioutput import MultiOutputRegressor
11
+ from sklearn.model_selection import train_test_split
12
+ from sklearn.metrics import mean_squared_error, r2_score
13
+ import pickle
14
+ import matplotlib.pyplot as plt
15
+ from collections import defaultdict
16
+ import joblib
17
+ from flask import Flask, request, jsonify
18
+ import uuid
19
+ from datetime import datetime
20
+
21
# Keyword patterns used to heuristically flag common cognitive distortions.
# Each entry maps a distortion label to the substrings searched for in the
# user's initial message (matching is case-insensitive; see
# CBTEvaluatorSimple.identify_initial_problem). Key order matters: ties in
# match counts resolve to the earlier entry.
COGNITIVE_DISTORTIONS = {
    "perfectionism": ["perfect", "flawless", "mistake", "error", "failure"],
    "catastrophizing": ["disaster", "terrible", "horrible", "awful", "end of the world"],
    "black_and_white_thinking": ["always", "never", "completely", "totally", "all or nothing"],
    "mind_reading": ["think about me", "judging me", "think I'm", "perceive me"],
    "overgeneralization": ["everything", "nothing", "everyone", "no one", "always goes wrong"],
    "should_statements": ["should", "must", "have to", "ought to"],
    "emotional_reasoning": ["feel like a failure", "feel worthless", "feel inadequate"],
    "personalization": ["my fault", "blame myself", "responsible for", "caused this"],
}
32
+
33
class CBTEvaluatorSimple:
    """Lightweight CBT-conversation evaluator: TF-IDF features + gradient boosting.

    Scores a formatted therapy conversation on five CBT progress dimensions,
    each predicted in [0, 1]: thought identification, emotion exploration,
    distortion identification, evidence gathering, and balanced thinking.
    """

    def __init__(self, output_dir="cbt_evaluator_simple", max_features=5000, random_state=42):
        """Initialize the evaluator.

        Args:
            output_dir: Directory where the model, vectorizer, metrics and
                plots are written (created if missing).
            max_features: Vocabulary cap for the TF-IDF vectorizer.
            random_state: Seed used for the train/test split and regressors.
        """
        self.output_dir = output_dir
        self.max_features = max_features
        self.random_state = random_state

        # Create output directory up front so later save calls can't fail on it.
        os.makedirs(output_dir, exist_ok=True)

        # Unigram + bigram TF-IDF features over the raw conversation text.
        self.vectorizer = TfidfVectorizer(
            max_features=max_features,
            ngram_range=(1, 2),
            stop_words='english'
        )

        # One gradient-boosted regressor per target dimension.
        self.model = MultiOutputRegressor(
            GradientBoostingRegressor(
                n_estimators=100,
                learning_rate=0.1,
                max_depth=5,
                random_state=random_state
            )
        )

    def identify_initial_problem(self, initial_input):
        """Identify the likely cognitive distortion from the initial input.

        Counts case-insensitive keyword matches per distortion type (see
        COGNITIVE_DISTORTIONS) and returns the label with the most matches,
        or "unspecified" when nothing matches.
        """
        initial_input = initial_input.lower()

        # Count matches for each distortion type.
        matches = defaultdict(int)
        for distortion, patterns in COGNITIVE_DISTORTIONS.items():
            for pattern in patterns:
                if pattern.lower() in initial_input:
                    matches[distortion] += 1

        if matches:
            # Ties resolve to the first-inserted distortion (dict order).
            return max(matches.items(), key=lambda x: x[1])[0]
        return "unspecified"

    def clean_therapist_response(self, content):
        """Strip <reasoning>...</reasoning> blocks and unwrap <answer> tags."""
        content = re.sub(r'<reasoning>.*?</reasoning>', '', content, flags=re.DOTALL)
        content = re.sub(r'<answer>(.*?)</answer>', r'\1', content, flags=re.DOTALL)
        return content.strip()

    def format_conversation(self, turns):
        """Format conversation turns into "User:"/"Assistant:" plain text.

        Any speaker other than "patient" is rendered as "Assistant".
        Assistant content is cleaned of reasoning/answer markup first.
        """
        formatted_text = ""
        for turn in turns:
            speaker = "User" if turn["speaker"] == "patient" else "Assistant"
            content = turn["content"]

            if speaker == "Assistant":
                content = self.clean_therapist_response(content)

            formatted_text += f"{speaker}: {content}\n\n"

        return formatted_text.strip()

    def process_conversations(self, conversations_dir):
        """Process all conversation JSON files into a training DataFrame.

        For each conversation, emits one example per even prefix length
        (turns 2, 4, ...), all sharing the conversation's final progress
        scores as targets. Files missing required fields are skipped with
        a warning rather than aborting the run.
        """
        all_examples = []

        json_files = glob.glob(os.path.join(conversations_dir, "*.json"))
        print(f"Found {len(json_files)} conversation files")

        for json_file in tqdm(json_files, desc="Processing conversations"):
            try:
                with open(json_file, 'r', encoding='utf-8') as f:
                    conversation_data = json.load(f)

                conversation_id = os.path.basename(json_file).replace('.json', '')

                # Skip files that lack the sections we depend on.
                if "metadata" not in conversation_data or "conversation" not in conversation_data or "progress_areas" not in conversation_data:
                    print(f"Warning: Missing required fields in {json_file}")
                    continue

                initial_input = conversation_data["metadata"]["initial_input"]
                total_turns = len(conversation_data["conversation"])

                # Extract the five target scores; skip on missing/non-numeric.
                try:
                    target_scores = {
                        "thought_identification": float(conversation_data["progress_areas"]["Thought Identification"]),
                        "emotion_exploration": float(conversation_data["progress_areas"]["Emotion Exploration"]),
                        "distortion_identification": float(conversation_data["progress_areas"]["Distortion Identification"]),
                        "evidence_gathering": float(conversation_data["progress_areas"]["Evidence Gathering"]),
                        "balanced_thinking": float(conversation_data["progress_areas"]["Balanced Thinking"])
                    }
                except (KeyError, ValueError) as e:
                    print(f"Warning: Issue with progress scores in {json_file}: {e}")
                    continue

                initial_problem = self.identify_initial_problem(initial_input)

                # Create training examples at different conversation lengths
                # so the model sees partial conversations too.
                for end_turn in range(2, total_turns + 1, 2):
                    conversation_so_far = conversation_data["conversation"][:end_turn]
                    formatted_text = self.format_conversation(conversation_so_far)
                    conversation_progress = round(end_turn / total_turns, 2)

                    example = {
                        "id": f"{conversation_id}_turn_{end_turn}",
                        "conversation_text": formatted_text,
                        "thought_identification": target_scores["thought_identification"],
                        "emotion_exploration": target_scores["emotion_exploration"],
                        "distortion_identification": target_scores["distortion_identification"],
                        "evidence_gathering": target_scores["evidence_gathering"],
                        "balanced_thinking": target_scores["balanced_thinking"],
                        "total_turns": total_turns,
                        "turns_included": end_turn,
                        "initial_problem": initial_problem,
                        "conversation_progress": conversation_progress
                    }

                    all_examples.append(example)

            except Exception as e:
                # Best-effort ingestion: report and continue with other files.
                print(f"Error processing {json_file}: {e}")

        print(f"Generated {len(all_examples)} training examples from {len(json_files)} conversations")
        return pd.DataFrame(all_examples)

    def train(self, conversations_dir, test_size=0.2):
        """Train the model on conversation data.

        Processes the conversation files, fits vectorizer + model on a train
        split, evaluates on the held-out split, persists artifacts and
        metrics to self.output_dir, and saves a predictions plot.

        Returns:
            (mse, r2, dimension_metrics) — overall test MSE/R² and a dict of
            per-dimension {"mse", "r2"} metrics.
        """
        df = self.process_conversations(conversations_dir)

        # Persist the processed dataset for inspection/reuse.
        df.to_csv(os.path.join(self.output_dir, "processed_data.csv"), index=False)

        X = df["conversation_text"]
        y = df[["thought_identification", "emotion_exploration", "distortion_identification",
                "evidence_gathering", "balanced_thinking"]].values

        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=test_size, random_state=self.random_state
        )

        print(f"Training set size: {len(X_train)}")
        print(f"Test set size: {len(X_test)}")

        print("Fitting vectorizer...")
        X_train_vec = self.vectorizer.fit_transform(X_train)

        print("Training model...")
        self.model.fit(X_train_vec, y_train)

        print("Evaluating on test set...")
        X_test_vec = self.vectorizer.transform(X_test)
        y_pred = self.model.predict(X_test_vec)

        mse = mean_squared_error(y_test, y_pred)
        r2 = r2_score(y_test, y_pred)

        print(f"Test MSE: {mse:.4f}")
        print(f"Test R²: {r2:.4f}")

        # Per-dimension metrics.
        dimension_names = ["thought_identification", "emotion_exploration", "distortion_identification",
                           "evidence_gathering", "balanced_thinking"]

        dimension_metrics = {}
        for i, name in enumerate(dimension_names):
            dim_mse = mean_squared_error(y_test[:, i], y_pred[:, i])
            dim_r2 = r2_score(y_test[:, i], y_pred[:, i])
            dimension_metrics[name] = {"mse": dim_mse, "r2": dim_r2}
            print(f"  {name}: MSE = {dim_mse:.4f}, R² = {dim_r2:.4f}")

        print(f"Saving model to {self.output_dir}...")
        joblib.dump(self.vectorizer, os.path.join(self.output_dir, "vectorizer.joblib"))
        joblib.dump(self.model, os.path.join(self.output_dir, "model.joblib"))

        # Cast to plain float so numpy scalar types can't break json.dump.
        with open(os.path.join(self.output_dir, "metrics.json"), "w") as f:
            json.dump({
                "overall": {"mse": float(mse), "r2": float(r2)},
                "dimensions": {
                    name: {"mse": float(m["mse"]), "r2": float(m["r2"])}
                    for name, m in dimension_metrics.items()
                }
            }, f, indent=2)

        self.visualize_predictions(y_test, y_pred, dimension_names)

        return mse, r2, dimension_metrics

    def visualize_predictions(self, y_true, y_pred, dimension_names):
        """Save a predicted-vs-actual scatter grid to the output directory."""
        fig, axes = plt.subplots(2, 3, figsize=(15, 10))
        axes = axes.flatten()

        for i, name in enumerate(dimension_names):
            ax = axes[i]
            ax.scatter(y_true[:, i], y_pred[:, i], alpha=0.5)
            ax.plot([0, 1], [0, 1], 'r--')  # Perfect-prediction diagonal
            ax.set_xlabel("Actual")
            ax.set_ylabel("Predicted")
            ax.set_title(name)
            ax.set_xlim(0, 1)
            ax.set_ylim(0, 1)

            # Annotate each panel with its R².
            r2 = r2_score(y_true[:, i], y_pred[:, i])
            ax.text(0.05, 0.95, f"R² = {r2:.3f}", transform=ax.transAxes)

        # 5 dimensions in a 2x3 grid leaves one unused subplot.
        fig.delaxes(axes[5])

        fig.suptitle("Predicted vs Actual Values", fontsize=16)
        fig.tight_layout()

        fig.savefig(os.path.join(self.output_dir, "predictions_vs_actual.png"))
        # Release the figure so repeated calls don't accumulate open figures.
        plt.close(fig)
        print(f"Visualization saved to {os.path.join(self.output_dir, 'predictions_vs_actual.png')}")

    def predict(self, conversation_text):
        """Predict CBT scores for a conversation.

        Returns:
            dict mapping each of the five dimension names to a float in [0, 1].
        """
        X = self.vectorizer.transform([conversation_text])
        predictions = self.model.predict(X)[0]

        # Regressor output is unbounded; clamp into the valid score range.
        predictions = np.clip(predictions, 0, 1)

        dimension_names = ["thought_identification", "emotion_exploration", "distortion_identification",
                           "evidence_gathering", "balanced_thinking"]

        return {name: float(pred) for name, pred in zip(dimension_names, predictions)}

    def count_turns(self, conversation_text):
        """Count speaker turns in formatted conversation text.

        Labels are anchored to line starts so "User:"/"Assistant:" occurring
        inside a message body is not miscounted as a turn.
        """
        user_turns = len(re.findall(r"^User:", conversation_text, flags=re.MULTILINE))
        assistant_turns = len(re.findall(r"^Assistant:", conversation_text, flags=re.MULTILINE))
        return user_turns + assistant_turns

    def identify_problem_from_text(self, conversation_text):
        """Identify the likely cognitive distortion from the conversation text.

        Uses the first "User:" message (matched at a line start); returns
        "unspecified" when no user message is found.
        """
        match = re.search(r"^User: (.*?)(?:\n\n|$)", conversation_text, flags=re.MULTILINE)
        if not match:
            return "unspecified"

        initial_input = match.group(1)
        return self.identify_initial_problem(initial_input)
305
+
306
# Create a Flask API for the model
def create_flask_app(model_dir="cbt_evaluator_simple"):
    """Build a Flask app serving a trained CBTEvaluatorSimple.

    Loads the persisted vectorizer and model from *model_dir* and exposes:
      POST /evaluate — body {"conversation": "<text>"}; returns scores.
      GET  /health   — liveness check.

    Raises:
        FileNotFoundError: if the model artifacts are missing from model_dir.
    """
    app = Flask(__name__)

    # Load the trained artifacts produced by CBTEvaluatorSimple.train().
    vectorizer = joblib.load(os.path.join(model_dir, "vectorizer.joblib"))
    model = joblib.load(os.path.join(model_dir, "model.joblib"))

    # Wrap them in an evaluator instance for prediction/helper methods.
    evaluator = CBTEvaluatorSimple()
    evaluator.vectorizer = vectorizer
    evaluator.model = model

    @app.route('/evaluate', methods=['POST'])
    def evaluate_conversation():
        """Evaluate a conversation."""
        # silent=True: return our own JSON 400 below instead of Flask raising
        # a 415/400 when the body is missing or not valid JSON.
        data = request.get_json(silent=True) or {}
        conversation_text = data.get('conversation', '')

        if not conversation_text:
            return jsonify({"error": "No conversation provided"}), 400

        # Predicted scores for the five CBT dimensions.
        scores = evaluator.predict(conversation_text)

        # Derived metadata from the conversation text itself.
        total_turns = evaluator.count_turns(conversation_text)
        initial_problem = evaluator.identify_problem_from_text(conversation_text)

        result = {
            "id": f"eval_{uuid.uuid4().hex[:8]}_{datetime.now().strftime('%Y%m%d%H%M%S')}",
            "conversation_text": conversation_text,
            "target_scores": {
                "thought_identification": round(scores["thought_identification"], 2),
                "emotion_exploration": round(scores["emotion_exploration"], 2),
                "distortion_identification": round(scores["distortion_identification"], 2),
                "evidence_gathering": round(scores["evidence_gathering"], 2),
                "balanced_thinking": round(scores["balanced_thinking"], 2)
            },
            "metadata": {
                "total_turns": total_turns,
                "turns_included": total_turns,
                "initial_problem": initial_problem,
                "conversation_progress": 1.0  # Assuming full conversation
            }
        }

        return jsonify(result)

    @app.route('/health', methods=['GET'])
    def health_check():
        """Health check endpoint."""
        return jsonify({"status": "healthy"})

    return app
363
+
364
# Example usage
if __name__ == "__main__":
    # Check if we're training or serving
    import sys

    if len(sys.argv) > 1 and sys.argv[1] == "serve":
        # Serve the model
        app = create_flask_app()
        app.run(debug=True, host='0.0.0.0', port=5009)
    else:
        # Train the model. The conversations directory used to be a
        # machine-specific absolute path; accept it as a CLI argument or the
        # CBT_CONVERSATIONS_DIR env var, falling back to the legacy default.
        default_dir = "/Users/saitejagudidevini/Documents/Dev/grpo_trainer/Evaluations/Gemini_QA_Testing/conversations"
        if len(sys.argv) > 1:
            conversations_dir = sys.argv[1]
        else:
            conversations_dir = os.environ.get("CBT_CONVERSATIONS_DIR", default_dir)

        evaluator = CBTEvaluatorSimple()
        evaluator.train(conversations_dir)

        print("\nTo serve the model, run:")
        print("python cbt_evaluator_simple.py serve")