| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| from collections import Counter |
| from training_data import TRAINING_EXAMPLES |
| from .tools import code_sentences |
|
|
|
|
def phase2_initial_codes_node(state):
    """Code a capped batch of training sentences and summarise the result.

    Takes the first ``state["max_sentences_to_code"]`` entries of
    ``TRAINING_EXAMPLES``, hands their sentences to ``code_sentences``
    together with ``state["llm_provider"]`` and ``state["llm_key"]``, and
    pairs every sentence with the code found at the same position in the
    returned sequence.

    Args:
        state: Mapping that provides ``max_sentences_to_code``,
            ``llm_provider`` and ``llm_key``; ``iteration`` is optional and
            defaults to 0 in the step record.

    Returns:
        A dict with two keys:

        * ``"phase2_initial_codes"`` -- status marker, number of coded
          sentences, number of distinct codes, the per-sentence rows and
          the code -> count mapping.
        * ``"steps"`` -- a one-element list describing this node's run.
    """
    limit = state["max_sentences_to_code"]
    batch = TRAINING_EXAMPLES[:limit]

    # Two separate passes, so all sentences are extracted before any label.
    texts = [example["sentence"] for example in batch]
    gold_labels = [example["label"] for example in batch]

    llm_codes = code_sentences(
        sentences=texts,
        llm_provider=state["llm_provider"],
        llm_key=state["llm_key"],
    )

    rows = []
    for position, (text, gold) in enumerate(zip(texts, gold_labels)):
        rows.append(
            {
                "idx": position,
                "sentence": text,
                "true_label": gold,
                # Positional lookup: a result shorter than the input
                # raises IndexError rather than being truncated.
                "llm_code": llm_codes[position],
            }
        )

    # Frequencies are counted over everything code_sentences returned.
    frequency = dict(Counter(llm_codes))

    n_rows = len(rows)
    n_unique = len(frequency)

    summary = {
        "status": "real",
        "n_sentences_coded": n_rows,
        "n_unique_codes": n_unique,
        "coded_rows": rows,
        "code_frequency": frequency,
    }
    step_record = {
        "step": state.get("iteration", 0),
        "node": "phase2_initial_codes",
        "action": "coded sentences (one LLM call per sentence)",
        "detail": f"{n_rows} sentences, {n_unique} unique codes",
    }

    return {
        "phase2_initial_codes": summary,
        "steps": [step_record],
    }
|
|