# Repository page header captured with the file (not part of the module):
#   Spjimr / method_contracts.py
#   shahidshaikh's picture
#   Upload 40 files
#   a52bae4 verified
# ============================================================================
# method_contracts.py — FT50-publishability method contract layer
# ============================================================================
#
# PURPOSE
# -------
# Every computational qualitative method has preconditions that MUST hold for
# the method to be validly applied. This module makes those preconditions
# EXPLICIT and GREP-ABLE so that FT50 reviewers can verify the code enforces
# what the paper claims.
#
# Each contract is traced to a specific source paper and page number. A
# reviewer can:
# 1. grep this file for the paper citation (e.g. "B&C 2006 p. 88")
# and see every place that constraint is enforced
# 2. run any phase handler and see a MethodContractError message that names
# the paper, the page, and the violated rule
# 3. inspect any saved artifact and see the list of contracts verified
#
# DESIGN PRINCIPLES
# -----------------
# 1. Each contract has a citation to a specific paper + page.
# 2. Contracts raise MethodContractError, never bare Exception or AssertionError,
# so Gradio handlers can catch them cleanly and `python -O` cannot disable them.
# 3. Every check returns a list of MethodContract records, one per rule checked.
# 4. The contracts file is self-documenting — run `python method_contracts.py`
#    to print the full contract registry.
# 5. No agent decisions live here. Contracts are deterministic Python — Layer 2
#    of the three-layer rule (Generative / Plumbing / Researcher Authority).
#
# SOURCE PAPERS
# -------------
# B&C 2006:
# Braun, V. & Clarke, V. (2006). Using thematic analysis in psychology.
# Qualitative Research in Psychology, 3(2), 77-101.
#
# G&W 2022:
# Gauthier, R.P. & Wallace, J.R. (2022). The Computational Thematic Analysis
# Toolkit. Proc. ACM Hum.-Comput. Interact., 6(GROUP), Article 25.
#
# Nelson 2020:
# Nelson, L.K. (2020). Computational grounded theory: A methodological
# framework. Sociological Methods & Research, 49(1), 3-42.
#
# C&R 2022:
# Carlsen, H.B. & Ralund, S. (2022). Computational grounded theory revisited:
# From computer-led to computer-assisted text analysis. Big Data & Society, 9(1).
# ============================================================================
from dataclasses import dataclass, asdict
from datetime import datetime
from typing import List, Any, Optional
import pandas as pd
# ----------------------------------------------------------------
# Contract record — what gets logged to every artifact
# ----------------------------------------------------------------
@dataclass
class MethodContract:
    """Record of a single methodological precondition check.

    Instances are plain data: three strings describing which rule was
    checked, where the rule comes from, and how the check turned out.
    """

    # Paper + page reference backing the rule (e.g. "B&C 2006 p. 84").
    citation: str
    # Plain-English statement of the rule being checked.
    rule: str
    # Outcome of the check: "PASSED" or "FAILED: <reason>".
    status: str
# ----------------------------------------------------------------
# Exception — raised when any contract in a phase fails
# ----------------------------------------------------------------
class MethodContractError(Exception):
    """Raised when a method precondition is violated.

    Carries the full list of contracts checked (passed and failed) so callers
    can include the verification record in error artifacts.

    Args:
        message: Human-readable description of the violation(s); becomes
            ``str(exc)``.
        contracts: Every contract record checked for the phase. Optional so
            the exception can be rebuilt from ``args`` alone: copy and pickle
            re-create exceptions as ``cls(*exc.args)`` and ``args`` holds only
            the message. The instance ``__dict__`` is restored afterwards, so
            the contract list still survives the round trip.
    """

    def __init__(
        self,
        message: str,
        contracts: Optional[List["MethodContract"]] = None,
    ):
        super().__init__(message)
        # Keep the caller's list object; fall back to an empty list.
        self.contracts = contracts if contracts is not None else []

    def as_dict(self) -> dict:
        """Return a JSON-friendly dict of the error.

        Returns:
            A dict with the message under ``"error"``, the contracts converted
            with ``dataclasses.asdict`` under ``"contracts"``, and the current
            local time in ISO format under ``"timestamp"``.
        """
        return {
            "error": str(self),
            "contracts": [asdict(c) for c in self.contracts],
            "timestamp": datetime.now().isoformat(),
        }
# ----------------------------------------------------------------
# Internal helper — raise if any contract failed
# ----------------------------------------------------------------
def _enforce(phase_name: str, contracts: List[MethodContract]) -> List[MethodContract]:
    """Raise MethodContractError if any contract failed; else return contracts.

    A contract counts as passed when its status starts with "PASSED".
    Keep this simple: no agent decisions, no side effects.

    Args:
        phase_name: Phase label used as the prefix of the error message.
        contracts: Every contract record checked for the phase.

    Returns:
        ``contracts`` itself (the same list object) when nothing failed.

    Raises:
        MethodContractError: When at least one contract failed. The message
            lists each failed contract on its own line and the exception
            carries the full list, passed and failed.
    """
    failed = [c for c in contracts if not c.status.startswith("PASSED")]
    if not failed:
        return contracts

    details = "\n".join(
        f" - {c.citation}: {c.rule} — {c.status}" for c in failed
    )
    raise MethodContractError(
        f"{phase_name} — {len(failed)} method contract(s) violated:\n{details}",
        contracts=contracts,
    )
# ============================================================================
# Phase 1 Familiarization — Braun & Clarke 2006 Phase 1
# ============================================================================
def check_phase1_familiarization(
    corpus: Any,
    reflexive_positioning: Optional[str],
) -> List[MethodContract]:
    """Verify preconditions for Phase 1 — Familiarization.

    Enforces:
    - B&C 2006 p. 87: researcher must immerse in the data (corpus non-empty)
    - B&C 2006 reflexivity principle: researcher positioning must be stated
    - B&C 2006 p. 87: dataset must contain more than a single sentence to
      permit meaningful immersion

    Args:
        corpus: Sized collection holding the corpus (its items are reported
            as sentences), or None.
        reflexive_positioning: Researcher positioning statement; None is
            treated as an empty string.

    Returns:
        The three contract records, when all of them passed.

    Raises:
        MethodContractError: If any contract failed (raised by ``_enforce``).
    """
    # NOTE: the "Enforces:" list above is echoed by this module's __main__
    # block, which stops at the first blank line. Keep the list contiguous.
    contracts: List[MethodContract] = []

    # Size the corpus with len() instead of truthiness: `if corpus` raises
    # ValueError for a DataFrame/ndarray, which would escape as something
    # other than MethodContractError. Missing or unsized input counts as empty.
    try:
        n = len(corpus) if corpus is not None else 0
    except TypeError:
        n = 0
    pos = (reflexive_positioning or "").strip()

    # B&C 2006 p. 87 — corpus presence
    contracts.append(MethodContract(
        citation="B&C 2006 p. 87",
        rule="corpus loaded for immersion (non-empty)",
        status=(
            f"PASSED ({n} sentences)" if n >= 1
            else "FAILED: corpus is empty or None"
        ),
    ))

    # B&C 2006 reflexivity — positioning statement
    contracts.append(MethodContract(
        citation="B&C 2006 reflexivity principle",
        rule="reflexive positioning statement articulated (>=20 chars)",
        status=(
            f"PASSED ({len(pos)} chars)" if len(pos) >= 20
            else f"FAILED: positioning is {len(pos)} chars (need >=20)"
        ),
    ))

    # B&C 2006 p. 87 — meaningful immersion
    contracts.append(MethodContract(
        citation="B&C 2006 p. 87",
        rule="corpus large enough for meaningful immersion (>=5 sentences)",
        status=(
            f"PASSED ({n} sentences)" if n >= 5
            else f"FAILED: only {n} sentence(s) in corpus"
        ),
    ))

    return _enforce("Phase 1 — Familiarization", contracts)
# ============================================================================
# Phase 0 G&W Corpus Compression — Gauthier & Wallace 2022
# ============================================================================
def check_phase0_compression(
    corpus: Any,
    sentences_per_cluster: int,
    min_cluster_size: int,
    outlier_sample_size: int,
) -> List[MethodContract]:
    """Verify preconditions for Phase 0 — Corpus Compression (G&W path).

    Enforces:
    - G&W 2022 Art. 25: compression requires a corpus to compress (non-empty)
    - G&W 2022 Art. 25: clustering parameters within valid ranges
    - G&W 2022 Art. 25: compression is meaningful only when the corpus is
      at least min_cluster_size * 2 sentences — otherwise HDBSCAN cannot
      form stable clusters and the researcher should skip compression

    Args:
        corpus: Sized collection holding the corpus, or None.
        sentences_per_cluster: Must lie in [1, 10].
        min_cluster_size: Must be >= 2; also sets the minimum corpus size
            (twice this value).
        outlier_sample_size: Must be >= 0.

    Returns:
        The five contract records, when all of them passed.

    Raises:
        MethodContractError: If any contract failed (raised by ``_enforce``).
    """
    # NOTE: the "Enforces:" list above is echoed by this module's __main__
    # block, which stops at the first blank line. Keep the list contiguous.

    def within(value: Any, low: float, high: float = float("inf")) -> bool:
        # A value that cannot be ordered against numbers (None, str, ...) is
        # out of range; it must not crash the check with a TypeError.
        try:
            return low <= value <= high
        except TypeError:
            return False

    contracts: List[MethodContract] = []

    # len() rather than truthiness: `if corpus` raises ValueError for a
    # DataFrame/ndarray. Missing or unsized input counts as empty.
    try:
        n = len(corpus) if corpus is not None else 0
    except TypeError:
        n = 0

    # G&W 2022 — corpus presence
    contracts.append(MethodContract(
        citation="G&W 2022 Art. 25",
        rule="corpus non-empty (compression requires input)",
        status=f"PASSED ({n} sentences)" if n > 0 else "FAILED: empty corpus",
    ))

    # G&W 2022 — sentences_per_cluster range
    contracts.append(MethodContract(
        citation="G&W 2022 Art. 25",
        rule="sentences_per_cluster in [1, 10]",
        status=(
            f"PASSED ({sentences_per_cluster})"
            if within(sentences_per_cluster, 1, 10)
            else f"FAILED: got {sentences_per_cluster}"
        ),
    ))

    # G&W 2022 — min_cluster_size range
    contracts.append(MethodContract(
        citation="G&W 2022 Art. 25",
        rule="min_cluster_size >= 2 (HDBSCAN requirement)",
        status=(
            f"PASSED ({min_cluster_size})"
            if within(min_cluster_size, 2)
            else f"FAILED: got {min_cluster_size}"
        ),
    ))

    # G&W 2022 — outlier_sample_size non-negative
    contracts.append(MethodContract(
        citation="G&W 2022 Art. 25",
        rule="outlier_sample_size >= 0",
        status=(
            f"PASSED ({outlier_sample_size})"
            if within(outlier_sample_size, 0)
            else f"FAILED: got {outlier_sample_size}"
        ),
    ))

    # G&W 2022 — corpus large enough for compression to be meaningful
    try:
        min_corpus = min_cluster_size * 2
        large_enough = n >= min_corpus
    except TypeError:
        # min_cluster_size is not a number, so no threshold can be derived.
        min_corpus = None
        large_enough = False
    if large_enough:
        size_status = f"PASSED ({n} >= {min_corpus})"
    elif min_corpus is None:
        size_status = (
            "FAILED: cannot derive minimum corpus size from "
            f"min_cluster_size={min_cluster_size!r}"
        )
    else:
        size_status = f"FAILED: {n} < {min_corpus} — skip compression, use full corpus"
    contracts.append(MethodContract(
        citation="G&W 2022 Art. 25",
        rule="corpus size >= 2 * min_cluster_size (compression is meaningful)",
        status=size_status,
    ))

    return _enforce("Phase 0 — Corpus Compression", contracts)
# ============================================================================
# Phase 2 Initial Coding — Braun & Clarke 2006 Phase 2
# ============================================================================
def check_phase2_initial_coding(
    orientation: Optional[str],
    corpus: Any,
    reflexive_positioning: Optional[str],
    llm_key: Optional[str],
    iteration_n: int,
) -> List[MethodContract]:
    """Verify preconditions for Phase 2 — Generating Initial Codes.

    Enforces:
    - B&C 2006 p. 84: orientation is an analysis-wide choice
      (semantic OR latent, not both, not per-sentence)
    - B&C 2006 p. 88: systematic coverage — every sentence gets coded,
      requires non-empty corpus
    - B&C 2006 reflexivity: reflexive positioning must be injected into
      every code-generation prompt (C&R 2022 insists on this)
    - Reproducibility: LLM API key must be present for deterministic runs
    - B&C 2006 iterative refinement: iteration_n in {1, 2, 3}

    Args:
        orientation: Must be "semantic" or "latent".
        corpus: Sized collection holding the corpus, or None.
        reflexive_positioning: Positioning statement; needs >= 20 characters
            after stripping. None is treated as empty.
        llm_key: API key; needs >= 10 characters after stripping. None is
            treated as empty.
        iteration_n: Must be 1, 2 or 3.

    Returns:
        The five contract records, when all of them passed.

    Raises:
        MethodContractError: If any contract failed (raised by ``_enforce``).
    """
    # NOTE: the "Enforces:" list above is echoed by this module's __main__
    # block, which stops at the first blank line. Keep the list contiguous.
    contracts: List[MethodContract] = []

    # len() rather than truthiness: `if corpus` raises ValueError for a
    # DataFrame/ndarray. Missing or unsized input counts as empty.
    try:
        n = len(corpus) if corpus is not None else 0
    except TypeError:
        n = 0
    pos = (reflexive_positioning or "").strip()
    key = (llm_key or "").strip()

    # B&C 2006 p. 84 — orientation is analysis-wide
    contracts.append(MethodContract(
        citation="B&C 2006 p. 84",
        rule="orientation in {semantic, latent} (analysis-wide choice)",
        status=(
            f"PASSED ({orientation})"
            if orientation in ("semantic", "latent")
            else f"FAILED: got {orientation!r}"
        ),
    ))

    # B&C 2006 p. 88 — systematic coverage
    contracts.append(MethodContract(
        citation="B&C 2006 p. 88",
        rule="systematic coverage (corpus non-empty)",
        status=(
            f"PASSED ({n} sentences to code)" if n >= 1
            else "FAILED: empty corpus — cannot code systematically"
        ),
    ))

    # B&C 2006 reflexivity + C&R 2022 computer-assisted principle
    contracts.append(MethodContract(
        citation="B&C 2006 reflexivity + C&R 2022 BDS 9(1)",
        rule="reflexive positioning injected into every code-generation prompt",
        status=(
            f"PASSED ({len(pos)} chars injected)" if len(pos) >= 20
            else f"FAILED: positioning is {len(pos)} chars — complete Phase 1 first"
        ),
    ))

    # Reproducibility — LLM key required
    contracts.append(MethodContract(
        citation="Reproducibility (FT50 audit)",
        rule="LLM API key present for deterministic coding calls",
        status=(
            f"PASSED (key length {len(key)})" if len(key) >= 10
            else "FAILED: API key missing — paste in sidebar"
        ),
    ))

    # B&C 2006 iterative refinement
    contracts.append(MethodContract(
        citation="B&C 2006 iterative refinement",
        rule="iteration_n in {1, 2, 3}",
        status=(
            f"PASSED (iteration {iteration_n})" if iteration_n in (1, 2, 3)
            else f"FAILED: got iteration_n={iteration_n}"
        ),
    ))

    return _enforce("Phase 2 — Generating Initial Codes", contracts)
# ============================================================================
# Phase 3 Searching for Themes — Braun & Clarke 2006 Phase 3
# ============================================================================
def check_phase3_searching_themes(
    codebook_table: Any,
    similarity_threshold: float,
    min_cluster_size: int,
    llm_key: Optional[str],
) -> List[MethodContract]:
    """Verify preconditions for Phase 3 — Searching for Themes.

    Enforces:
    - B&C 2006 p. 89: themes emerge from codes — codebook must have entries
    - B&C 2006 p. 89: themes are tentative, iterative — threshold must be in
      a sensible exploration range (0.3 to 0.95)
    - Clustering validity: min_cluster_size >= 2
    - Reproducibility: LLM key required for theme naming

    Args:
        codebook_table: DataFrame or other sized collection of codes, or
            None; needs at least 2 entries.
        similarity_threshold: Must lie in [0.3, 0.95].
        min_cluster_size: Must be >= 2.
        llm_key: API key; needs >= 10 characters after stripping. None is
            treated as empty.

    Returns:
        The four contract records, when all of them passed.

    Raises:
        MethodContractError: If any contract failed (raised by ``_enforce``).
    """
    # NOTE: the "Enforces:" list above is echoed by this module's __main__
    # block, which stops at the first blank line. Keep the list contiguous.
    contracts: List[MethodContract] = []

    # len() covers DataFrames (row count) and plain sequences alike, and
    # avoids the ambiguous-truth-value error of array-like inputs.
    try:
        n_codes = len(codebook_table) if codebook_table is not None else 0
    except TypeError:
        n_codes = 0

    # A None or non-numeric value must fail its contract, not raise TypeError.
    try:
        threshold_ok = 0.3 <= similarity_threshold <= 0.95
    except TypeError:
        threshold_ok = False
    try:
        cluster_size_ok = min_cluster_size >= 2
    except TypeError:
        cluster_size_ok = False

    key = (llm_key or "").strip()

    # B&C 2006 p. 89 — codebook presence
    contracts.append(MethodContract(
        citation="B&C 2006 p. 89",
        rule="codebook has >=2 codes (themes emerge from codes)",
        status=(
            f"PASSED ({n_codes} codes in codebook)" if n_codes >= 2
            else f"FAILED: {n_codes} codes — run Phase 2 iterations first"
        ),
    ))

    # B&C 2006 p. 89 — similarity threshold exploration range
    contracts.append(MethodContract(
        citation="B&C 2006 p. 89",
        rule="similarity_threshold in [0.3, 0.95] (themes are tentative)",
        status=(
            f"PASSED ({similarity_threshold:.2f})" if threshold_ok
            else f"FAILED: got {similarity_threshold}"
        ),
    ))

    # Clustering validity — min_cluster_size
    contracts.append(MethodContract(
        citation="Clustering validity",
        rule="min_cluster_size >= 2 (agglomerative clustering requirement)",
        status=(
            f"PASSED ({min_cluster_size})" if cluster_size_ok
            else f"FAILED: got {min_cluster_size}"
        ),
    ))

    # Reproducibility — LLM key
    contracts.append(MethodContract(
        citation="Reproducibility (FT50 audit)",
        rule="LLM API key present for deterministic theme naming",
        status=(
            f"PASSED (key length {len(key)})" if len(key) >= 10
            else "FAILED: API key missing"
        ),
    ))

    return _enforce("Phase 3 — Searching for Themes", contracts)
# ============================================================================
# Phase 4 Reviewing Themes — Braun & Clarke 2006 Phase 4
# ============================================================================
def check_phase4_reviewing_themes(
    themes_table: Any,
    codes_table: Any,
    llm_key: Optional[str],
) -> List[MethodContract]:
    """Verify preconditions for Phase 4 — Reviewing Themes.

    Enforces:
    - B&C 2006 p. 91: review requires candidate themes from Phase 3
    - B&C 2006 p. 91: Level 1 check (coded extracts) requires codes_table
    - Reproducibility: LLM key required for verdict generation

    Args:
        themes_table: DataFrame or other sized collection of candidate
            themes, or None; needs at least 1 entry.
        codes_table: DataFrame or other sized collection of coded rows, or
            None; needs at least 1 entry.
        llm_key: API key; needs >= 10 characters after stripping. None is
            treated as empty.

    Returns:
        The three contract records, when all of them passed.

    Raises:
        MethodContractError: If any contract failed (raised by ``_enforce``).
    """
    # NOTE: the "Enforces:" list above is echoed by this module's __main__
    # block, which stops at the first blank line. Keep the list contiguous.

    def rows(table: Any) -> int:
        # len() covers DataFrames (row count) and plain sequences alike, and
        # avoids the ambiguous-truth-value error of array-like inputs.
        # Missing or unsized input counts as empty.
        try:
            return len(table) if table is not None else 0
        except TypeError:
            return 0

    contracts: List[MethodContract] = []
    n_themes = rows(themes_table)
    n_codes_rows = rows(codes_table)
    key = (llm_key or "").strip()

    # B&C 2006 p. 91 — themes from Phase 3
    contracts.append(MethodContract(
        citation="B&C 2006 p. 91",
        rule="candidate themes present (>=1 from Phase 3)",
        status=(
            f"PASSED ({n_themes} themes)" if n_themes >= 1
            else "FAILED: no themes — run Phase 3 first"
        ),
    ))

    # B&C 2006 p. 91 — codes for Level 1 cohesion check
    contracts.append(MethodContract(
        citation="B&C 2006 p. 91 (Level 1 cohesion check)",
        rule="coded sentences present for cohesion computation",
        status=(
            f"PASSED ({n_codes_rows} coded rows)" if n_codes_rows >= 1
            else "FAILED: no codes — Phase 2 output missing"
        ),
    ))

    # Reproducibility
    contracts.append(MethodContract(
        citation="Reproducibility (FT50 audit)",
        rule="LLM API key present for deterministic verdict generation",
        status=(
            f"PASSED (key length {len(key)})" if len(key) >= 10
            else "FAILED: API key missing"
        ),
    ))

    return _enforce("Phase 4 — Reviewing Themes", contracts)
# ============================================================================
# Phase 5 Defining and Naming — Braun & Clarke 2006 Phase 5
# ============================================================================
def check_phase5_defining_naming(
    review_table: Any,
    llm_key: Optional[str],
) -> List[MethodContract]:
    """Verify preconditions for Phase 5 — Defining and Naming Themes.

    Enforces:
    - B&C 2006 p. 92: defining requires reviewed themes from Phase 4
    - B&C 2006 p. 92: review_table must distinguish keep/merge/drop verdicts
    - Reproducibility: LLM key required for definition generation

    Note: the verdict rule is checked only as the presence of a
    ``researcher_verdict`` column on a DataFrame; the verdict values
    themselves are not validated here.

    Args:
        review_table: Reviewed themes from Phase 4; needs at least 1 entry.
            Only a DataFrame can satisfy the verdict-column contract.
        llm_key: API key; needs >= 10 characters after stripping. None is
            treated as empty.

    Returns:
        The three contract records, when all of them passed.

    Raises:
        MethodContractError: If any contract failed (raised by ``_enforce``).
    """
    # NOTE: the "Enforces:" list above is echoed by this module's __main__
    # block, which stops at the first blank line. Keep the list contiguous.
    contracts: List[MethodContract] = []

    # len() covers DataFrames (row count) and plain sequences alike, and
    # avoids the ambiguous-truth-value error of array-like inputs.
    try:
        n = len(review_table) if review_table is not None else 0
    except TypeError:
        n = 0
    key = (llm_key or "").strip()

    # B&C 2006 p. 92 — review_table must exist and be populated
    contracts.append(MethodContract(
        citation="B&C 2006 p. 92",
        rule="reviewed themes present from Phase 4 (>=1)",
        status=(
            f"PASSED ({n} reviewed themes)" if n >= 1
            else "FAILED: no reviewed themes — run Phase 4 first"
        ),
    ))

    # B&C 2006 p. 92 — verdicts column present (method machinery)
    has_verdicts = (
        isinstance(review_table, pd.DataFrame)
        and "researcher_verdict" in review_table.columns
    )
    if has_verdicts:
        verdict_status = "PASSED (researcher_verdict column found)"
    elif n == 0:
        # The empty table already failed the contract above; recording a
        # second failure for the same cause would only add noise.
        verdict_status = "PASSED (skipped — no review rows)"
    else:
        verdict_status = "FAILED: researcher_verdict column missing from review_table"
    contracts.append(MethodContract(
        citation="B&C 2006 p. 92",
        rule="verdict column present (method machinery)",
        status=verdict_status,
    ))

    # Reproducibility
    contracts.append(MethodContract(
        citation="Reproducibility (FT50 audit)",
        rule="LLM API key present for deterministic definition generation",
        status=(
            f"PASSED (key length {len(key)})" if len(key) >= 10
            else "FAILED: API key missing"
        ),
    ))

    return _enforce("Phase 5 — Defining and Naming Themes", contracts)
# ============================================================================
# Phase 6 Producing the Report — Braun & Clarke 2006 Phase 6
# ============================================================================
def check_phase6_producing_report(
    def_table: Any,
    llm_key: Optional[str],
) -> List[MethodContract]:
    """Verify preconditions for Phase 6 — Producing the Report.

    Enforces:
    - B&C 2006 p. 93: report requires theme definitions from Phase 5
    - B&C 2006 p. 93: report must weave definitions + extracts + narrative
    - Reproducibility: LLM key required for narrative generation

    Note: this function records two contracts (definitions present, API key
    present). The "weave definitions + extracts + narrative" rule has no
    check of its own here.

    Args:
        def_table: DataFrame or other sized collection of theme definitions,
            or None; needs at least 1 entry.
        llm_key: API key; needs >= 10 characters after stripping. None is
            treated as empty.

    Returns:
        The two contract records, when both passed.

    Raises:
        MethodContractError: If any contract failed (raised by ``_enforce``).
    """
    # NOTE: the "Enforces:" list above is echoed by this module's __main__
    # block, which stops at the first blank line. Keep the list contiguous.
    contracts: List[MethodContract] = []

    # len() covers DataFrames (row count) and plain sequences alike, and
    # avoids the ambiguous-truth-value error of array-like inputs.
    try:
        n = len(def_table) if def_table is not None else 0
    except TypeError:
        n = 0
    key = (llm_key or "").strip()

    # B&C 2006 p. 93 — definitions from Phase 5
    contracts.append(MethodContract(
        citation="B&C 2006 p. 93",
        rule="theme definitions present from Phase 5 (>=1)",
        status=(
            f"PASSED ({n} definitions)" if n >= 1
            else "FAILED: no definitions — run Phase 5 first"
        ),
    ))

    # Reproducibility
    contracts.append(MethodContract(
        citation="Reproducibility (FT50 audit)",
        rule="LLM API key present for deterministic narrative generation",
        status=(
            f"PASSED (key length {len(key)})" if len(key) >= 10
            else "FAILED: API key missing"
        ),
    ))

    return _enforce("Phase 6 — Producing the Report", contracts)
# ============================================================================
# CGT Phase 2 — Pattern Refinement — Nelson 2020 Step 2 / C&R 2022
# ============================================================================
def check_cgt_phase2_refinement(
    sentences_df: Any,
    n_exemplars: int,
    reflexive_positioning: Optional[str],
    llm_key: Optional[str],
) -> List[MethodContract]:
    """Verify preconditions for CGT Phase 2 — Pattern Refinement.

    Enforces:
    - Nelson 2020: Phase 2 requires Phase 1 output (sentences_df with cluster_id)
    - Nelson 2020: at least 1 non-noise cluster to refine
    - Nelson 2020: n_exemplars in [1, 20] — deep reading is bounded
    - C&R 2022: researcher reflexive positioning present (>=20 chars)
    - Reproducibility: LLM API key present for deterministic memo drafting

    Args:
        sentences_df: Phase 1 output. Only a non-empty DataFrame with a
            ``cluster_id`` column satisfies the first two contracts.
        n_exemplars: Converted with ``int()`` and required to lie in [1, 20].
        reflexive_positioning: Positioning statement; needs >= 20 characters
            after stripping. None is treated as empty.
        llm_key: API key; needs >= 10 characters after stripping. None is
            treated as empty.

    Returns:
        The five contract records, when all of them passed.

    Raises:
        MethodContractError: If any contract failed (raised by ``_enforce``).
    """
    # NOTE: the "Enforces:" list above is echoed by this module's __main__
    # block, which stops at the first blank line. Keep the list contiguous.
    contracts: List[MethodContract] = []

    # Anything that is not a DataFrame cannot carry a cluster_id column, so
    # it fails the Phase 1 contract without needing to be sized.
    is_frame = isinstance(sentences_df, pd.DataFrame)
    has_cluster_id = is_frame and "cluster_id" in sentences_df.columns
    n_rows = len(sentences_df) if is_frame else 0

    # Nelson 2020 — Phase 1 output must exist
    contracts.append(MethodContract(
        citation="Nelson 2020 SMR 49(1)",
        rule="Phase 1 output (sentences_df with cluster_id) non-empty",
        status=(
            f"PASSED ({n_rows} sentences with cluster_id)"
            if has_cluster_id and n_rows >= 1
            else "FAILED: run Phase 1 Pattern Detection first"
        ),
    ))

    # Nelson 2020 — at least 1 non-noise cluster
    n_clusters = 0
    if has_cluster_id:
        labels = sentences_df["cluster_id"]
        # NOTE(review): only the literal label "noise" (any case) is excluded.
        # If Phase 1 stores HDBSCAN's raw -1 noise label instead, it would be
        # counted as a cluster here — confirm against the Phase 1 output.
        keep = labels.astype(str).str.lower() != "noise"
        n_clusters = labels[keep].nunique()
    contracts.append(MethodContract(
        citation="Nelson 2020 SMR 49(1)",
        rule="at least 1 non-noise cluster to refine",
        status=(
            f"PASSED ({n_clusters} clusters found)" if n_clusters >= 1
            else "FAILED: 0 non-noise clusters — Phase 1 produced only noise"
        ),
    ))

    # Nelson 2020 — n_exemplars range. A value int() cannot convert (None,
    # non-numeric text, NaN, infinity) fails the contract instead of raising.
    try:
        exemplars_ok = 1 <= int(n_exemplars) <= 20
    except (TypeError, ValueError, OverflowError):
        exemplars_ok = False
    contracts.append(MethodContract(
        citation="Nelson 2020 deep-reading principle",
        rule="n_exemplars in [1, 20] (bounded for tractable close reading)",
        status=(
            f"PASSED ({n_exemplars})" if exemplars_ok
            else f"FAILED: got {n_exemplars}"
        ),
    ))

    # C&R 2022 — reflexive positioning
    pos = (reflexive_positioning or "").strip()
    contracts.append(MethodContract(
        citation="C&R 2022 BDS 9(1) researcher-centrality",
        rule="reflexive positioning articulated (>=20 chars)",
        status=(
            f"PASSED ({len(pos)} chars)" if len(pos) >= 20
            else f"FAILED: positioning is {len(pos)} chars (need >=20)"
        ),
    ))

    # Reproducibility — LLM key
    key = (llm_key or "").strip()
    contracts.append(MethodContract(
        citation="Reproducibility (FT50 audit)",
        rule="LLM API key present for deterministic memo drafting",
        status=(
            f"PASSED (key length {len(key)})" if len(key) >= 10
            else "FAILED: API key missing"
        ),
    ))

    return _enforce("CGT Phase 2 — Pattern Refinement", contracts)
# ============================================================================
# Helper — serialize contracts for artifact logging
# ============================================================================
def contracts_as_dicts(contracts: List[MethodContract]) -> List[dict]:
    """Turn MethodContract records into plain dicts for JSON artifacts.

    Phase handlers are expected to store the result in their saved artifact
    under the key `method_contracts_verified`, giving reviewers per-run proof
    that the method's preconditions held.

    Args:
        contracts: Contract records to serialize.

    Returns:
        One dict per record, in the same order, built with
        ``dataclasses.asdict``.
    """
    serialized: List[dict] = []
    for record in contracts:
        serialized.append(asdict(record))
    return serialized
# ============================================================================
# Registry — for self-documentation and reviewer audit
# ============================================================================
# Maps each phase's display name to the function that checks its contracts.
# The keys are printed as-is by the self-documentation block, so they use a
# real em dash rather than the mis-decoded byte sequence.
CONTRACT_REGISTRY = {
    "Phase 1 — Familiarization": check_phase1_familiarization,
    "Phase 0 — Corpus Compression (G&W)": check_phase0_compression,
    "Phase 2 — Generating Initial Codes": check_phase2_initial_coding,
    "Phase 3 — Searching for Themes": check_phase3_searching_themes,
    "Phase 4 — Reviewing Themes": check_phase4_reviewing_themes,
    "Phase 5 — Defining and Naming Themes": check_phase5_defining_naming,
    "Phase 6 — Producing the Report": check_phase6_producing_report,
    "CGT Phase 2 — Pattern Refinement": check_cgt_phase2_refinement,
}
# ============================================================================
# Self-documentation — run `python method_contracts.py` to see all contracts
# ============================================================================
if __name__ == "__main__":
    # Print the source papers, then each registered phase followed by the
    # "Enforces:" section of its check function's docstring.
    banner = "=" * 78
    print(banner)
    print("METHOD CONTRACT REGISTRY — FT50 Publishability Layer")
    print(banner)
    print()
    print("Source papers:")
    print(" B&C 2006 : Braun & Clarke, Qualitative Research in Psychology 3(2), 77-101")
    print(" G&W 2022 : Gauthier & Wallace, PACMHCI 6(GROUP), Article 25")
    print(" Nelson 2020: Sociological Methods & Research 49(1), 3-42")
    print(" C&R 2022 : Carlsen & Ralund, Big Data & Society 9(1)")
    print()
    print("Phases with method contracts:")
    for phase_name, fn in CONTRACT_REGISTRY.items():
        print(f" * {phase_name}")
        # Echo the lines after "Enforces:" up to the first blank line.
        in_enforces = False
        for ln in (fn.__doc__ or "").splitlines():
            stripped = ln.strip()
            if stripped.startswith("Enforces:"):
                in_enforces = True
                continue
            if not in_enforces:
                continue
            if not stripped:
                break
            print(f" {stripped}")
        print()
    print(banner)
    print("Usage: import these checks at the top of each phase handler in app.py")
    print(" and call the relevant check_* function before running the phase.")
    print(banner)