# ============================================================================
# methodology_comparison.py — reference paper vs our technique, per workbench
# ============================================================================
#
# Principle: Same methodological rigor as the reference paper. Latest
# best-in-class computational technique. Every step upgraded technically;
# every methodological commitment preserved.
#
# One MethodologyComparison per workbench. Each has:
#   - principle: header paragraph for the paper's methods section
#   - reference_papers: list of full citations
#   - rows: per-step 4-column comparison
#
# Serialized to Markdown for download + injection into papers.
# ============================================================================

from dataclasses import dataclass, field
from datetime import datetime
from typing import List


def _escape_table_cell(text: str) -> str:
    """Return *text* made safe for a single Markdown table cell.

    A raw ``|`` would be read as a column separator and a raw line break
    would terminate the table row, so pipes are backslash-escaped and line
    breaks (``\\r\\n`` or ``\\n``) become ``<br>``.
    """
    return (
        text.replace("|", "\\|")
        .replace("\r\n", "<br>")
        .replace("\n", "<br>")
    )


@dataclass
class ComparisonRow:
    """One step in the methodology comparison table."""

    # Label of the step; rendered in bold in the first table column.
    step: str
    # Methodological commitment (unchanged across ref and ours).
    commitment: str
    # What the reference paper used (2020-2022 tech).
    reference_technique: str
    # What we use (2026 best-in-class) + why better.
    our_technique: str


@dataclass
class MethodologyComparison:
    """Full comparison for one workbench, paper-ready."""

    # Display name used in the top-level Markdown heading.
    workbench_name: str
    # Full citations, rendered as a bullet list.
    reference_papers: List[str]
    # Header paragraph for the paper's methods section.
    principle: str
    # Per-step comparison rows; each instance gets its own empty list.
    rows: List[ComparisonRow] = field(default_factory=list)

    def as_markdown(self) -> str:
        """Render as paper-ready Markdown — copy-paste into methods section.

        The output contains, in order: a title with the workbench name, a
        generation timestamp (local time at call), the principle paragraph,
        the reference papers as bullets, a four-column step-by-step table,
        and a closing auto-generation note.

        Returns:
            The whole document as one string, lines joined with ``"\\n"``.
        """
        lines = [
            f"# Methodology Comparison — {self.workbench_name}",
            "",
            f"*Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}*",
            "",
            "## Principle",
            "",
            self.principle,
            "",
            "## Reference Papers",
            "",
        ]
        lines.extend(f"- {paper}" for paper in self.reference_papers)
        lines.extend(
            [
                "",
                "## Step-by-Step Comparison",
                "",
                "| Step | Methodological commitment | Reference technique (2020-2022) | Our technique (2026) + why better |",
                "|---|---|---|---|",
            ]
        )
        for row in self.rows:
            # Every cell goes through the same escaping so that no field can
            # break the table layout.
            step = _escape_table_cell(row.step)
            commit = _escape_table_cell(row.commitment)
            ref = _escape_table_cell(row.reference_technique)
            ours = _escape_table_cell(row.our_technique)
            lines.append(f"| **{step}** | {commit} | {ref} | {ours} |")
        lines.extend(
            [
                "",
                "---",
                "",
                "*This comparison was auto-generated by the Researcher Workbench. "
                "Paste directly into the methods section of your paper. "
                "All method contracts referenced above are enforced in code — see `method_contracts.py` "
                "for the grep-able registry.*",
            ]
        )
        return "\n".join(lines)


# ============================================================================
# B&C Workbench — Braun & Clarke 2006 reflexive thematic analysis
# ============================================================================

BC_COMPARISON = MethodologyComparison(
    workbench_name="B&C Workbench (Reflexive Thematic Analysis)",
    reference_papers=[
        "Braun, V. & Clarke, V. (2006). Using thematic analysis in psychology. "
        "Qualitative Research in Psychology, 3(2), 77-101.",
        "Carlsen, H.B. & Ralund, S. (2022). Computational grounded theory revisited: "
        "From computer-led to computer-assisted. Big Data & Society, 9(1).",
    ],
    principle=(
        "We preserve the full methodological rigor of Braun & Clarke's (2006) six-phase "
        "reflexive thematic analysis — reflexivity, systematic coverage, "
        "semantic-or-latent analysis-wide choice, iterative refinement, researcher authority. "
        "Every phase is implemented with the best computational technique available in 2026: "
        "LLM-assisted code generation at pinned temperature 0.0, transformer-based embeddings "
        "for theme clustering, embedding cohesion checks for theme review, and paper-cited "
        "method contracts enforced in Python. The researcher validates every AI output via "
        "named override widgets. Carlsen & Ralund's (2022) researcher-centrality principle "
        "is preserved: AI assists, researcher approves."
    ),
    rows=[
        ComparisonRow(
            step="Phase 1 — Familiarization",
            commitment="B&C 2006 p. 87: researcher immerses in data, articulates reflexive positioning, confirms initial noticings before coding",
            reference_technique="Manual reading of full corpus; notes in research journal; no computational assistance",
            our_technique="LLM-facilitated dialogue (Mistral temp=0.0) + reflexive positioning as contract-enforced field (≥20 chars) + three-step validation table. Better: scales to 1000+ sentence corpora without abandoning reflexivity; positioning statement is auditable.",
        ),
        ComparisonRow(
            step="Phase 2 — Initial Coding",
            commitment="B&C 2006 p. 84: semantic XOR latent orientation (analysis-wide). p. 88: systematic coverage (every sentence coded). Reflexivity: researcher's positioning shapes every code.",
            reference_technique="Researcher manually codes each sentence in a spreadsheet over weeks. No validation other than researcher re-reading.",
            our_technique="Mistral temp=0.0 proposes codes across 3 iterations; reflexive positioning injected per prompt; researcher overrides via `human_code_iter1/2/3` + `flagged` + `final_code` columns. Hallucination bounded by exact-sentence-quote requirement. Reproducibility: identical corpus → identical codes. Contract: B&C 2006 p. 84, p. 88, reflexivity × 5.",
        ),
        ComparisonRow(
            step="Phase 3 — Searching for Themes",
            commitment="B&C 2006 p. 89: themes emerge from codes; patterns meaningful to research question; themes are tentative, iterative",
            reference_technique="Researcher manually groups codes into themes on paper, sticky notes, or mind-map software. No computational clustering.",
            our_technique="MiniLM 384-dim embeddings of codes + agglomerative clustering (cosine similarity, threshold ∈ [0.3, 0.95]) + Mistral names each cluster + researcher renames in theme table. Deterministic given fixed seed. Better: reveals semantic theme coherence invisible to manual grouping; researcher still decides final names.",
        ),
        ComparisonRow(
            step="Phase 4 — Reviewing Themes",
            commitment="B&C 2006 p. 91: Level 1 check (coded extracts cohere within theme) + Level 2 check (themes work across corpus)",
            reference_technique="Researcher manually re-reads coded extracts against themes; refines or drops themes through discussion or introspection",
            our_technique="Embedding-based cohesion score per theme (cluster tightness) + Mistral drafts keep/merge/split/drop/rename verdict + researcher enters `researcher_verdict`. Contract: B&C 2006 p. 91 × 3. Better: cohesion scores surface weak themes the researcher might miss; researcher still decides fate.",
        ),
        ComparisonRow(
            step="Phase 5 — Defining and Naming",
            commitment="B&C 2006 p. 92: each theme has a clear definition and a catchy name capturing its essence",
            reference_technique="Researcher drafts theme definitions by hand based on coded extracts",
            our_technique="Mistral drafts definition + catchy name per kept theme; researcher overrides via `researcher_definition` + `researcher_name` columns. Contract: B&C 2006 p. 92 × 3. Better: draft saves hours; researcher still authors final definitions.",
        ),
        ComparisonRow(
            step="Phase 6 — Producing the Report",
            commitment="B&C 2006 p. 93: weave theme definitions + data extracts + narrative answering research question",
            reference_technique="Researcher writes full report manually, pulling extracts from coded dataset",
            our_technique="Mistral drafts markdown report from definitions + codes + research question + reflexive positioning; researcher edits before save. Report methods section auto-includes this comparison table. Contract: B&C 2006 p. 93 × 2.",
        ),
    ],
)


# ============================================================================
# G&W at Scale — Gauthier & Wallace 2022 computational thematic analysis
# ============================================================================

GW_COMPARISON = MethodologyComparison(
    workbench_name="G&W at Scale (Computational Thematic Analysis)",
    reference_papers=[
        "Gauthier, R.P. & Wallace, J.R. (2022). The Computational Thematic Analysis Toolkit. "
        "Proc. ACM Hum.-Comput. Interact., 6(GROUP), Article 25.",
        "Braun, V. & Clarke, V. (2006). Using thematic analysis in psychology. "
        "Qualitative Research in Psychology, 3(2), 77-101.",
        "Carlsen, H.B. & Ralund, S. (2022). Computational grounded theory revisited. "
        "Big Data & Society, 9(1).",
    ],
    principle=(
        "We preserve the full methodological rigor of Gauthier & Wallace's (2022) "
        "Computational Thematic Analysis Toolkit — corpus compression before coding, "
        "researcher validation of representative selection, reflexive engagement with "
        "computationally-surfaced patterns. The core upgrade is architectural: we operate "
        "at the sentence level using MiniLM contextual embeddings (384-dim transformer), "
        "whereas G&W 2022 operated at the word level using bag-of-words LDA. G&W's Data "
        "Cleaning (module 2) and Data Filtering (module 3) modules are therefore not "
        "applicable to our pipeline — their purpose was to make word-frequency topic "
        "modelling tractable, a problem that does not arise when semantic similarity is "
        "computed directly over sentence embeddings. All downstream Braun & Clarke (2006) "
        "Phase 1-6 commitments are preserved; Carlsen & Ralund's (2022) researcher-"
        "centrality is enforced throughout. Phase 0 compression runs before Phase 1 "
        "familiarization, following G&W's own framing of computational operations as "
        "familiarization aids for large corpora."
    ),
    rows=[
        ComparisonRow(
            step="Phase 0 — Corpus Compression",
            commitment="G&W 2022 Art. 25: reduce large corpus to representative subset preserving semantic diversity; researcher validates selection before downstream phases consume it",
            reference_technique="Word-level pipeline across four G&W modules: spaCy tokenization + stopword removal + lemmatization (module 2 Data Cleaning) + word include/exclude + frequency thresholds (module 3 Data Filtering) + LDA bag-of-words topic modelling with researcher-chosen k (module 4 Modelling) + purposive sampling near topic centroids (module 5 Sampling). Cleaning and filtering were required because LDA operates on word frequencies and collapses under raw text (stopwords dominate; morphology fragments signal).",
            our_technique=(
                "Sentence-level pipeline with peer-reviewed citation chain: "
                "(1) MiniLM all-MiniLM-L6-v2 sentence embeddings, 384-dim contextual transformer (Reimers & Gurevych 2019, EMNLP) — captures syntax, semantics, word order in one pass, obviates word-level cleaning. "
                "(2) UMAP dimensionality reduction to 10-dim for clustering stability (McInnes, Healy & Melville 2018). "
                "(3) HDBSCAN hierarchical density-based clustering (Campello, Moulavi & Sander 2013, PAKDD, LNCS 7819:160–172; extended in Campello, Moulavi, Zimek & Sander 2015, ACM TKDD 10(1)). Cluster count discovered from data; min_cluster_size parameter is Campello et al.'s explicit mclSize. "
                "(4) Representative selection by HDBSCAN density-tree cluster membership probability, ranked descending, top R per cluster (Campello et al. 2015 §4). NOT centroid-proximity — HDBSCAN produces non-spherical clusters where centroid-based selection is known to misrepresent (Grootendorst 2022, BERTopic). The probability score is 1.0 at the heart of a cluster's density region and 0.0 at the noise edge; ranking by this score is the methodologically native selection for density-based clustering. "
                "(5) Software: McInnes, Healy & Astels 2017, JOSS 2(11):205 — hdbscan library. "
                "(6) Researcher validation via editable `selected` column (Carlsen & Ralund 2022, BDS 9(1) researcher-centrality). "
                "Cleaning and filtering modules are NOT APPLICABLE — our pipeline operates on sentence meaning not word frequency; stopwords carry semantic signal and must not be removed; morphology is handled inside MiniLM's subword tokenizer. Temp=0.0 throughout. Deterministic given fixed corpus (UMAP random_state=42; HDBSCAN deterministic given fixed input; outlier sampling np.random.seed(42)). Contract: G&W 2022 Art. 25 × 5. "
                "Better than LDA: eliminates methodological drift from cleaning rules (different stopword lists → different LDA topics), eliminates researcher guesswork on k, produces reproducible output aligned to density rather than to spherical-cluster assumption."
            ),
        ),
        ComparisonRow(
            step="Phase 1 — Familiarization (on compressed corpus)",
            commitment="B&C 2006 p. 87: researcher immerses in data, articulates reflexive positioning, confirms noticings. G&W 2022: on compressed corpus so familiarization is tractable at scale.",
            reference_technique="G&W 2022 treated computational exploration itself as familiarization — no distinct Phase 1. Researcher browsed LDA topic keyword lists, adjusted filtering rules, manually reviewed samples.",
            our_technique="Explicit Phase 1 accordion after Phase 0 compression. LLM-facilitated familiarization dialogue on compressed corpus (643 representatives from 1000 sentences). Reflexive positioning injected into every downstream prompt (contract-enforced ≥20 chars). Contract: B&C 2006 p. 87 × 3. Better: makes familiarization auditable and separable from compression; preserves B&C reflexivity commitment explicitly.",
        ),
        ComparisonRow(
            step="Phase 2 — Initial Coding",
            commitment="B&C 2006 p. 84, p. 88: semantic-XOR-latent orientation; systematic coverage; reflexivity",
            reference_technique="G&W 2022: researcher manually codes selected representatives in spreadsheet-like UI (Tkinter). No AI assistance.",
            our_technique="Mistral temp=0.0 proposes codes across 3 iterations on compressed corpus; reflexive positioning per prompt; researcher overrides via `human_code_iter1/2/3` + `flagged` + `final_code`. Contract: B&C 2006 p. 84, p. 88, reflexivity × 5. Better: scales across representatives while preserving researcher authority; hallucination bounded by exact-sentence-quote requirement.",
        ),
        ComparisonRow(
            step="Phase 3-6 — Themes → Review → Define → Report",
            commitment="B&C 2006 Phases 3-6 as specified; applied to codes from compressed corpus",
            reference_technique="G&W 2022: researcher manually creates theme visualizations (chord diagrams), manually reviews quotes, manually writes report",
            our_technique="Same as B&C Workbench Phases 3-6 — embedding-based theme clustering, cohesion-scored review, LLM-drafted definitions and report with researcher override at every step. See B&C comparison for per-phase detail.",
        ),
    ],
)


# ============================================================================
# CGT Workbench — Nelson 2020 computational grounded theory + C&R 2022
# ============================================================================

CGT_COMPARISON = MethodologyComparison(
    workbench_name="CGT Workbench (Computational Grounded Theory — Nelson + C&R)",
    reference_papers=[
        "Nelson, L.K. (2020). Computational grounded theory: A methodological framework. "
        "Sociological Methods & Research, 49(1), 3-42.",
        "Carlsen, H.B. & Ralund, S. (2022). Computational grounded theory revisited: "
        "From computer-led to computer-assisted text analysis. Big Data & Society, 9(1).",
    ],
    principle=(
        "We preserve the full methodological rigor of Nelson's (2020) three-step "
        "computational grounded theory framework — Pattern Detection (unsupervised ML), "
        "Pattern Refinement (researcher close-reading), Pattern Confirmation (supervised ML) — "
        "with Carlsen & Ralund's (2022) researcher-centrality principle enforced at every "
        "step. The 2020 framework used word2vec-era embeddings and k-means clustering for "
        "detection, and bag-of-words + logistic regression for confirmation; we upgrade "
        "both to sentence-transformer-based techniques while preserving the three-step "
        "structure and researcher authority. Maps to traditional GT: Pattern Detection ≈ "
        "open coding, Refinement ≈ axial coding, Confirmation ≈ selective coding."
    ),
    rows=[
        ComparisonRow(
            step="Step 1 — Pattern Detection",
            commitment="Nelson 2020: surface structural patterns via unsupervised ML; researcher interprets labels. C&R 2022: researcher approves labels, not algorithm.",
            reference_technique="word2vec (2013-era word embeddings, context-blind) OR LDA bag-of-words; k-means clustering with k specified upfront; researcher manually reads cluster exemplars and names them",
            our_technique="MiniLM all-MiniLM-L6-v2 sentence embeddings (384-dim, transformer-based, context-aware) + agglomerative clustering (cosine similarity, researcher-set threshold; cluster count discovered from data) + LLM drafts cluster labels + researcher validates and renames. Contract: Nelson 2020 × 4. Better: sentence-level semantics (word2vec was word-level, couldn't handle unseen vocabulary or multi-word context); agglomerative discovers cluster count (k-means required guessing k); LLM labeling + researcher override is faster and more auditable than manual cluster-by-cluster interpretation.",
        ),
        ComparisonRow(
            step="Step 2 — Pattern Refinement",
            commitment="Nelson 2020: deep reading of pattern exemplars; researcher refines pattern definitions; keep/merge/split/drop decisions",
            reference_technique="Researcher manually reads clusters, writes memos in a notebook, decides fate of each pattern through introspection. No tool assistance beyond the clustering from Step 1.",
            our_technique="[Pending Turn 3 build] Tool surfaces top-N exemplars per pattern sorted by centroid proximity; LLM drafts interpretive memo per pattern; researcher writes final memo + enters keep/merge/split/drop/rename verdict. Contract: Nelson 2020 × TBD. Better: exemplar surfacing is reproducible; memo drafts save hours while preserving researcher's final interpretation.",
        ),
        ComparisonRow(
            step="Step 3 — Pattern Confirmation",
            commitment="Nelson 2020: test pattern generalizability via supervised ML on held-out sample; researcher inspects classifier failures",
            reference_technique="Bag-of-words TF-IDF features + logistic regression classifier; k-fold cross-validation; researcher labels held-out sentences manually; researcher reads confusion matrix",
            our_technique="[Pending Turn 4 build] MiniLM sentence embeddings as features (semantic similarity, not just word overlap) + logistic regression classifier + researcher-labeled held-out split (A2 default = document-level split; A1 toggle = random 20/80 at sentence level) + confusion matrix + per-pattern precision/recall + researcher inspects classifier disagreements. Contract: Nelson 2020 × TBD. Better: sentence embeddings encode contextual meaning (bag-of-words couldn't distinguish 'I agree with management' from 'I agree management is bad' beyond word frequency); document-level split tests generalization across contexts, not just within one context, yielding stronger validity claim.",
        ),
    ],
)


# ============================================================================
# Registry — for lookup from app.py
# ============================================================================

COMPARISONS = {
    "bc": BC_COMPARISON,
    "gw": GW_COMPARISON,
    "cgt": CGT_COMPARISON,
}


# ============================================================================
# Self-documentation
# ============================================================================

if __name__ == "__main__":
    # Dump every registered comparison as Markdown under a banner heading.
    for key, comp in COMPARISONS.items():
        print(f"\n{'=' * 78}")
        print(f" {key.upper()} — {comp.workbench_name}")
        print(f"{'=' * 78}\n")
        print(comp.as_markdown())