# Configuration for Clinical Trial Matching Pipeline
#
# Edit the values below to set your default models and trial database.
# Models will auto-load on application startup.

# ============================================================================
# MODEL PATHS - Set your default models here
# ============================================================================

# Set to None to skip auto-loading, or provide model path/HuggingFace ID
# Default model for each pipeline stage. Each value is a HuggingFace model ID
# or a local path; set an entry to None to skip auto-loading that model.
MODEL_CONFIG = {
    # TinyBERT tagger for extracting relevant excerpts
    # (e.g., "prajjwal1/bert-tiny" or "./auto-tiny-bert-tagger")
    "tagger": "kenlkehl/mmai-tiny-bert-tagger",

    # Sentence transformer for embedding patient summaries and trials
    # (e.g., "Qwen/Qwen3-Embedding-0.6B" or "./reranker_round2.model")
    "embedder": "kenlkehl/mmai-trialspace",

    # Large language model for patient history summarization
    # (e.g., "microsoft/Phi-3-mini-4k-instruct" or "openai/gpt-oss-120b")
    "llm": "kenlkehl/mmai-oncoreasoning-3b",
    # Alternative LLM, kept commented out for quick switching:
    # "llm": "meta-llama/Llama-3.2-1B-Instruct",

    # ModernBERT classifier for eligibility prediction
    # (e.g., "answerdotai/ModernBERT-large" or "./modernbert-trial-checker")
    "trial_checker": "kenlkehl/mmai-trial-checker",

    # ModernBERT classifier for boilerplate exclusion prediction
    # (e.g., "answerdotai/ModernBERT-large" or "./modernbert-boilerplate-checker")
    "boilerplate_checker": "kenlkehl/mmai-boilerplate-checker",
}

# Example configuration with base models:
# MODEL_CONFIG = {
#     "tagger": "prajjwal1/bert-tiny",
#     "embedder": "Qwen/Qwen3-Embedding-0.6B",
#     "llm": "microsoft/Phi-3-mini-4k-instruct",
#     "trial_checker": "answerdotai/ModernBERT-large",
#     "boilerplate_checker": "answerdotai/ModernBERT-large",
# }

# Example configuration with fine-tuned models:
# MODEL_CONFIG = {
#     "tagger": "./auto-tiny-bert-tagger",
#     "embedder": "./reranker_round2.model",
#     "llm": "/data/models/gpt-oss-120b",
#     "trial_checker": "./modernbert-trial-checker",
#     "boilerplate_checker": "./modernbert-boilerplate-checker",
# }

# ============================================================================
# DEFAULT TRIAL DATABASE
# ============================================================================

# Default trial database file (CSV or Excel).
# Auto-loaded and embedded once the embedder model is ready.
# Set to None to disable auto-loading.
# e.g., "./my_trials.csv" or "./sample_trials.csv"
DEFAULT_TRIAL_DB = "trial_space_lineitems.csv"

# Location of pre-embedded trial data — presumably a path/prefix read at
# startup to skip re-embedding; NOTE(review): consumer not visible in this
# file, confirm expected layout against the application code.
PREEMBEDDED_TRIALS = "trial_embeddings"

# ============================================================================
# USAGE NOTES
# ============================================================================
# 
# 1. Set the model paths above to your preferred models
# 2. Optionally set DEFAULT_TRIAL_DB to your trial database file
# 3. Save this file
# 4. Run: python trial_matching_app.py
# 5. Models will load automatically on startup
# 
# You can still manually load different models through the web interface
# if you need to switch models during a session.
#