Spaces:

kenlkehl
/

mm-ai-demo

Sleeping

App Files Files Community

mm-ai-demo / config.py

kenlkehl

Update config.py

b209fb2 verified about 1 month ago

raw

history blame contribute delete

3.08 kB

	# Configuration for Clinical Trial Matching Pipeline
	#
	# Edit the values below to set your default models and trial database.
	# Models will auto-load on application startup.

	# ============================================================================
	# MODEL PATHS - Set your default models here
	# ============================================================================

	# Set to None to skip auto-loading, or provide model path/HuggingFace ID
	MODEL_CONFIG = {
	    # Tagger (TinyBERT) used to extract the relevant excerpts.
	    # Examples: "prajjwal1/bert-tiny" or "./auto-tiny-bert-tagger"
	    "tagger": "kenlkehl/mmai-tiny-bert-tagger",

	    # Sentence-transformer embedder for patient summaries and trials.
	    # Examples: "Qwen/Qwen3-Embedding-0.6B" or "./reranker_round2.model"
	    "embedder": "kenlkehl/mmai-trialspace",

	    # Large language model that summarizes the patient history.
	    # Examples: "microsoft/Phi-3-mini-4k-instruct" or "openai/gpt-oss-120b"
	    # Alternative (disabled): "meta-llama/Llama-3.2-1B-Instruct"
	    "llm": "kenlkehl/mmai-oncoreasoning-3b",

	    # ModernBERT classifier that predicts trial eligibility.
	    # Examples: "answerdotai/ModernBERT-large" or "./modernbert-trial-checker"
	    "trial_checker": "kenlkehl/mmai-trial-checker",

	    # ModernBERT classifier that predicts boilerplate exclusions.
	    # Examples: "answerdotai/ModernBERT-large" or "./modernbert-boilerplate-checker"
	    "boilerplate_checker": "kenlkehl/mmai-boilerplate-checker",
	}

	# Example configuration with base models:
	# MODEL_CONFIG = {
	# "tagger": "prajjwal1/bert-tiny",
	# "embedder": "Qwen/Qwen3-Embedding-0.6B",
	# "llm": "microsoft/Phi-3-mini-4k-instruct",
	# "trial_checker": "answerdotai/ModernBERT-large",
	# "boilerplate_checker": "answerdotai/ModernBERT-large",
	# }

	# Example configuration with fine-tuned models:
	# MODEL_CONFIG = {
	# "tagger": "./auto-tiny-bert-tagger",
	# "embedder": "./reranker_round2.model",
	# "llm": "/data/models/gpt-oss-120b",
	# "trial_checker": "./modernbert-trial-checker",
	# "boilerplate_checker": "./modernbert-boilerplate-checker",
	# }

	# ============================================================================
	# DEFAULT TRIAL DATABASE
	# ============================================================================

	# Default trial database: path to a CSV/Excel file.
	# It is auto-loaded and embedded once the embedder model is ready.
	# Set to None to disable auto-loading.
	# Examples: "./my_trials.csv" or "./sample_trials.csv"
	DEFAULT_TRIAL_DB = "trial_space_lineitems.csv"

	# NOTE(review): presumably the location of precomputed embeddings for the
	# trials in DEFAULT_TRIAL_DB; whether this names a file stem or a directory
	# is not visible here — confirm against the code that reads it.
	PREEMBEDDED_TRIALS = "trial_embeddings"

	# ============================================================================
	# USAGE NOTES
	# ============================================================================
	#
	# 1. Set the model paths above to your preferred models
	# 2. Optionally set DEFAULT_TRIAL_DB to your trial database file
	# 3. Save this file
	# 4. Run: python trial_matching_app.py
	# 5. Models will load automatically on startup
	#
	# You can still manually load different models through the web interface
	# if you need to switch models during a session.
	#