Spaces:

resberry
/

MrCardio

Sleeping

App Files Files Community

resberry committed on Apr 9

Commit

fa8ce00

verified ·

1 Parent(s): 41aa811

Update app.py

Browse files

Files changed (1) hide show

app.py +15 -3

app.py CHANGED Viewed

@@ -22,8 +22,10 @@ from langgraph.graph import StateGraph, START, END
 # ============================================================
 # HUGGING FACE SPACES READY
 # Medical CSV RAG Chatbot
-# Mobile-friendly UI/UX version
-# Pipeline: RAG retrieval -> local ECG adapter reasoning -> grounded summary
 # ============================================================
 # -------------------------------
@@ -41,11 +43,11 @@ logger = logging.getLogger(__name__)
 # -------------------------------
 @dataclass
 class Config:
     base_model_path: str = os.getenv(
         "BASE_MODEL_PATH",
         "meta-llama/Llama-3.1-8B-Instruct"
     )
     adapter_dir: str = os.getenv(
         "ADAPTER_DIR",
         "adapter_refined_v10"
@@ -60,32 +62,39 @@ class Config:
     )
     vectorstore_dir: str = field(init=False)
     hf_token: str = os.getenv("HF_TOKEN", "")
     deepseek_api_key: str = os.getenv("DEEPSEEK_API_KEY", "")
     deepseek_base_url: str = os.getenv("DEEPSEEK_BASE_URL", "https://api.deepseek.com")
     deepseek_model: str = os.getenv("DEEPSEEK_MODEL", "deepseek-chat")
     deepseek_temperature: float = float(os.getenv("DEEPSEEK_TEMPERATURE", "0.1"))
     deepseek_max_tokens: int = int(os.getenv("DEEPSEEK_MAX_TOKENS", "550"))
     embed_model_name: str = os.getenv(
         "EMBED_MODEL_NAME",
         "sentence-transformers/all-MiniLM-L6-v2"
     )
     similarity_k: int = int(os.getenv("SIMILARITY_K", "12"))
     top_k_final: int = int(os.getenv("TOP_K_FINAL", "4"))
     max_context_chars: int = int(os.getenv("MAX_CONTEXT_CHARS", "5200"))
     max_input_len: int = int(os.getenv("MAX_INPUT_LEN", "4096"))
     max_new_tokens_local: int = int(os.getenv("MAX_NEW_TOKENS_LOCAL", "180"))
     max_chat_history_turns: int = int(os.getenv("MAX_CHAT_HISTORY_TURNS", "6"))
     min_lexical_overlap: float = float(os.getenv("MIN_LEXICAL_OVERLAP", "0.08"))
     min_faiss_similarity: float = float(os.getenv("MIN_FAISS_SIMILARITY", "0.20"))
     strong_retrieval_threshold: float = float(os.getenv("STRONG_RETRIEVAL_THRESHOLD", "0.30"))
     strong_retrieval_min_docs: int = int(os.getenv("STRONG_RETRIEVAL_MIN_DOCS", "3"))
     use_query_cache: bool = os.getenv("USE_QUERY_CACHE", "true").lower() == "true"
     enable_query_expansion: bool = os.getenv("ENABLE_QUERY_EXPANSION", "true").lower() == "true"
     enable_validator: bool = os.getenv("ENABLE_VALIDATOR", "true").lower() == "true"
@@ -93,12 +102,15 @@ class Config:
     show_debug_panel: bool = os.getenv("SHOW_DEBUG_PANEL", "true").lower() == "true"
     allow_rebuild_vectorstore: bool = os.getenv("ALLOW_REBUILD_VECTORSTORE", "false").lower() == "true"
     use_4bit: bool = os.getenv("USE_4BIT", "true").lower() == "true"
     launch_debug: bool = os.getenv("LAUNCH_DEBUG", "false").lower() == "true"
     server_name: str = os.getenv("SERVER_NAME", "0.0.0.0")
     server_port: int = int(os.getenv("SERVER_PORT", "7860"))
     blink_stage_1: float = float(os.getenv("BLINK_STAGE_1", "0.40"))
     blink_stage_2: float = float(os.getenv("BLINK_STAGE_2", "0.55"))
     blink_stage_3: float = float(os.getenv("BLINK_STAGE_3", "0.50"))

 # ============================================================
 # HUGGING FACE SPACES READY
 # Medical CSV RAG Chatbot
+# Optimized pipeline:
+# RAG retrieval -> local ECG adapter reasoning -> grounded summary
+# UI goal:
+# polished mobile-friendly chatbot UX with minimal sources panel
 # ============================================================
 # -------------------------------
 # -------------------------------
 @dataclass
 class Config:
+    # Paths
     base_model_path: str = os.getenv(
         "BASE_MODEL_PATH",
         "meta-llama/Llama-3.1-8B-Instruct"
     )
     adapter_dir: str = os.getenv(
         "ADAPTER_DIR",
         "adapter_refined_v10"
     )
     vectorstore_dir: str = field(init=False)
+    # Auth / APIs
     hf_token: str = os.getenv("HF_TOKEN", "")
     deepseek_api_key: str = os.getenv("DEEPSEEK_API_KEY", "")
     deepseek_base_url: str = os.getenv("DEEPSEEK_BASE_URL", "https://api.deepseek.com")
     deepseek_model: str = os.getenv("DEEPSEEK_MODEL", "deepseek-chat")
+    # DeepSeek generation
     deepseek_temperature: float = float(os.getenv("DEEPSEEK_TEMPERATURE", "0.1"))
     deepseek_max_tokens: int = int(os.getenv("DEEPSEEK_MAX_TOKENS", "550"))
+    # Embeddings
     embed_model_name: str = os.getenv(
         "EMBED_MODEL_NAME",
         "sentence-transformers/all-MiniLM-L6-v2"
     )
+    # Retrieval
     similarity_k: int = int(os.getenv("SIMILARITY_K", "12"))
     top_k_final: int = int(os.getenv("TOP_K_FINAL", "4"))
     max_context_chars: int = int(os.getenv("MAX_CONTEXT_CHARS", "5200"))
+    # Generation
     max_input_len: int = int(os.getenv("MAX_INPUT_LEN", "4096"))
     max_new_tokens_local: int = int(os.getenv("MAX_NEW_TOKENS_LOCAL", "180"))
     max_chat_history_turns: int = int(os.getenv("MAX_CHAT_HISTORY_TURNS", "6"))
+    # Filtering
     min_lexical_overlap: float = float(os.getenv("MIN_LEXICAL_OVERLAP", "0.08"))
     min_faiss_similarity: float = float(os.getenv("MIN_FAISS_SIMILARITY", "0.20"))
     strong_retrieval_threshold: float = float(os.getenv("STRONG_RETRIEVAL_THRESHOLD", "0.30"))
     strong_retrieval_min_docs: int = int(os.getenv("STRONG_RETRIEVAL_MIN_DOCS", "3"))
+    # Features
     use_query_cache: bool = os.getenv("USE_QUERY_CACHE", "true").lower() == "true"
     enable_query_expansion: bool = os.getenv("ENABLE_QUERY_EXPANSION", "true").lower() == "true"
     enable_validator: bool = os.getenv("ENABLE_VALIDATOR", "true").lower() == "true"
     show_debug_panel: bool = os.getenv("SHOW_DEBUG_PANEL", "true").lower() == "true"
     allow_rebuild_vectorstore: bool = os.getenv("ALLOW_REBUILD_VECTORSTORE", "false").lower() == "true"
+    # Model loading
     use_4bit: bool = os.getenv("USE_4BIT", "true").lower() == "true"
+    # Launch
     launch_debug: bool = os.getenv("LAUNCH_DEBUG", "false").lower() == "true"
     server_name: str = os.getenv("SERVER_NAME", "0.0.0.0")
     server_port: int = int(os.getenv("SERVER_PORT", "7860"))
+    # UI timings
     blink_stage_1: float = float(os.getenv("BLINK_STAGE_1", "0.40"))
     blink_stage_2: float = float(os.getenv("BLINK_STAGE_2", "0.55"))
     blink_stage_3: float = float(os.getenv("BLINK_STAGE_3", "0.50"))