Spaces:

InnovisionLLC
/

DeKCIB

Sleeping

App Files Files

Wenye He committed on Feb 24, 2025

Commit

2f7f89f

1 Parent(s): 678dc55

Upload 15 files

Browse files

Files changed (9) hide show

.gitattributes +6 -0
Dockerfile +49 -0
app.py +289 -0
config.json +1 -0
datas/bge_onnx/config.json +28 -0
datas/bge_onnx/special_tokens_map.json +51 -0
datas/bge_onnx/tokenizer_config.json +61 -0
requirements.txt +12 -0
start.sh +3 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,9 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+datas/bge_onnx/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+vector_stores/anatomical_regions_head_and_neck.duckdb filter=lfs diff=lfs merge=lfs -text
+vector_stores/anatomical_regions_torso.duckdb filter=lfs diff=lfs merge=lfs -text
+vector_stores/CFIR.duckdb filter=lfs diff=lfs merge=lfs -text
+vector_stores/injury_typology_neurological_injuries.duckdb filter=lfs diff=lfs merge=lfs -text
+vector_stores/injury_typology_soft_tissue_injuries.duckdb filter=lfs diff=lfs merge=lfs -text

Dockerfile ADDED Viewed

	@@ -0,0 +1,49 @@

+# Base image: NVIDIA CUDA 12.1.1 "base" flavor on Ubuntu 22.04
+FROM nvidia/cuda:12.1.1-base-ubuntu22.04
+# Install Python 3.10 (with headers and distutils) plus curl and git; remove the apt lists afterwards
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+    python3.10 \
+    python3.10-dev \
+    python3.10-distutils \
+    curl \
+    git \
+    && rm -rf /var/lib/apt/lists/*
+# Register python3.10 as the python3 alternative so later python3 calls resolve to it
+RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.10 1
+# Bootstrap pip for Python 3.10 with get-pip.py (no pip package is installed by the apt step above)
+RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.10
+# Install Ollama via its install script. NOTE(review): confirm that Ollama actually honors OLLAMA_GPU_LAYERS
+ENV OLLAMA_GPU_LAYERS=100
+RUN curl -fsSL https://ollama.com/install.sh | sh
+# Set up application directory: the whole build context is copied into /app
+WORKDIR /app
+COPY . .
+# Install Python dependencies from the requirements.txt copied above
+RUN pip install --no-cache-dir -r requirements.txt
+# Runtime environment; the two paths mirror the ./vector_stores and ./datas/bge_onnx locations used by app.py, resolved against /app
+ENV VECTOR_STORE_DIR=/app/vector_stores \
+    EMBED_MODEL_PATH=/app/datas/bge_onnx \
+    PYTHONUNBUFFERED=1 \
+    GRADIO_SERVER_NAME="0.0.0.0"
+# Build-time sanity check: print whether PyTorch sees CUDA, then the Python version (a False value does not fail the build)
+RUN python3 -c "import torch; print(f'PyTorch CUDA available: {torch.cuda.is_available()}')" && \
+    python3 --version
+# Expose ports for Ollama (11434) and Gradio (7860)
+EXPOSE 11434 7860
+# Ensure start.sh is present and executable (presumably already copied by "COPY . ." above - verify .dockerignore)
+COPY start.sh /app/start.sh
+RUN chmod +x /app/start.sh
+# Start services using the startup script
+CMD ["/app/start.sh"]

app.py ADDED Viewed

	@@ -0,0 +1,289 @@

+# app.py
+from llama_index.embeddings.huggingface_optimum import OptimumEmbedding
+import gradio as gr
+from llama_index.core import Settings
+from llama_index.core import VectorStoreIndex, StorageContext
+from llama_index.vector_stores.duckdb import DuckDBVectorStore
+from llama_index.llms.ollama import Ollama
+from llama_index.core.memory import ChatMemoryBuffer
+import json
+import ollama
+import os
+import uuid
+# Configuration
+VECTOR_STORE_DIR = "./vector_stores"
+EMBED_MODEL_PATH = "./datas/bge_onnx"
+CONFIG_PATH = "config.json"
+DEFAULT_LLM = "Jatin19K/unsloth-q5_k_m-mistral-nemo-instruct-2407"
+DEFAULT_VECTOR_STORE = "CFIR"
+class ModelManager:
+    def __init__(self):
+        self.config = self._load_config()
+        self.available_models = self._initialize_models()
+    def _load_config(self):
+        """Load model configuration from JSON file"""
+        try:
+            with open(CONFIG_PATH, 'r') as f:
+                return json.load(f)
+        except Exception as e:
+            print(f"Error loading config: {e}")
+            return {"models": []}
+    def _initialize_models(self):
+        """Initialize and verify all models from config"""
+        config_models = self.config.get("models", [])
+        available_models = {}
+        # Get currently available Ollama models
+        try:
+            current_models = {m['name'].split(':')[0]: m['name'] for m in ollama.list()['models']}
+            print(current_models)
+        except Exception as e:
+            print(f"Error fetching current models: {e}")
+            current_models = {}
+        # Check each configured model
+        for model_name in config_models:
+            if model_name not in current_models:
+                print(f"Model {model_name} not found locally. Attempting to pull...")
+                try:
+                    ollama.pull(model_name)
+                    available_models[model_name] = model_name
+                    print(f"Successfully pulled model {model_name}")
+                except Exception as e:
+                    print(f"Error pulling model {model_name}: {e}")
+                    continue
+            else:
+                available_models[model_name] = current_models[model_name]
+        return available_models
+    def get_available_models(self):
+        """Return dictionary of available models"""
+        return self.available_models
+class EmbeddingManager:
+    def __init__(self):
+        self.embed_model = None
+        self._initialize_embed_model()
+    def _initialize_embed_model(self):
+        """Initialize BGE ONNX embedding model with validation"""
+        try:
+            if not os.path.exists(EMBED_MODEL_PATH):
+                raise FileNotFoundError(f"BGE ONNX model not found at {EMBED_MODEL_PATH}")
+            self.embed_model = OptimumEmbedding(folder_name=EMBED_MODEL_PATH)
+            Settings.embed_model = self.embed_model
+            print("Successfully initialized BGE embedding model")
+        except Exception as e:
+            print(f"Embedding model error: {e}")
+# Initialize managers at import time: ModelManager() reads CONFIG_PATH and lists/pulls Ollama models, EmbeddingManager() loads the embedding model from EMBED_MODEL_PATH
+model_manager = ModelManager()
+embed_manager = EmbeddingManager()
+# def get_available_models():
+#     """Check locally available Ollama models"""
+#     try:
+#         models = ollama.list()['models']
+#         model_dict = {m['name'].split(':')[0]: m['name'] for m in models}
+#         # Create ordered list with default first
+#         ordered_models = {}
+#         if DEFAULT_LLM in model_dict:
+#             ordered_models[DEFAULT_LLM] = model_dict[DEFAULT_LLM]
+#         # Add remaining models alphabetically
+#         for name in sorted(model_dict.keys()):
+#             if name != DEFAULT_LLM:
+#                 ordered_models[name] = model_dict[name]
+#         return ordered_models
+#     except Exception as e:
+#         print(f"Error fetching models: {e}")
+#         return {DEFAULT_LLM: DEFAULT_LLM}  # Fallback
+def get_available_vector_stores():
+    """Scan vector store directory for DuckDB files"""
+    vector_stores = {}
+    if os.path.exists(VECTOR_STORE_DIR):
+        cfir_path = os.path.join(VECTOR_STORE_DIR, f"{DEFAULT_VECTOR_STORE}.duckdb")
+        if os.path.exists(cfir_path):
+            vector_stores[DEFAULT_VECTOR_STORE] = {
+                "path": cfir_path,
+                "display_name": DEFAULT_VECTOR_STORE
+            }
+        # Add other stores
+        for file in os.listdir(VECTOR_STORE_DIR):
+            if file.endswith(".duckdb") and file != f"{DEFAULT_VECTOR_STORE}.duckdb":
+                store_name = file[:-7]
+                display_name = store_name.replace('_', ' ')
+                vector_stores[store_name] = {
+                    "path": os.path.join(VECTOR_STORE_DIR, file),
+                    "display_name": display_name
+                }
+    return vector_stores
+class ChatSessionManager:
+    def __init__(self):
+        self.sessions = {}
+        self.llm_options = model_manager.get_available_models()
+        self.vector_stores = get_available_vector_stores()
+    def refresh_models(self):
+        self.llm_options = model_manager.get_available_models()
+    def refresh_vector_stores(self):
+        self.vector_stores = get_available_vector_stores()
+    def get_chat_engine(self, session_id, llm_choice, vector_store_choice):
+        """Create chat engine with configured embeddings"""
+        if session_id not in self.sessions:
+            # Verify vector store exists
+            if vector_store_choice not in self.vector_stores:
+                raise ValueError(f"Vector store {vector_store_choice} not found")
+            # Verify model exists
+            if llm_choice not in self.llm_options.values():
+                raise ValueError(f"Model {llm_choice} not available")
+            # Configure LLM
+            Settings.llm = Ollama(
+                model=llm_choice,
+                request_timeout=120,
+                temperature=0.3
+            )
+            # Load vector store
+            vs_path = self.vector_stores[vector_store_choice]["path"]
+            vector_store = DuckDBVectorStore.from_local(vs_path)
+            storage_context = StorageContext.from_defaults(vector_store=vector_store)
+            index = VectorStoreIndex.from_vector_store(
+                vector_store=vector_store,
+                storage_context=storage_context
+            )
+            memory = ChatMemoryBuffer.from_defaults()
+            self.sessions[session_id] = index.as_chat_engine(
+                chat_mode="context",  # <-- Change chat mode
+                memory=memory,  # <-- Add memory
+                system_prompt=(
+                    "You are a helpful assistant which helps users to understand scientific knowledge"
+                    "about biomechanics of injuries to human bodies."
+                ),
+                similarity_top_k=3
+            )
+        return self.sessions[session_id]
+# Initialize session manager (must run after model_manager exists: its constructor calls model_manager.get_available_models())
+session_manager = ChatSessionManager()
+def chat_response(message, history, llm_choice, vector_store_choice, session_state):
+    try:
+        # Manage session state
+        if not session_state:
+            session_id = str(uuid.uuid4())
+            session_state = {"session_id": session_id}
+        else:
+            session_id = session_state["session_id"]
+        chat_engine = session_manager.get_chat_engine(session_id, llm_choice, vector_store_choice)
+        response = chat_engine.chat(message)
+        # Process response
+        sources = [
+            f"• {node.metadata.get('file_name', 'Unknown')}"
+            for node in response.source_nodes
+        ]
+        # bot_message = f"{response.response}\n\nSources:\n" + "\n".join(sources)
+        bot_message = f"{response.response}\n"
+        return history + [(message, bot_message)], session_state
+        # return history + [(message)], session_state
+    except Exception as e:
+        return history + [(message, f"Error: {str(e)}")], session_state
+# Gradio interface: logo, title, model / knowledge-base selectors and the chat panel (the embedding-status widget below is commented out)
+with gr.Blocks(title="De-KCIB(Deep Knowledge Center for Injury Biomechanics)") as demo:
+    session_state = gr.State()  # becomes {"session_id": ...} once chat_response has run; None before that and after Clear Session
+    with gr.Row():
+        # gr.set_static_paths(paths=["static/logo.png"])
+            # gr.HTML("""
+            # <img src="/file=static/logo.png"
+            #      alt="Company Logo"
+            #      style="height: 100px; object-fit: contain;">
+            # """)
+        gr.HTML("<img src='https://www.ussbchamber.org/wp-content/uploads/2021/04/innovisionlogo.png' />")
+            # gr.Markdown("<img src='file/logo.png' alt='Company Logo' />")
+    with gr.Row():
+        gr.Markdown("# De-KCIB(Deep Knowledge Center for Injury Biomechanics)")
+    with gr.Row():
+        with gr.Column(scale=1):
+            llm_dropdown = gr.Dropdown(
+                label="Select Language Model",
+                choices=list(session_manager.llm_options.values()),
+                value=next(iter(session_manager.llm_options.values()), None)  # first available model, or None when there is none
+            )
+            vector_dropdown = gr.Dropdown(
+                label="Injury Biomechanics Knowledge Base",
+                choices=[(v["display_name"], k) for k, v in session_manager.vector_stores.items()],  # (label shown, store name passed to chat_response)
+                value=next(iter(session_manager.vector_stores.keys()), None)  # first store in dict order, or None when there is none
+            )
+            # refresh_btn = gr.Button("Refresh Resources")
+            # embed_status = gr.Markdown(
+            #     f"**Embedding Model:** {embed_manager.embed_model.model_name}"
+            #     if embed_manager.embed_model else
+            #     "**Warning:** Using fallback embeddings"
+            # )
+        with gr.Column(scale=3):
+            chatbot = gr.Chatbot(height=500)
+            msg = gr.Textbox(label="Query")
+            clear_btn = gr.Button("Clear Session")
+    # # Event handlers
+    # refresh_btn.click(
+    #     lambda: [
+    #         session_manager.refresh_models(),
+    #         session_manager.refresh_vector_stores()
+    #     ],
+    #     outputs=[llm_dropdown, vector_dropdown]
+    # )
+    msg.submit(
+        chat_response,
+        [msg, chatbot, llm_dropdown, vector_dropdown, session_state],
+        [chatbot, session_state]  # chat_response returns (updated history, session state); the textbox itself is not an output
+    )
+    clear_btn.click(
+        lambda: (None, None),  # Reset chat history and session state; the engine cached in session_manager.sessions is not removed
+        None,
+        [chatbot, session_state],
+        queue=False
+    )
+# Launch the Gradio server only when this file is run as a script (start.sh runs "python3 app.py")
+if __name__ == "__main__":
+    demo.launch()
+    # demo.launch(share=True)

config.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ { "models": ["Jatin19K/unsloth-q5_k_m-mistral-nemo-instruct-2407", "Jatin19K/unsloth_q8_0_meta_llama_3.1_8b_instruct_bnb_4bit_innovision_dekcib"] }

datas/bge_onnx/config.json ADDED Viewed

	@@ -0,0 +1,28 @@

+{
+  "_name_or_path": "Snowflake/snowflake-arctic-embed-l-v2.0",
+  "architectures": [
+    "XLMRobertaModel"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "bos_token_id": 0,
+  "classifier_dropout": null,
+  "eos_token_id": 2,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 1024,
+  "initializer_range": 0.02,
+  "intermediate_size": 4096,
+  "layer_norm_eps": 1e-05,
+  "max_position_embeddings": 8194,
+  "model_type": "xlm-roberta",
+  "num_attention_heads": 16,
+  "num_hidden_layers": 24,
+  "output_past": true,
+  "pad_token_id": 1,
+  "position_embedding_type": "absolute",
+  "torch_dtype": "float32",
+  "transformers_version": "4.46.3",
+  "type_vocab_size": 1,
+  "use_cache": true,
+  "vocab_size": 250002
+}

datas/bge_onnx/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,51 @@

+{
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "cls_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "mask_token": {
+    "content": "<mask>",
+    "lstrip": true,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<pad>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "sep_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

datas/bge_onnx/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,61 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "250001": {
+      "content": "<mask>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "<s>",
+  "eos_token": "</s>",
+  "mask_token": "<mask>",
+  "max_length": 512,
+  "model_max_length": 8192,
+  "pad_to_multiple_of": null,
+  "pad_token": "<pad>",
+  "pad_token_type_id": 0,
+  "padding_side": "right",
+  "sep_token": "</s>",
+  "stride": 0,
+  "tokenizer_class": "XLMRobertaTokenizer",
+  "truncation_side": "right",
+  "truncation_strategy": "longest_first",
+  "unk_token": "<unk>"
+}

requirements.txt ADDED Viewed

	@@ -0,0 +1,12 @@

+--extra-index-url https://download.pytorch.org/whl/cu121
+numpy==1.26.4
+ollama==0.3.3
+onnx==1.17.0
+gradio==5.16.0
+ollama
+llama-index-core
+llama-index-embeddings-huggingface-optimum
+llama-index-llms-ollama
+llama-index-vector-stores-duckdb
+duckdb
+torch==2.5.0+cu121

start.sh ADDED Viewed

	@@ -0,0 +1,3 @@

+#!/bin/sh
+ollama serve > /dev/null 2>&1 &
+sleep 10 && python3 app.py