Spaces:

Agents-MCP-Hackathon
/

MedCodeMCP

Sleeping

gpaasch committed on Jun 7, 2025

Commit

984d858

1 Parent(s): 255428d

use llamacpp integration instead

Files changed (2) hide show

requirements.txt CHANGED Viewed

@@ -9,6 +9,7 @@ accelerate
 llama-index>=0.9.0
 llama-index-embeddings-huggingface
 llama-index-llms-huggingface
 # Language models and embeddings
 sentence-transformers>=2.2.0

 llama-index>=0.9.0
 llama-index-embeddings-huggingface
 llama-index-llms-huggingface
+llama-index-llms-llama-cpp  # LlamaCPP integration for running local GGUF models
 # Language models and embeddings
 sentence-transformers>=2.2.0

src/app.py CHANGED Viewed

@@ -2,8 +2,7 @@ import os
 import gradio as gr
 from llama_index.core import Settings, ServiceContext
 from llama_index.embeddings.huggingface import HuggingFaceEmbedding
-from llama_index.llms.huggingface import HuggingFaceLLM
-from ctransformers import AutoModelForCausalLM
 from parse_tabular import create_symptom_index
 import json
@@ -12,18 +11,16 @@ Settings.embed_model = HuggingFaceEmbedding(
     model_name="sentence-transformers/all-MiniLM-L6-v2"
 )
-# Configure local LLM with ctransformers
-model = AutoModelForCausalLM.from_pretrained(
-    "TheBloke/Mistral-7B-Instruct-v0.1-GGUF",
-    model_file="mistral-7b-instruct-v0.1.Q4_K_M.gguf",
-    model_type="mistral",
-    gpu_layers=0  # Set > 0 if you have GPU support
-)
-llm = HuggingFaceLLM(
-    model=model,
     context_window=2048,
-    max_new_tokens=256
 )
 # Create service context with local LLM

 import gradio as gr
 from llama_index.core import Settings, ServiceContext
 from llama_index.embeddings.huggingface import HuggingFaceEmbedding
+# The llama-index-llms-llama-cpp distribution installs into the namespaced
+# package llama_index.llms.llama_cpp (dots, not underscores).
+from llama_index.llms.llama_cpp import LlamaCPP
 from parse_tabular import create_symptom_index
 import json
     model_name="sentence-transformers/all-MiniLM-L6-v2"
 )
+# Configure local LLM with LlamaCPP (quantized Mistral-7B-Instruct GGUF).
+# LlamaCPP only downloads `model_url` when `model_path` is None; a
+# `model_path` that does not exist on disk raises ValueError instead of
+# triggering the download. Pass the local path only when the file is present,
+# otherwise let LlamaCPP fetch the model into its own cache directory.
+_LOCAL_MODEL_PATH = "models/mistral-7b-instruct-v0.1.Q4_K_M.gguf"
+llm = LlamaCPP(
+    model_url="https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf",
+    model_path=_LOCAL_MODEL_PATH if os.path.exists(_LOCAL_MODEL_PATH) else None,
+    temperature=0.7,
+    max_new_tokens=256,
     context_window=2048,
+    # llama.cpp backend options are not LlamaCPP constructor arguments;
+    # they must go through model_kwargs, which is forwarded to llama_cpp.Llama.
+    model_kwargs={
+        "n_gpu_layers": 0,  # Increase for GPU support
+        "n_threads": 8,  # Adjust based on your CPU
+    },
 )
 # Create service context with local LLM