gpaasch commited on
Commit
984d858
·
1 Parent(s): 255428d

use llamacpp integration instead

Browse files
Files changed (2) hide show
  1. requirements.txt +1 -0
  2. src/app.py +10 -13
requirements.txt CHANGED
@@ -9,6 +9,7 @@ accelerate
9
  llama-index>=0.9.0
10
  llama-index-embeddings-huggingface
11
  llama-index-llms-huggingface
 
12
 
13
  # Language models and embeddings
14
  sentence-transformers>=2.2.0
 
9
  llama-index>=0.9.0
10
  llama-index-embeddings-huggingface
11
  llama-index-llms-huggingface
12
+ llama-index-llms-llama-cpp  # local GGUF model inference via llama.cpp
13
 
14
  # Language models and embeddings
15
  sentence-transformers>=2.2.0
src/app.py CHANGED
@@ -2,8 +2,7 @@ import os
2
  import gradio as gr
3
  from llama_index.core import Settings, ServiceContext
4
  from llama_index.embeddings.huggingface import HuggingFaceEmbedding
5
- from llama_index.llms.huggingface import HuggingFaceLLM
6
- from ctransformers import AutoModelForCausalLM
7
  from parse_tabular import create_symptom_index
8
  import json
9
 
@@ -12,18 +11,16 @@ Settings.embed_model = HuggingFaceEmbedding(
12
  model_name="sentence-transformers/all-MiniLM-L6-v2"
13
  )
14
 
15
- # Configure local LLM with ctransformers
16
- model = AutoModelForCausalLM.from_pretrained(
17
- "TheBloke/Mistral-7B-Instruct-v0.1-GGUF",
18
- model_file="mistral-7b-instruct-v0.1.Q4_K_M.gguf",
19
- model_type="mistral",
20
- gpu_layers=0 # Set > 0 if you have GPU support
21
- )
22
-
23
- llm = HuggingFaceLLM(
24
- model=model,
25
  context_window=2048,
26
- max_new_tokens=256
 
 
27
  )
28
 
29
  # Create service context with local LLM
 
2
  import gradio as gr
3
  from llama_index.core import Settings, ServiceContext
4
  from llama_index.embeddings.huggingface import HuggingFaceEmbedding
5
+ from llama_index.llms.llama_cpp import LlamaCPP
 
6
  from parse_tabular import create_symptom_index
7
  import json
8
 
 
11
  model_name="sentence-transformers/all-MiniLM-L6-v2"
12
  )
13
 
14
+ # Configure local LLM with LlamaCPP
15
+ llm = LlamaCPP(
16
+ model_url="https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf",
17
+ model_path="models/mistral-7b-instruct-v0.1.Q4_K_M.gguf",
18
+ temperature=0.7,
19
+ max_new_tokens=256,
 
 
 
 
20
  context_window=2048,
21
+ # GPU configuration
22
+ n_gpu_layers=0, # Increase for GPU support
23
+ n_threads=8 # Adjust based on your CPU
24
  )
25
 
26
  # Create service context with local LLM