File size: 1,660 Bytes
42bf28c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0bd6bcb
42bf28c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1505b73
 
 
42bf28c
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
"""

"""

import os
import torch
from llama_index.core import Settings
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.huggingface import HuggingFaceLLM
from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
from llama_index.core.prompts import PromptTemplate



# ---- Model IDs ---
AGENT_MODEL_ID = "google/gemma-3-12b-it"  # chat LLM, served via the HF Inference API below
EMBEDDING_MODEL_ID = "clip-ViT-B-32"  # sentence-transformers CLIP encoder (short name; prefixed later)


# --- Environment & Paths ---
# All values come from environment variables and are None when unset.
CHROMADB = os.getenv("CHROMADB")  # presumably the Chroma vector-store path/URI -- verify against caller
CELLPOSE_SAM = os.getenv("CELLPOSE_SAM")  # presumably a Cellpose-SAM model/asset path -- verify against caller
HF_TOKEN = os.getenv("HF_TOKEN")  # Hugging Face API token used by configure_llama_index()

# Neo4j connection settings (used elsewhere in the project; not in this chunk).
NEO4J_URI = os.getenv("NEO4J_URI")
NEO4J_USERNAME = os.getenv("NEO4J_USERNAME")
NEO4J_PASSWORD = os.getenv("NEO4J_PASSWORD")
NEO4J_DATABASE = os.getenv("NEO4J_DATABASE")


# --- LlamaIndex Global Settings ---
def configure_llama_index():
    """
    Configure global LlamaIndex ``Settings``.

    Sets:
      - ``Settings.llm``: the agent model (``AGENT_MODEL_ID``) served
        remotely through the Hugging Face Inference API, authenticated
        with ``HF_TOKEN``.
      - ``Settings.embed_model``: a local sentence-transformers
        embedding model (``EMBEDDING_MODEL_ID``).
      - ``Settings.chunk_size`` / ``Settings.chunk_overlap``: defaults
        used by LlamaIndex node parsers.

    Mutates the global ``Settings`` object in place; returns None.
    """
    print("βœ“ Configuring LlamaIndex settings...")

    # NOTE(review): the previous "Gemma 3" query_wrapper_prompt was never
    # passed to the LLM (dead local) and actually used Llama-3 chat tokens
    # (<|begin_of_text|>, <|start_header_id|>), which do not match Gemma's
    # chat format -- removed. The Inference API applies the model's own
    # chat template server-side.
    llm = HuggingFaceInferenceAPI(
        model_name=AGENT_MODEL_ID,
        token=HF_TOKEN,
        provider="auto",         # let the client pick an available provider
        requests_per_minute=60,  # client-side rate limit
        timeout=300,             # seconds; large models can respond slowly
    )
    Settings.llm = llm

    # Local embedding model; the short CLIP ID needs the hub org prefix.
    Settings.embed_model = HuggingFaceEmbedding(
        model_name=f"sentence-transformers/{EMBEDDING_MODEL_ID}"
    )

    # Chunking defaults for node parsing / indexing.
    Settings.chunk_size = 512
    Settings.chunk_overlap = 50

    # The embedding model runs locally; the LLM is remote (Inference API).
    print("βœ“ LlamaIndex configured: remote Inference-API LLM and local embedding model.")