File size: 24,848 Bytes
c181ce0
 
c0e5c04
 
c181ce0
 
5a74e30
c181ce0
76336d2
5a74e30
 
 
 
 
 
 
 
 
 
d8d8050
 
c181ce0
d8d8050
 
4222139
76336d2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5a74e30
 
76336d2
c0e5c04
 
 
 
 
76336d2
 
 
 
 
 
c181ce0
5a74e30
 
b12f17b
 
5a74e30
 
 
 
b12f17b
c181ce0
c0e5c04
5a74e30
 
 
c0e5c04
5a74e30
b12f17b
c181ce0
5a74e30
c181ce0
5a74e30
 
b12f17b
 
5a74e30
 
 
c181ce0
b12f17b
 
5a74e30
 
 
 
 
 
 
 
 
938709d
5a74e30
 
938709d
5a74e30
 
938709d
5a74e30
 
 
 
 
 
f5c821c
b12f17b
5a74e30
938709d
cb15d7e
9e56f1b
 
 
 
 
 
cb15d7e
 
9e56f1b
 
 
 
b12f17b
9e56f1b
d8d8050
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9e56f1b
cb15d7e
9e56f1b
5a74e30
 
 
9e56f1b
5a74e30
9e56f1b
f5c821c
5a74e30
 
9e56f1b
 
5a74e30
 
 
 
 
f5c821c
b12f17b
f5c821c
9e56f1b
 
5a74e30
 
d8d8050
 
5a74e30
b12f17b
f5c821c
5a74e30
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c0e5c04
b12f17b
 
5a74e30
 
b12f17b
 
5a74e30
938709d
5a74e30
b12f17b
5a74e30
938709d
5a74e30
b12f17b
 
5a74e30
 
 
 
b12f17b
 
5a74e30
f5c821c
c0e5c04
5a74e30
a99d17a
5a74e30
 
 
c0e5c04
5a74e30
 
 
b12f17b
5a74e30
 
b12f17b
5a74e30
938709d
5a74e30
eca53f4
 
 
 
 
d798578
5a74e30
d798578
eca53f4
d798578
 
 
 
 
 
 
 
5a74e30
eca53f4
5a74e30
 
 
d798578
 
b12f17b
d798578
b761483
b12f17b
d798578
b12f17b
d798578
 
b12f17b
d798578
eca53f4
5a74e30
 
d798578
 
 
b12f17b
d798578
 
b12f17b
d798578
b12f17b
d798578
b12f17b
c06c6fb
 
d798578
 
 
 
b12f17b
d798578
 
 
 
 
 
b12f17b
d798578
b12f17b
d798578
b12f17b
c06c6fb
eca53f4
 
 
c0e5c04
eca53f4
 
 
 
c0e5c04
 
eca53f4
c0e5c04
 
eca53f4
c0e5c04
eca53f4
c0e5c04
d798578
eca53f4
c181ce0
760d971
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d798578
eca53f4
5a74e30
eca53f4
760d971
 
 
 
 
 
 
 
 
 
 
 
5a74e30
eca53f4
 
5a74e30
f5c821c
5a74e30
 
b12f17b
 
5a74e30
 
b12f17b
5a74e30
 
b12f17b
 
d798578
5a74e30
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
# -*- coding: utf-8 -*-
"""
Environment preparation script for vaccine assistant
Creates vector stores and retrieval tools
"""

import os
import json
import re
import nest_asyncio
from typing import List
from langchain_community.vectorstores import Chroma
from langchain_core.documents import Document
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.retrievers import BM25Retriever, EnsembleRetriever
from langchain.retrievers.multi_query import MultiQueryRetriever
from langchain_google_genai import ChatGoogleGenerativeAI
from llama_index.core.tools import FunctionTool
from llama_index.core.schema import TextNode
from langchain.prompts import PromptTemplate
import logging

logging.basicConfig()
logging.getLogger("langchain.retrievers.multi_query").setLevel(logging.INFO)

def extract_source_ids(response_text):
    """
    Extract source IDs from the response, handling different citation formats:
    - Standard format: [Source ID]
    - Multiple sources in one citation: [Source ID1][Source ID2]
    - Multiple sources in one bracket: [Source ID1, Source ID2]

    Args:
        response_text (str): The generated response text with inline citations.

    Returns:
        list of str: List of unique source IDs found in the response text.
    """
    import re

    # First, extract all source IDs from inline citations with adjacent brackets [ID1][ID2]
    # Replace them with single brackets with comma separation to standardize format
    consolidated_text = re.sub(r'\][\s]*\[', '][', response_text)
    consolidated_text = re.sub(r'\]\[', ', ', consolidated_text)
    
    # Now extract all source IDs from any format (single ID or comma-separated IDs)
    inline_citations = re.findall(r'\[([^\[\]]+)\]', consolidated_text)
    
    if not inline_citations:
        print("Warning: No source IDs found in the response text.")
        return []

    # Process each citation which might contain multiple comma-separated IDs
    all_ids = []
    for citation in inline_citations:
        # Split by comma and strip whitespace
        ids = [id_str.strip() for id_str in citation.split(',')]
        all_ids.extend(ids)
    
    # Get unique source IDs
    source_ids = list(set(all_ids))
    
    # Filter out any non-UUID-like IDs (if needed)
    # This is now optional as we're handling various source ID formats
    # uuid_pattern = r'^[0-9a-f]{8}-?[0-9a-f]{4}-?[0-9a-f]{4}-?[0-9a-f]{4}-?[0-9a-f]{12}$'
    # source_ids = [source_id for source_id in source_ids if re.match(uuid_pattern, source_id, re.IGNORECASE)]
    
    if not source_ids:
        print("Warning: No valid source IDs found after filtering.")
        return []

    return source_ids


def setup_models():
    """Initialize embedding model and LLM"""
    print("πŸ”§ Setting up embedding model and LLM...")
    
    # Initialize embedding model
    embedding_function = HuggingFaceEmbeddings(
        model_name="intfloat/multilingual-e5-base"
    )
    print("βœ… Embedding model initialized: intfloat/multilingual-e5-base")
    
    # Initialize LLM
    genai_api_key = os.getenv('GOOGLE_API_KEY')
    llm = ChatGoogleGenerativeAI(
        model="gemini-2.0-flash",
        google_api_key=genai_api_key
    )
    print("βœ… LLM initialized: gemini-2.0-flash")
    
    return embedding_function, llm

def create_vectorstore_from_json(json_path: str, collection_name: str, embedding_function):
    """Create vector store from JSON chunks"""
    print(f"πŸ“š Creating vector store from: {json_path}")
    
    # Load the chunks.json
    with open(json_path, "r", encoding="utf-8") as f:
        chunks_data = json.load(f)
    
    print(f"πŸ“Š Loaded {len(chunks_data)} chunks from JSON")
    
    documents = []
    for element in chunks_data:
        text = element["text"]
        metadata = {
            "language": "fra",
            "source": element["filename"],
            "filetype": element["filetype"],
            "element_id": element["element_id"]
        }

        if "TableElement" == element["type"]:
            metadata["table_text_as_html"] = element["table_text_as_html"]

        doc = Document(page_content=text, metadata=metadata)
        documents.append(doc)

    # Create vector store
    vectorstore = Chroma.from_documents(
        documents=documents,
        embedding=embedding_function,
        collection_name=collection_name,
        persist_directory="chroma_db_multilingual"
    )
    print(f"βœ… Vector store created with collection: {collection_name}")
    return vectorstore, documents

def create_retriever(vectorstore, docs, llm, bm25_k=3,vector_k=6):
    """Create ensemble retriever with vector and BM25 search
    
    Args:
        vectorstore: The vector store for similarity search
        docs: Documents for BM25 retriever
        llm: Language model for multi-query generation
        bm25_k: Number of documents to retrieve with BM25
        vector_k: Number of documents to retrieve with vector search
    
    Returns:
        Configured retriever (MultiQueryRetriever or EnsembleRetriever)
    """
    print("πŸ” Creating ensemble retriever...")
    
    # PromptTemplate for Vaccine Assistant MultiQuery Retriever
    VACCINE_MULTIQUERY_PROMPT = PromptTemplate(
        input_variables=["question"],
        template="""You are an AI assistant specialized in vaccine-related medical information retrieval.
    Your task is to generate multiple search queries based on the original question to find relevant information from official vaccine medical documents.

    IMPORTANT GUIDELINES:
    - Keep all vaccine-specific terminology and medical terms intact
    - Maintain the clinical and medical context
    - Focus on evidence-based vaccine information
    - Preserve any specific vaccine names, diseases, or medical conditions mentioned
    - Generate queries that would help retrieve information about vaccine schedules, dosing, contraindications, adverse events, and disease prevention

    Original question: {question}

    Generate 4 different search queries that rephrase the original question while maintaining vaccine terminology and medical accuracy. Each query should approach the topic from a slightly different angle to maximize retrieval from vaccine medical documents.

    Provide only the alternative questions, one per line."""
    )
    

    
    # Vector retriever
    vector_retriever = vectorstore.as_retriever(
        search_type="similarity",
        search_kwargs={"k": vector_k}
    )
    print(f"βœ… Vector retriever created (k={vector_k})")
    
    # BM25 retriever
    bm25_retriever = BM25Retriever.from_documents(docs)
    bm25_retriever.k = bm25_k
    print(f"βœ… BM25 retriever created (k={bm25_k})")
    
    # Ensemble retriever
    ensemble_retriever = EnsembleRetriever(
        retrievers=[vector_retriever, bm25_retriever],
        weights=[0.5, 0.5]
    )
    print("βœ… Ensemble retriever created (weights: 0.5, 0.5)")
    
    
    # Multi-query expanding retriever (only for filtered mode)
    expanding_retriever = MultiQueryRetriever.from_llm(
        retriever=ensemble_retriever,
        llm=llm,
        prompt=VACCINE_MULTIQUERY_PROMPT,
    )
    print("βœ… Multi-query expanding retriever created")
    
    return expanding_retriever

def convert_chromadb_to_llamaindex_nodes(chromadb_documents: List) -> List[TextNode]:
    """Convert ChromaDB Document objects to LlamaIndex TextNode objects"""
    nodes = []
    for i, doc in enumerate(chromadb_documents):
        try:
            text = doc.page_content
            metadata = doc.metadata.copy()
            element_id = metadata.get("element_id", f"doc_{i}")
            source = metadata.get("source", "unknown")
            node_id = f"{source}_{element_id}"

            node = TextNode(
                text=text,
                metadata=metadata,
                id_=node_id
            )
            nodes.append(node)
        except Exception as e:
            continue
    return nodes

def section_tool_wrapper(retriever, section_path_chunks, query):
    """Generic section tool wrapper"""
    print(f"πŸ” TOOL CALL: Searching for query: '{query[:100]}...' in {section_path_chunks}")
    
    try:
        retrieved_docs = retriever.get_relevant_documents(query)
        print(f"πŸ“„ Retrieved {len(retrieved_docs)} documents")
        
        nodes_from_retrieved_docs = convert_chromadb_to_llamaindex_nodes(retrieved_docs)

        if not nodes_from_retrieved_docs:
            print("❌ No relevant documents found for the query")
            return "No relevant documents found for the query."

        chunk_ids = [node.metadata['element_id'] for node in retrieved_docs]
        print(f"πŸ†” Found chunk IDs: {chunk_ids}")
        
        with open(section_path_chunks, "r", encoding="utf-8") as f:
            chunks_data = json.load(f)

        chunks_unique = [node for node in chunks_data if node.get('element_id', 'Unknown') in chunk_ids]
        print(f"βœ… Matched {len(chunks_unique)} unique chunks")
        
        combined_text = []
        
        for chu in chunks_unique:
            if "TableElement" == chu["type"]:
                text = f"[Source: {chu['elements']['element_id']}]\n CONTENT: \n{chu['text']}\n HTML: \n {chu['table_text_as_html']}  \n\n"
                combined_text.append(text)
            else:
                for element in chu["elements"]:
                    text = f"[Source: {element['element_id']}]\n CONTENT: \n{element['text']} \n\n"
                    combined_text.append(text)

        result = "\n---\n".join(combined_text)
        print(f"βœ… TOOL RESPONSE: Generated response with {len(combined_text)} text sections")
        return result
    except Exception as e:
        print(f"❌ TOOL ERROR: {e}")
        return f"Error retrieving documents: {str(e)}"

def create_section_tools(embedding_function, llm):
    """
    Create all section-specific retrieval tools with improved descriptions for accurate routing.
    """
    print("πŸ› οΈ Creating section-specific retrieval tools with enhanced descriptions...")

    # Define section paths - Fixed path structure
    section_paths = {
        'one': './data/section_one_chunks.json',
        'two': './data/section_two_chunks.json',
        'three': './data/section_three_chunks.json',
        'four': './data/section_four_chunks.json',
        'five': './data/section_five_chunks.json',
        'six': './data/section_six_chunks.json',
        'seven': './data/section_seven_chunks.json',
        'eight': './data/section_eight_chunks.json',
        'nine': './data/section_nine_chunks.json',
        'ten': './data/section_ten_chunks.json'
    }

    # Create retrievers for each section
    section_retrievers = {}
    for section, path in section_paths.items():
        try:
            if os.path.exists(path):
                print(f"πŸ“ Creating retriever for section {section} from {path}")
                vstore, docs = create_vectorstore_from_json(path, f"Guide_2023_{section}", embedding_function)
                section_retrievers[section] = create_retriever(vstore, docs, llm, bm25_k=7, vector_k=10)
                print(f"βœ… Successfully created retriever for section {section}")
            else:
                print(f"⚠️ Warning: File not found for section {section}: {path}")
                section_retrievers[section] = None
        except Exception as e:
            print(f"❌ Error creating retriever for section {section}: {e}")
            section_retrievers[section] = None

    # Create main guide retriever
    guide_path = './data/Guide-pratique-de-mise-en-oeuvre-du-calendrier-national-de-vaccination-2023.json'
    guide_retriever = None
    try:
        if os.path.exists(guide_path):
            print("πŸ“š Creating main guide retriever...")
            guide_vstore, guide_docs = create_vectorstore_from_json(guide_path, "Guide_2023_multilingual", embedding_function)
            guide_retriever = create_retriever(guide_vstore, guide_docs, llm)
            print("βœ… Successfully created main guide retriever")
        else:
            print(f"⚠️ Warning: Main guide file not found: {guide_path}")
    except Exception as e:
        print(f"❌ Error creating main guide retriever: {e}")

    # WHO Immunization in Practice Tool
    immunization_path = './data/Immunization in Practice_WHO_eng_2015.json'
    immunization_retriever = None
    try:
        if os.path.exists(immunization_path):
            print("🌍 Creating immunization retriever...")
            immunization_vstore, immunization_docs = create_vectorstore_from_json(
                immunization_path,
                "Immunization_in_Practice_WHO_eng_2015",
                embedding_function
            )
            immunization_retriever = create_retriever(immunization_vstore, immunization_docs, llm)
            print("βœ… Successfully created immunization retriever")
        else:
            print(f"⚠️ Warning: Immunization file not found: {immunization_path}")
    except Exception as e:
        print(f"❌ Error creating immunization retriever: {e}")

    # --- Tool Definitions with Improved Descriptions ---

    def general_guide_tool(query: str) -> str:
        """
        A general-purpose tool for the Algerian National Vaccination Guide.
        **Use this tool as a fallback** if no other specific tool seems appropriate, or for very broad, multi-topic questions
        (e.g., 'Summarize the Algerian vaccination policy and its safety measures').
        **Always prefer a more specific tool if the query matches its description** (e.g., use 'cold_chain_tool' for temperature questions).

        Args:
            query (str): A broad or ambiguous question about the Algerian National Vaccination Guide.

        Returns:
            str: Content retrieved from the entire guide.
        """
        print(f"πŸ₯ GENERAL GUIDE TOOL CALLED (FALLBACK): {query[:50]}...")
        if not guide_retriever:
            return "Guide retriever not available - main guide file may be missing"
        return section_tool_wrapper(guide_retriever, guide_path, query)

    def who_immunization_tool(query: str) -> str:
        """
        Provides information from the WHO's 'Immunization in Practice' guide. Use this for questions about
        **global immunization standards**, international best practices, or for comparing Algerian policy to
        general WHO recommendations on topics like cold chain, safety, and disease control.

        Args:
            query (str): A question seeking global or general immunization practices.

        Returns:
            str: Content from the WHO Immunization in Practice guide.
        """
        print(f"🌍 WHO TOOL CALLED: {query[:50]}...")
        if not immunization_retriever:
            return "Immunization in Practice retriever not available - WHO guide file may be missing"
        return section_tool_wrapper(immunization_retriever, immunization_path, query)

    def program_overview_tool(query: str) -> str:
        """
        (Section 1) The primary tool for questions about the **history, objectives, and structure** of Algeria's
        national immunization program (PEV - Programme Γ‰largi de Vaccination). Use this for topics like
        the program's rationale, key achievements, and the reasons for updates to the vaccination calendar.

        Args:
            query (str): A question about the foundation or evolution of the PEV.

        Returns:
            str: Response from Section 1.
        """
        print(f"πŸ“‹ PROGRAM OVERVIEW (S1) TOOL CALLED: {query[:50]}...")
        if not section_retrievers.get('one'):
            return "Section 1 retriever not available"
        return section_tool_wrapper(section_retrievers['one'], section_paths['one'], query)

    def disease_info_tool(query: str) -> str:
        """
        (Section 2) The definitive tool for information on **specific vaccine-preventable diseases**.
        Use this to find details on **symptoms, transmission methods, complications**, and prevention
        strategies for diseases like Diphtheria, Measles, Polio, Tetanus, etc.

        Args:
            query (str): A question about a disease covered by the national vaccination program.

        Returns:
            str: Disease-specific content from Section 2.
        """
        print(f"🦠 DISEASE INFO (S2) TOOL CALLED: {query[:50]}...")
        if not section_retrievers.get('two'):
            return "Section 2 retriever not available"
        return section_tool_wrapper(section_retrievers['two'], section_paths['two'], query)

    def vaccine_properties_tool(query: str) -> str:
        """
        (Section 3) The specific tool for questions about the **vaccines themselves**: their types (e.g., BCG, ROR,
        DTCaVPI), composition, whether they are live or inactivated, and the correct **method of administration**
        (e.g., intradermal, intramuscular, oral).

        Args:
            query (str): A question about a vaccine's formulation or how it is administered.

        Returns:
            str: Vaccine-specific info from Section 3.
        """
        print(f"πŸ’‰ VACCINE PROPERTIES (S3) TOOL CALLED: {query[:50]}...")
        if not section_retrievers.get('three'):
            return "Section 3 retriever not available"
        return section_tool_wrapper(section_retrievers['three'], section_paths['three'], query)

    def catch_up_vaccination_tool(query: str) -> str:
        """
        (Section 4) Specialized tool for **missed or delayed vaccinations (rattrapage vaccinal)**.
        Use this for questions about creating a **catch-up schedule** for a child who is behind
        on their shots, based on their age and vaccination history.

        Args:
            query (str): A question about catch-up vaccination due to a delay or missed dose.

        Returns:
            str: Catch-up schedule guidance from Section 4.
        """
        print(f"πŸ”„ CATCH-UP (S4) TOOL CALLED: {query[:50]}...")
        if not section_retrievers.get('four'):
            return "Section 4 retriever not available"
        return section_tool_wrapper(section_retrievers['four'], section_paths['four'], query)

    def special_populations_tool(query: str) -> str:
        """
        (Section 5) The designated tool for vaccination guidelines concerning **special populations**.
        Use for questions about vaccinating preterm infants, allergic children, or patients with
        immunosuppression, chronic illnesses (cardiac, pulmonary), or other specific health conditions.

        Args:
            query (str): A question about tailored vaccination for a vulnerable or special group.

        Returns:
            str: Custom recommendations from Section 5.
        """
        print(f"πŸ‘₯ SPECIAL POPULATIONS (S5) TOOL CALLED: {query[:50]}...")
        if not section_retrievers.get('five'):
            return "Section 5 retriever not available"
        return section_tool_wrapper(section_retrievers['five'], section_paths['five'], query)

    def cold_chain_tool(query: str) -> str:
        """
        (Section 6) The definitive tool for all questions about the **cold chain**, including vaccine **storage
        temperatures**, transport protocols, refrigerators, temperature monitoring (like PCV pastilles),
        and procedures for handling cold chain failures or power outages.

        Args:
            query (str): A logistics-related question about vaccine temperature management.

        Returns:
            str: Cold chain instructions from Section 6.
        """
        print(f"❄️ COLD CHAIN (S6) TOOL CALLED: {query[:50]}...")
        if not section_retrievers.get('six'):
            return "Section 6 retriever not available"
        return section_tool_wrapper(section_retrievers['six'], section_paths['six'], query)

    def injection_safety_tool(query: str) -> str:
        """
        (Section 7) The primary tool for questions related to the **safe administration of injections**.
        Use for topics like sterile equipment, proper injection techniques, preventing needlestick injuries,
        and safe disposal of medical waste (DASRI).

        Args:
            query (str): A question about how to perform vaccine injections safely.

        Returns:
            str: Best practices from Section 7.
        """
        print(f"πŸ›‘οΈ INJECTION SAFETY (S7) TOOL CALLED: {query[:50]}...")
        if not section_retrievers.get('seven'):
            return "Section 7 retriever not available"
        return section_tool_wrapper(section_retrievers['seven'], section_paths['seven'], query)

    def session_management_tool(query: str) -> str:
        """
        (Section 8) Use this tool for questions about the **operational conduct of a vaccination session**
        and **vaccinovigilance**. This includes preparing the session, material setup, registering vaccination
        acts, and monitoring/reporting adverse events post-vaccination (MPVI).

        Args:
            query (str): A question about running a vaccination session or post-vaccine monitoring.

        Returns:
            str: Workflow and safety monitoring details from Section 8.
        """
        print(f"πŸ“Š SESSION MGMT (S8) TOOL CALLED: {query[:50]}...")
        if not section_retrievers.get('eight'):
            return "Section 8 retriever not available"
        return section_tool_wrapper(section_retrievers['eight'], section_paths['eight'], query)

    def planning_and_logistics_tool(query: str) -> str:
        """
        (Section 9) This tool is for **planning vaccination sessions and managing logistics**. Use it for
        questions about creating operational maps, estimating vaccine and supply needs, managing stock,
        and reducing vaccine wastage.

        Args:
            query (str): A question about organizing vaccination services or managing stock.

        Returns:
            str: Planning and stock guidance from Section 9.
        """
        print(f"πŸ“… PLANNING & LOGISTICS (S9) TOOL CALLED: {query[:50]}...")
        if not section_retrievers.get('nine'):
            return "Section 9 retriever not available"
        return section_tool_wrapper(section_retrievers['nine'], section_paths['nine'], query)

    def communication_tool(query: str) -> str:
        """
        (Section 10) The specific tool for **social mobilization and communication**. Use this for
        questions about communication strategies, addressing **vaccine hesitancy**, managing rumors,
        and community outreach to promote vaccination.

        Args:
            query (str): A question about public engagement or communication for vaccination.

        Returns:
            str: Public mobilization strategies from Section 10.
        """
        print(f"πŸ“’ COMMUNICATION (S10) TOOL CALLED: {query[:50]}...")
        if not section_retrievers.get('ten'):
            return "Section 10 retriever not available"
        return section_tool_wrapper(section_retrievers['ten'], section_paths['ten'], query)

    # Create FunctionTool objects with new, clearer names
    tools = [
        FunctionTool.from_defaults(name="general_guide_tool", fn=general_guide_tool),
        FunctionTool.from_defaults(name="who_immunization_tool", fn=who_immunization_tool),
        # Section-specific tools
        FunctionTool.from_defaults(name="program_overview_tool", fn=program_overview_tool),
        FunctionTool.from_defaults(name="disease_info_tool", fn=disease_info_tool),
        FunctionTool.from_defaults(name="vaccine_properties_tool", fn=vaccine_properties_tool),
        FunctionTool.from_defaults(name="catch_up_vaccination_tool", fn=catch_up_vaccination_tool),
        FunctionTool.from_defaults(name="special_populations_tool", fn=special_populations_tool),
        FunctionTool.from_defaults(name="cold_chain_tool", fn=cold_chain_tool),
        FunctionTool.from_defaults(name="injection_safety_tool", fn=injection_safety_tool),
        FunctionTool.from_defaults(name="session_management_tool", fn=session_management_tool),
        FunctionTool.from_defaults(name="planning_and_logistics_tool", fn=planning_and_logistics_tool),
        FunctionTool.from_defaults(name="communication_tool", fn=communication_tool),
    ]

    print(f"βœ… Created {len(tools)} tools with improved routing descriptions")
    return tools

def prepare_environment():
    """Main function to prepare the environment and return tools"""
    print("πŸš€ Starting environment preparation...")
    print("πŸ”§ Setting up models...")
    embedding_function, llm = setup_models()
    
    print("πŸ› οΈ Creating section tools...")
    tools = create_section_tools(embedding_function, llm)
    
    print("βœ… Environment prepared successfully!")
    print(f"πŸ“‹ Created {len(tools)} tools")
    
    return tools, llm