# Hugging Face Space: VaxGuide / Agentic_RAG (status: Sleeping)
# File: Agentic_RAG / agent_direct_llm_sections.py (20.1 kB)
# Last commit: b436da0 by Zeggai Abdellah, 7 months ago - "change the gemini model"

	# agent_direct_llm_sections.py (Modified for FastAPI integration)

	import os
	import sys
	from pathlib import Path
	from functools import partial

	# LlamaIndex Core Imports
	from llama_index.core import Settings
	from llama_index.core.agent import ReActAgent
	from llama_index.core.tools import FunctionTool, ToolMetadata
	from llama_index.embeddings.huggingface import HuggingFaceEmbedding
	from llama_index.core import load_index_from_storage, StorageContext, VectorStoreIndex
	from llama_index.core.tools import QueryEngineTool

	# LLM Import (Use Gemini)
	from llama_index.llms.google_genai import GoogleGenAI
	import traceback

	# Directory that holds the per-section protocol text files (e.g. section_1.txt);
	# each section tool reads its file from here.
	SECTION_FILES_PATH = "./data/section_files"
	# Base directory under which persisted supplementary indexes are looked up
	# (the WHO guidelines index lives in a sub-directory of this path).
	SUPPLEMENTARY_INDEXES_BASE_PATH_FOR_AGENT = "./storage/supplementary_indices"

	# --- Imports for API callers ---
	# Optional integrations. Each module is guarded separately so that a missing
	# image_callers module cannot disable the unrelated .NET API tools.
	# The imported callables fall back to None so the names always exist.
	try:
	    from api_callers import get_vaccination_statistics, get_patient_vaccination_record
	    API_CALLERS_AVAILABLE = True
	except ImportError as api_import_error:
	    print(f"⚠️ Warning: api_callers.py could not be imported ({api_import_error}). API tools will be unavailable.")
	    get_vaccination_statistics = None
	    get_patient_vaccination_record = None
	    API_CALLERS_AVAILABLE = False

	try:
	    from image_callers import find_relevant_image_info
	    IMAGE_CALLERS_AVAILABLE = True
	except ImportError as image_import_error:
	    print(f"⚠️ Warning: image_callers.py could not be imported ({image_import_error}). Image retrieval will be unavailable.")
	    find_relevant_image_info = None
	    IMAGE_CALLERS_AVAILABLE = False



	def configure_settings():
	    """Configure the global LlamaIndex ``Settings`` (embedding model and LLM).

	    Reads the Gemini API key from the ``GOOGLE_API_KEY`` environment variable,
	    then assigns a HuggingFace embedding model to ``Settings.embed_model`` and
	    a Google GenAI LLM to ``Settings.llm``.

	    Raises:
	        ValueError: If ``GOOGLE_API_KEY`` is unset or empty.
	    """
	    print("Configuring LLM (Google GenAI)...")
	    gemini_api_key = os.getenv("GOOGLE_API_KEY")
	    # Validate the key first so a misconfigured environment fails before the
	    # embedding model is instantiated.
	    if not gemini_api_key:
	        raise ValueError("GOOGLE_API_KEY environment variable not set! Required for Agent LLM.")

	    Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
	    # NOTE(review): GoogleGenAI exposes the model identifier as `model` (the
	    # attribute printed below); `model_name` is not one of its constructor
	    # parameters, so the requested model was presumably being ignored.
	    # Confirm against the installed llama-index-llms-google-genai version.
	    Settings.llm = GoogleGenAI(model="models/gemini-2.0-flash", api_key=gemini_api_key)
	    print(f"LLM for Agent: {Settings.llm.model}")



	def query_section_directly(
	    section_file_path: str,
	    section_id_for_log: str,
	    section_title_for_prompt: str,
	    sub_query: str
	) -> str:
	    """Answer ``sub_query`` using only the text of one protocol section file.

	    The section file is read in full, embedded in a prompt together with the
	    query, and sent to ``Settings.llm``. Failures (missing or unreadable
	    file, empty file, LLM error, response without text) are not raised; they
	    are reported through the returned string.

	    Args:
	        section_file_path: Path of the section text file to read.
	        section_id_for_log: Section identifier used in log lines, the prompt
	            and the returned text.
	        section_title_for_prompt: Section title inserted in the prompt and in
	            the returned text.
	        sub_query: The question to answer from the section text.

	    Returns:
	        The LLM answer prefixed with the section reference, or a warning /
	        error message describing what went wrong.
	    """
	    print(f"--- INSIDE query_section_directly (Tool for Section ID: {section_id_for_log}) ---")
	    print(f" Attempting to read: {section_file_path}")
	    print(f" For sub_query: '{sub_query}'")

	    # Step 1: load the section text; any read problem becomes a returned message.
	    section_text = ""
	    try:
	        with open(section_file_path, 'r', encoding='utf-8') as section_handle:
	            section_text = section_handle.read()
	        print(f" Read {len(section_text)} characters from {section_file_path}.")
	        has_content = bool(section_text.strip())
	        if not has_content:
	            return f"Warning: Section file {section_file_path} (for section {section_id_for_log}) is empty or only whitespace."
	    except FileNotFoundError:
	        missing_msg = f"Error: File not found for section {section_id_for_log} at {section_file_path}."
	        print(f" ❌ {missing_msg}")
	        return missing_msg
	    except Exception as read_err:
	        read_failure_msg = f"Error reading file for section {section_id_for_log} at {section_file_path}: {read_err}"
	        print(f" ❌ {read_failure_msg}")
	        traceback.print_exc()
	        return read_failure_msg

	    # Step 2: build the prompt around the section text and the query.
	    prompt = f"""
	You are an assistant analyzing specific sections of the Algerian Vaccination Protocol document.
	The current section being analyzed is Section {section_id_for_log}, titled: "{section_title_for_prompt}".

	Based ONLY on the following "Section Text", provide a DETAILED and COMPREHENSIVE answer to the "User Query".
	Extract all relevant recommendations, precautions, contraindications, specific vaccine names, and dosage information if mentioned.
	If the query involves multiple conditions, address each one thoroughly.

	CRITICALLY IMPORTANT: When you use specific information from the "Section Text" to formulate your answer,
	AND if that information in the "Section Text" is immediately preceded or followed by a page number
	in square brackets (e.g., "[72]", "[P.12]", "P.12"),
	you MUST include that page number reference in your answer in the format (P. XX).
	If multiple pieces of information from different pages are combined, cite all relevant page numbers found near the text.

	Do not use any prior knowledge. If the answer is not explicitly found in the "Section Text", state that clearly.
	Structure your answer clearly.

	User Query:
	{sub_query}

	Section Text:
	--- START OF SECTION TEXT ---
	{section_text}
	--- END OF SECTION TEXT ---

	Comprehensive Answer (with page references if found, e.g., "XYZ is recommended (P. 15). ABC should be avoided (P. 12)."):
	"""
	    print(f" Sending query '{sub_query}' to LLM for section {section_id_for_log} ('{section_title_for_prompt}')...")

	    # Step 3: call the LLM; a missing text payload or an exception is reported as a message.
	    try:
	        completion = Settings.llm.complete(prompt)
	        completion_text = getattr(completion, 'text', None)
	        if completion_text is None:
	            no_text_msg = f"Error: LLM response object for section {section_id_for_log} missing 'text' or was None. Response: {completion}"
	            print(f" ⚠️ {no_text_msg}")
	            return no_text_msg
	        answer_text = completion_text.strip()
	        print(f" LLM response received for section {section_id_for_log} (first 100 chars): {answer_text[:100]}...")
	        return f"According to Section {section_id_for_log} ('{section_title_for_prompt}'):\n{answer_text}"
	    except Exception as llm_err:
	        llm_failure_msg = f"LLM call failed for section {section_id_for_log}, query '{sub_query}': {llm_err}"
	        print(f" ❌ EXCEPTION during LLM call: {llm_failure_msg}")
	        traceback.print_exc()
	        return llm_failure_msg



	def _section_tool_definitions():
	    """Return the id, file name, title and tool description of every protocol section tool."""
	    return [
	        {
	            "id": "1", "filename": "section_1.txt",
	            "title": "LE PROGRAMME ELARGI DE VACCINATION EN ALGERIE",
	            "description": "Fournit des informations générales UNIQUEMENT de la Section 1 sur 'LE PROGRAMME ELARGI DE VACCINATION EN ALGERIE'. Utiliser pour des questions sur les objectifs, l'historique, les points forts/faibles du programme de vaccination algérien (PEV), et la justification des actualisations du calendrier."
	        },
	        {
	            "id": "2", "filename": "section_2.txt",
	            "title": "REPERES SUR LES MALADIES CIBLES DU CALENDRIER NATIONAL DE VACCINATION",
	            "description": "Cette section contient LE CALENDRIER NATIONAL DE VACCINATION 2023 , officiel Fournit des descriptions détaillées UNIQUEMENT de la Section 2 sur les MALADIES spécifiques ciblées par le calendrier (Diphtérie, Rougeole, Oreillons, Coqueluche, Poliomyélite, Rubéole, Tétanos, Tuberculose, Hépatite B, Hib, Pneumocoques). Utiliser pour consulter le calendrier par âge , pour des questions sur la définition, transmission, symptômes, complications, et prévention de ces MALADIES."
	        },
	        {
	            "id": "3", "filename": "section_3.txt",
	            "title": "REPERES SUR LES VACCINS DU CALENDRIER NATIONAL DE VACCINATION 2023",
	            "description": "Fournit des informations détaillées UNIQUEMENT de la Section 3 sur les VACCINS spécifiques du calendrier national de vaccination 2023., les méthodes d'administration (3.2), et détails sur chaque vaccin (BCG, Hépatite B, DTCaVPI-Hib-HBV, ROR, etc.) (3.3). Utiliser pour des questions sur les types de vaccins et leur administration."
	        },
	        {
	            "id": "4", "filename": "section_4.txt",
	            "title": "RATTRAPAGE VACCINAL",
	            "description": (
	                "Fournit des informations UNIQUEMENT de la Section 4 concernant le RATTRAPAGE VACCINAL. "
	                "Utiliser pour déterminer les procédures, règles, et schémas de rattrapage pour des vaccins SPÉCIFIQUEMENT IDENTIFIÉS COMME MANQUANTS OU EN RETARD, "
	                "après avoir consulté le calendrier vaccinal standard (via l'outil de la Section 11) pour l'âge du patient."
	            )
	        },
	        {
	            "id": "5", "filename": "section_5.txt",
	            "title": "VACCINATION DES POPULATIONS PARTICULIÈRES",
	            "description": "Fournit des informations UNIQUEMENT de la Section 5 sur la VACCINATION DES POPULATIONS PARTICULIÈRES. Inclut directives pour enfant allergique (5.1, y compris œufs), prématurés, diabète, immunosuppression (y compris VIH 5.10), etc. Utiliser pour questions sur la vaccination dans ces contextes cliniques."
	        },
	        {
	            "id": "6", "filename": "section_6.txt",
	            "title": "LES FONDAMENTAUX DE LA CHAINE DU FROID",
	            "description": "Fournit des informations UNIQUEMENT de la Section 6 sur la CHAINE DU FROID pour les vaccins (principes, transport, gestion stocks, conservation, pannes)."
	        },
	        {
	            "id": "7", "filename": "section_7.txt",
	            "title": "SÉCURITÉ DES INJECTIONS",
	            "description": "Fournit des informations UNIQUEMENT de la Section 7 sur la SÉCURITÉ DES INJECTIONS (matériel, techniques, prévention blessures, élimination déchets)."
	        },
	        {
	            "id": "8", "filename": "section_8.txt",
	            "title": "TENUE D'UNE SEANCE DE VACCINATION ET VACCINOVIGILANCE",
	            "description": "Fournit des informations UNIQUEMENT de la Section 8 sur la TENUE D'UNE SEANCE DE VACCINATION ET LA VACCINOVIGILANCE (préparation matériel, administration, enregistrement, surveillance MAPI/MPVI)."
	        },
	        {
	            "id": "9", "filename": "section_9.txt",
	            "title": "PLANIFICATION DES SÉANCES DE VACCINATION",
	            "description": "Fournit des informations UNIQUEMENT de la Section 9 sur la PLANIFICATION DES SÉANCES DE VACCINATION (carte opérationnelle, estimation besoins, gestion stocks)."
	        },
	        {
	            "id": "10", "filename": "section_10.txt",
	            "title": "MOBILISATION SOCIALE EN FAVEUR DE LA VACCINATION",
	            "description": "Fournit des informations UNIQUEMENT de la Section 10 sur la MOBILISATION SOCIALE (communication, gestion rumeurs, hésitation vaccinale)."
	        },
	        {
	            "id": "11",
	            "filename": "section_11.txt",
	            "title": "Calendrier National de Vaccination Algérien 2023 (Tableau Détaillé)",
	            "description": (
	                "Source faisant autorité contenant UNIQUEMENT LE TABLEAU DÉTAILLÉ du Calendrier National de Vaccination Algérien 2023. "
	                "Consulter cet outil EXCLUSIVEMENT pour déterminer avec précision : les vaccins spécifiques recommandés à chaque âge (par exemple, à 2 mois, 11 mois, 18 mois, 3 ans, 6 ans), "
	                "le nombre de doses pour chaque vaccin, l'âge exact d'administration pour chaque dose, et les intervalles requis entre les doses. "
	                "C'est la référence principale pour toute question sur le schéma vaccinal standard par âge en Algérie et pour calculer les doses dues ou ce qu'un enfant aurait dû recevoir à un certain âge."
	            )
	        }
	    ]


	def _make_section_tool_fn(bound_section_query):
	    """Wrap a pre-bound section query so the tool takes one keyword-only ``input`` argument."""
	    def wrapper(*, input: str):
	        return bound_section_query(sub_query=input)
	    return wrapper


	def _build_who_tool(index_dir, tool_name, tool_description):
	    """Load the persisted WHO index and wrap it as a query tool; return None if unavailable."""
	    if not index_dir.exists():
	        return None

	    print(f" Creating WHO Document Tool from index: {index_dir}...")
	    try:
	        who_storage_context = StorageContext.from_defaults(persist_dir=str(index_dir))
	        who_vector_index = load_index_from_storage(who_storage_context)
	        who_query_engine = who_vector_index.as_query_engine(similarity_top_k=3)
	        return QueryEngineTool.from_defaults(
	            query_engine=who_query_engine,
	            name=tool_name,
	            description=tool_description
	        )
	    except Exception as e:
	        # Best effort: the WHO tool is optional, so report the failure and carry on.
	        print(f" ⚠️ Error creating WHO document tool from index {index_dir}: {e}")
	        traceback.print_exc()
	        return None


	def _build_system_prompt(registered_tool_names, who_doc_tool_name):
	    """Compose the agent instructions, mentioning only tools that were actually registered."""
	    has_section_tools = any(
	        name.startswith("algerian_protocol_section_") for name in registered_tool_names
	    )
	    has_image_tool = "image_retrieval_tool" in registered_tool_names
	    has_stats_tool = "get_vaccination_statistics_tool" in registered_tool_names
	    has_patient_tool = "get_patient_vaccination_record_tool" in registered_tool_names
	    has_who_tool = who_doc_tool_name in registered_tool_names

	    capabilities = []
	    routing_rules = []
	    if has_section_tools:
	        capabilities.append(
	            "Query specific sections of the Algerian national vaccination protocol "
	            "(tools named 'algerian_protocol_section_X_tool')."
	        )
	        routing_rules.append(
	            "If the user asks for information from a specific section of the protocol, "
	            "use the corresponding 'algerian_protocol_section_X_tool'."
	        )
	    if has_image_tool:
	        capabilities.append(
	            "Fetch relevant images, figures, or diagrams from the protocol document "
	            "(tool named 'image_retrieval_tool')."
	        )
	        routing_rules.append(
	            "If the user asks for an image, figure, diagram, or visual representation, "
	            "use the 'image_retrieval_tool' and provide a description of the image needed."
	        )
	    if has_stats_tool:
	        capabilities.append(
	            "Fetch real-time vaccination statistics "
	            "(tool named 'get_vaccination_statistics_tool')."
	        )
	        routing_rules.append(
	            "If the user asks for vaccination statistics, use 'get_vaccination_statistics_tool'."
	        )
	    if has_patient_tool:
	        capabilities.append(
	            "Fetch a patient's vaccination record "
	            "(tool named 'get_patient_vaccination_record_tool')."
	        )
	        routing_rules.append(
	            "If the user asks for a patient record, use 'get_patient_vaccination_record_tool'."
	        )
	    if has_who_tool:
	        capabilities.append(
	            f"Query a general WHO document on vaccination guidelines (tool named '{who_doc_tool_name}')."
	        )
	        routing_rules.append(
	            f"For general global guidelines or WHO positions, use '{who_doc_tool_name}'."
	        )

	    prompt_lines = [
	        "You are an expert assistant for Algerian vaccination protocols and related health data.",
	        "Your primary goal is to answer the user's query accurately and comprehensively by strategically using the available tools.",
	        "Available tools can:",
	    ]
	    prompt_lines.extend(
	        f"{position}. {text}" for position, text in enumerate(capabilities, start=1)
	    )
	    prompt_lines.extend(["", "Carefully analyze the user's query."])
	    prompt_lines.extend(f"- {rule}" for rule in routing_rules)
	    prompt_lines.extend([
	        "",
	        "If information from multiple tools is needed, gather it step-by-step.",
	        "IMPORTANT : When providing information from the Algerian protocol sections, include section and page references after the tool's output provides them , or atleast the page number.",
	    ])
	    if has_image_tool:
	        prompt_lines.append(
	            "When providing information about an image, include its description and source details as returned by the tool."
	        )
	    prompt_lines.extend([
	        "Synthesize all gathered information into a clear, final answer.",
	        "If a query is ambiguous, ask for clarification.",
	        "Always prioritize information from the Algerian national protocol tools for specific local recommendations if available.",
	        "Present the final answer clearly in the user's language (likely French).",
	    ])
	    return "\n".join(prompt_lines)


	def create_agent_instance():
	    """Create and return an agent instance for API use.

	    Registers one function tool per protocol section whose text file exists
	    under ``SECTION_FILES_PATH``, the patient-record API tool when
	    ``API_CALLERS_AVAILABLE`` is true, and the WHO guidelines query tool when
	    its persisted index directory exists and loads. The agent instructions
	    are generated from the tools that were actually registered, so the agent
	    is never told to call a tool it does not have.

	    Returns:
	        The ``ReActAgent`` built from the registered tools and ``Settings.llm``.

	    Raises:
	        RuntimeError: If no tool at all could be created.
	    """
	    print("\n--- Creating Function Tools for API Agent ---")
	    tools_list = []
	    section_files_dir = Path(SECTION_FILES_PATH)

	    # Create tools for Algerian Protocol Sections
	    print(" Creating Algerian Protocol Section Tools...")
	    for section_def in _section_tool_definitions():
	        section_file_path = section_files_dir / section_def["filename"]
	        section_id_for_tool = section_def["id"]
	        section_title_for_tool = section_def.get("title", f"Section {section_id_for_tool} Details")
	        tool_name = f"algerian_protocol_section_{section_id_for_tool}_tool"

	        if not section_file_path.exists():
	            print(f" ⚠️ Warning: Protocol section file '{section_def['filename']}' not found. Skipping tool '{tool_name}'.")
	            continue

	        # Bind the per-section arguments now; only the query is supplied at call time.
	        bound_section_query = partial(
	            query_section_directly,
	            section_file_path=str(section_file_path),
	            section_id_for_log=section_id_for_tool,
	            section_title_for_prompt=section_title_for_tool,
	        )
	        protocol_section_tool = FunctionTool.from_defaults(
	            fn=_make_section_tool_fn(bound_section_query),
	            name=tool_name,
	            description=section_def["description"],
	        )
	        tools_list.append(protocol_section_tool)
	        print(f" Tool '{protocol_section_tool.metadata.name}' created.")

	    # Add API tools if available
	    if API_CALLERS_AVAILABLE:
	        print(" Creating .NET API Tools...")
	        # The statistics tool (get_vaccination_statistics) and the image
	        # retrieval tool (find_relevant_image_info) are currently disabled and
	        # therefore not registered; the system prompt only mentions them when
	        # tools named 'get_vaccination_statistics_tool' / 'image_retrieval_tool'
	        # are present in the tool list.
	        patient_record_api_tool = FunctionTool.from_defaults(
	            fn=get_patient_vaccination_record,
	            name="get_patient_vaccination_record_tool",
	            description=(
	                "Retrieves the vaccination record for a specific patient using their unique ID "
	                "from an external API. The input should be the patient's unique identifier string."
	            )
	        )
	        tools_list.append(patient_record_api_tool)

	    # Add WHO document tool if available
	    who_doc_tool_name = "who_general_vaccination_guidelines_tool"
	    who_doc_description = (
	        "Provides general vaccination guidelines, global recommendations, and position papers "
	        "from a key World Health Organization (WHO) document. Use for global perspectives, "
	        "general vaccine information, or when specifically asking about WHO official guidance."
	    )
	    who_doc_index_path = Path(SUPPLEMENTARY_INDEXES_BASE_PATH_FOR_AGENT) / "who_guidelines_index"
	    who_tool = _build_who_tool(who_doc_index_path, who_doc_tool_name, who_doc_description)
	    if who_tool is not None:
	        tools_list.append(who_tool)
	        print(f" Tool '{who_tool.metadata.name}' created.")

	    if not tools_list:
	        raise RuntimeError("No tools were created for the agent. Check file paths and tool definitions.")

	    registered_tool_names = {tool.metadata.name for tool in tools_list}
	    custom_system_prompt = _build_system_prompt(registered_tool_names, who_doc_tool_name)

	    # NOTE(review): ReActAgent.from_tools does not appear to take a
	    # `system_prompt` argument (unknown keyword arguments are not forwarded),
	    # so the instructions are passed as `context`, which the ReAct chat
	    # formatter places in the system header. Confirm against the installed
	    # llama-index-core version.
	    agent = ReActAgent.from_tools(
	        tools=tools_list,
	        llm=Settings.llm,
	        verbose=True,
	        context=custom_system_prompt
	    )

	    print(f"✅ Agent created with {len(tools_list)} tools.")
	    return agent



	def run_direct_protocol_agent():
	    """Run the interactive command-line loop (kept for backward compatibility).

	    Exits the process with status 1 when the ``SECTION_FILES_PATH`` directory
	    is missing. Prints a warning banner when ``DOTNET_API_BASE_URL`` is unset
	    or still equal to the default placeholder, then configures the settings,
	    builds the agent and answers queries read from standard input until the
	    user types ``exit`` or closes the input stream.
	    """
	    if not Path(SECTION_FILES_PATH).is_dir():
	        print(f"❌ Error: Directory for section files not found: {SECTION_FILES_PATH}")
	        sys.exit(1)

	    default_api_base_url = "http://localhost:5030/api/"
	    dotnet_api_base_url = os.getenv("DOTNET_API_BASE_URL")
	    if not dotnet_api_base_url or dotnet_api_base_url == default_api_base_url:
	        separator = "*" * 60
	        print(separator)
	        print("⚠️ IMPORTANT WARNING: DOTNET_API_BASE_URL environment variable is not set ")
	        # Show the placeholder value that is actually compared against above.
	        print(f" or is still the default placeholder '{default_api_base_url}'.")
	        print(" The .NET API tools WILL NOT function correctly. ")
	        print(" Please set it to your actual .NET API base URL if you intend to use them.")
	        print(separator)

	    configure_settings()
	    agent = create_agent_instance()

	    print("\n* Enter query for Orchestrator Agent (type 'exit' to quit) *")
	    while True:
	        try:
	            user_query = input("\nQuery: ")
	        except (EOFError, KeyboardInterrupt):
	            # Ctrl-D / Ctrl-C at the prompt is treated as a normal request to quit.
	            print("\nExiting...")
	            break

	        normalized_query = user_query.strip()
	        if not normalized_query:
	            continue
	        if normalized_query.lower() == 'exit':
	            print("Exiting...")
	            break

	        print(f"\nSending query to agent: '{user_query}'...")
	        try:
	            agent_response = agent.chat(user_query)
	            print("\n--- Agent's Final Answer ---")
	            print(agent_response.response)
	        except Exception as e:
	            # Keep the session alive: report the failure and prompt again.
	            print(f"❌ Error during agent query processing: {e}")
	            traceback.print_exc()


	# Add this function for the FastAPI to use
	def run_direct_protocol_agent_for_api():
	    """No-op placeholder: takes no arguments, does nothing and returns ``None``.

	    The name was referenced by the original FastAPI integration; the simple
	    version does not need it.
	    """
	    return None

	if __name__ == "__main__":
	    # Script entry point: run the CLI loop and report any unexpected failure.
	    try:
	        run_direct_protocol_agent()
	    except Exception as e:
	        print(f"A fatal error occurred: {e}")
	        traceback.print_exc()
	        # Signal the failure to the calling shell instead of exiting with status 0.
	        sys.exit(1)