Spaces:

AnuragShirke
/

knowledge-assistant-backend

Sleeping

Anurag Shirke

For enhancing the LLM memory limit which is hindering the performance

43fe2fe 7 months ago

1.1 kB

	import ollama
	import os

	# --- Ollama Client Initialization ---

	def get_ollama_client():
	"""Initializes and returns the Ollama client."""
	host = os.environ.get("OLLAMA_HOST", "http://localhost:11434")
	return ollama.Client(host=host)

	# --- Prompt Generation ---

	def format_prompt(query: str, context: list[dict]) -> str:
	"""Formats the prompt for the LLM with the retrieved context."""
	context_str = "\n".join([item.payload.get('text') for item in context])
	prompt = f"""Instruction:
	Answer the user's query based only on the provided context.
	If the context does not contain the answer, state that you cannot answer the question with the given information.
	Do not use any prior knowledge.

	Context:
	{context_str}

	Query:
	{query}

	Answer:
	"""
	return prompt

	# --- LLM Interaction ---

	def generate_response(client: ollama.Client, model: str, prompt: str):
	"""Generates a response from the LLM."""
	response = client.chat(
	model=model,
	messages=[{"role": "user", "content": prompt}]
	)
	return response['message']['content']