|
|
|
|
|
from typing import List, Dict, Any, Optional |
|
|
|
|
|
|
|
|
from backend.config.settings import settings |
|
|
from backend.config.logging_config import get_logger |
|
|
from backend.services.vector_store import vector_store_service |
|
|
|
|
|
|
|
|
# Module-level logger for this service (configured via backend.config.logging_config).
logger = get_logger("llm_service")
|
|
|
|
|
class LLMService:
    """LLM service using ConversationalRetrievalChain for RAG pipeline.

    Construction wires together four components:
      * an LLM selected by ``settings.get_llm_config()`` (openai / google /
        ollama / huggingface),
      * a retriever from the shared vector store service,
      * a conversation buffer memory,
      * a ConversationalRetrievalChain combining all three.

    Any setup failure is logged and re-raised so importers fail fast instead
    of holding a half-initialized service.
    """

    def __init__(self):
        logger.info("🤖 Initializing LLM Service...")

        try:
            self.llm = self._setup_llm()
            self.retriever = self._setup_retriever()
            self.memory = self._setup_memory()
            self.qa_chain = self._setup_qa_chain()

            logger.info("🎉 LLM Service initialized successfully")

        except Exception as e:
            logger.error(f"❌ LLM Service initialization failed: {str(e)}", exc_info=True)
            raise

    def _setup_llm(self):
        """Build the configured LLM based on settings.

        Provider packages are imported lazily so that only the active
        provider's dependency must be installed. An unknown provider falls
        back to a default ChatOpenAI instance.

        Raises:
            ImportError: if the selected provider's package is not installed.
        """
        llm_config = settings.get_llm_config()
        provider = llm_config["provider"]

        logger.info(f"🔧 Setting up LLM provider: {provider}")

        if provider == "openai":
            return self._build_openai_llm(llm_config)
        elif provider == "google":
            return self._build_google_llm(llm_config)
        elif provider == "ollama":
            return self._build_ollama_llm(llm_config)
        elif provider == "huggingface":
            return self._build_huggingface_llm(llm_config)
        else:
            logger.warning(f"⚠️ Unknown LLM provider '{provider}', falling back to OpenAI")
            try:
                from langchain_openai import ChatOpenAI
                return ChatOpenAI()
            except ImportError:
                logger.error("❌ No valid LLM provider available")
                raise ImportError("No valid LLM provider available")

    def _build_openai_llm(self, llm_config: Dict[str, Any]):
        """Create a ChatOpenAI instance from the provider config."""
        try:
            from langchain_openai import ChatOpenAI
            logger.info("✅ OpenAI LLM imported successfully")

            temperature = llm_config["temperature"]
            model = llm_config["model"]
            max_tokens = llm_config.get("max_tokens", 1000)

            # gpt-5-nano rejects non-default temperatures, so force the only
            # accepted value regardless of what the config asks for.
            if "gpt-5-nano" in model.lower():
                temperature = 1.0
                logger.info(f"🔧 Using temperature=1.0 for {model} (model restriction)")

            logger.info(f"🎯 OpenAI config - Model: {model}, Output tokens: {max_tokens}, Temperature: {temperature}")

            return ChatOpenAI(
                api_key=llm_config["api_key"],
                model=model,
                temperature=temperature,
                max_tokens=max_tokens
            )
        except ImportError as e:
            logger.error(f"❌ OpenAI LLM not available: {e}")
            raise ImportError("OpenAI provider selected but langchain_openai not installed")

    def _build_google_llm(self, llm_config: Dict[str, Any]):
        """Create a ChatGoogleGenerativeAI instance from the provider config."""
        try:
            from langchain_google_genai import ChatGoogleGenerativeAI
            logger.info("✅ Google LLM imported successfully")

            max_output_tokens = llm_config.get("max_tokens", 1000)
            model = llm_config["model"]
            temperature = llm_config["temperature"]

            logger.info(f"🎯 Google config - Model: {model}, Output tokens: {max_output_tokens}, Temperature: {temperature}")

            return ChatGoogleGenerativeAI(
                google_api_key=llm_config["api_key"],
                model=model,
                temperature=temperature,
                max_output_tokens=max_output_tokens
            )
        except ImportError as e:
            logger.error(f"❌ Google LLM not available: {e}")
            raise ImportError("Google provider selected but langchain_google_genai not installed")

    def _build_ollama_llm(self, llm_config: Dict[str, Any]):
        """Create an Ollama LLM pointed at the configured local server."""
        try:
            from langchain_community.llms import Ollama
            logger.info("✅ Ollama LLM imported successfully")
            return Ollama(
                base_url=llm_config["base_url"],
                model=llm_config["model"],
                temperature=llm_config["temperature"]
            )
        except ImportError as e:
            logger.error(f"❌ Ollama LLM not available: {e}")
            raise ImportError("Ollama provider selected but langchain_community not installed")

    def _build_huggingface_llm(self, llm_config: Dict[str, Any]):
        """Create a HuggingFace LLM.

        When ``use_api`` is set, tries the hosted inference endpoint first and
        falls back to the legacy Hub client on any failure; otherwise builds a
        local transformers pipeline (optionally on GPU via ``use_gpu``).
        """
        try:
            use_api = llm_config.get("use_api", False)

            if use_api:
                try:
                    from langchain_huggingface import HuggingFaceEndpoint
                    logger.info("✅ Using HuggingFace API (no local download)")

                    return HuggingFaceEndpoint(
                        repo_id=llm_config["model"],
                        huggingfacehub_api_token=llm_config["api_token"],
                        temperature=0.7,
                        max_new_tokens=200,
                        repetition_penalty=1.1,
                        top_p=0.9
                    )
                except Exception as api_error:
                    logger.warning(f"⚠️ HuggingFace API failed: {api_error}")
                    logger.info("🔄 Falling back to HuggingFace Hub API...")

                    try:
                        from langchain_community.llms import HuggingFaceHub

                        return HuggingFaceHub(
                            repo_id=llm_config["model"],
                            huggingfacehub_api_token=llm_config["api_token"],
                            model_kwargs={
                                "temperature": 0.7,
                                "max_new_tokens": 200,
                                "repetition_penalty": 1.1,
                                "top_p": 0.9,
                                "do_sample": True
                            }
                        )
                    except Exception as hub_error:
                        logger.error(f"❌ HuggingFace Hub also failed: {hub_error}")
                        raise ImportError(f"Both HuggingFace API methods failed: {api_error}, {hub_error}")
            else:
                from langchain_huggingface import HuggingFacePipeline
                from transformers import pipeline

                logger.info("✅ Using HuggingFace local pipeline")

                pipeline_kwargs = {
                    "task": "text-generation",
                    "model": llm_config["model"],
                    "max_length": 512,
                    "do_sample": True,
                    "temperature": 0.7,
                    # GPT-2-family tokenizer ids; assumes a compatible model — TODO confirm.
                    "pad_token_id": 50256,
                    "eos_token_id": 50256,
                }

                if llm_config.get("use_gpu", False):
                    pipeline_kwargs["device_map"] = "auto"
                else:
                    pipeline_kwargs["device"] = "cpu"

                hf_pipeline = pipeline(**pipeline_kwargs)

                return HuggingFacePipeline(
                    pipeline=hf_pipeline,
                    model_kwargs={
                        "temperature": 0.7,
                        "max_new_tokens": 150,
                        "do_sample": True,
                        "top_p": 0.9,
                        "repetition_penalty": 1.1,
                        "early_stopping": True,
                        "num_beams": 4
                    }
                )
        except ImportError as e:
            logger.error(f"❌ HuggingFace LLM not available: {e}")
            raise ImportError("HuggingFace provider selected but required packages not installed")

    def _setup_retriever(self):
        """Setup retriever from vector store service."""
        return vector_store_service.get_retriever()

    def _setup_memory(self):
        """Setup conversation memory (buffer of prior chat messages)."""
        try:
            from langchain.memory import ConversationBufferMemory
            return ConversationBufferMemory(memory_key='chat_history', return_messages=True)
        except ImportError as e:
            logger.error(f"❌ ConversationBufferMemory not available: {e}")
            raise ImportError("langchain memory not available")

    def _setup_qa_chain(self):
        """Setup ConversationalRetrievalChain over the LLM, retriever, and memory."""
        try:
            from langchain.chains import ConversationalRetrievalChain
            return ConversationalRetrievalChain.from_llm(
                llm=self.llm,
                retriever=self.retriever,
                memory=self.memory,
                verbose=settings.LANGCHAIN_DEBUG
            )
        except ImportError as e:
            logger.error(f"❌ ConversationalRetrievalChain not available: {e}")
            raise ImportError("langchain chains not available")

    def _preprocess_query(self, question: str) -> str:
        """Preprocess user query to improve vector search accuracy.

        Lowercases, strips punctuation, and removes common filler words so
        the embedding focuses on content-bearing terms.
        """
        import re

        processed = question.lower()

        # Strip punctuation BEFORE filtering so trailing marks don't shield
        # filler words from the stop-word filter (e.g. "please!" -> "please").
        processed = re.sub(r'[^\w\s]', '', processed)

        # Filler words that add noise to the embedding; set for O(1) lookup.
        stop_words = {'i', 'want', 'a', 'an', 'the', 'for', 'with', 'can', 'you', 'give', 'me', 'please', 'help'}
        words = [word for word in processed.split() if word not in stop_words]

        # join() also collapses any leftover runs of whitespace.
        processed = ' '.join(words)

        logger.debug(f"🔧 Query preprocessing: '{question}' → '{processed}'")
        return processed

    def ask_question(self, user_question: str) -> str:
        """Ask a question using the conversational retrieval chain.

        Retrieves documents for both the raw and preprocessed query (used for
        token accounting/debugging — the chain performs its own retrieval),
        then invokes the chain with a prompt wrapper. Errors are returned to
        the caller as a friendly message rather than raised.
        """
        logger.info(f"❓ Processing: '{user_question[:60]}...'")

        try:
            processed_query = self._preprocess_query(user_question)

            # Dual retrieval + dedupe by page content, capped at 8 documents.
            document_retriever = getattr(self.qa_chain, 'retriever', None)
            retrieved_context = ""
            if document_retriever:
                original_docs = document_retriever.invoke(user_question)
                processed_docs = document_retriever.invoke(processed_query)

                seen_content = set()
                unique_documents = []
                for document in original_docs + processed_docs:
                    if document.page_content not in seen_content:
                        unique_documents.append(document)
                        seen_content.add(document.page_content)

                retrieved_context = "\n".join([doc.page_content for doc in unique_documents[:8]])
                logger.debug(f"🔍 Retrieved {len(unique_documents)} unique documents")

            enhanced_question = f"""Based on the available recipe information, please answer this cooking question: "{user_question}"

Respond directly and naturally as if you're sharing your own culinary knowledge. If there's a specific recipe that matches the request, share the complete recipe with ingredients and step-by-step instructions in a friendly, conversational way."""

            # .invoke() replaces the deprecated Chain.__call__ API; the result
            # is the same mapping with the generated text under "answer".
            result = self.qa_chain.invoke({"question": enhanced_question})
            generated_answer = result["answer"]

            self._log_token_usage(user_question, retrieved_context, generated_answer)

            logger.info(f"✅ Response generated ({len(generated_answer)} chars)")
            return generated_answer

        except Exception as error:
            logger.error(f"❌ Error in ask_question: {str(error)}")
            return f"Sorry, I encountered an error: {str(error)}"

    def _count_tokens(self, text: str) -> int:
        """Count tokens in text (rough ~4-chars-per-token estimate, debugging only)."""
        return len(text) // 4 if text else 0

    def _log_token_usage(self, question: str, context: str, response: str) -> Dict[str, int]:
        """Log estimated token usage for monitoring and return the totals."""
        question_tokens = self._count_tokens(question)
        context_tokens = self._count_tokens(context)
        response_tokens = self._count_tokens(response)
        total_input_tokens = question_tokens + context_tokens

        logger.info(f"📊 Token Usage - Input:{total_input_tokens} (Q:{question_tokens}+C:{context_tokens}), Output:{response_tokens}")

        if context_tokens > 3000:
            logger.warning(f"⚠️ Large context detected: {context_tokens} tokens")

        return {
            "input_tokens": total_input_tokens,
            "output_tokens": response_tokens,
            "total_tokens": total_input_tokens + response_tokens
        }

    def clear_memory(self) -> bool:
        """Clear conversation memory; returns True on success, False otherwise."""
        try:
            if hasattr(self.memory, 'clear'):
                self.memory.clear()
                logger.info("✅ Memory cleared")
                return True
            # Explicit False instead of falling through to an implicit None.
            return False
        except Exception as e:
            logger.warning(f"⚠️ Could not clear memory: {e}")
            return False

    def simple_chat_completion(self, user_message: str) -> str:
        """Simple chat completion without RAG - direct LLM response.

        Normalizes the various LangChain LLM return shapes (message objects,
        raw strings, other) to a plain string, guards against empty output,
        and truncates long answers to roughly two sentences / 300 chars.
        """
        logger.info(f"💬 Simple chat: '{user_message[:50]}...'")

        try:
            llm_prompt = f"As a knowledgeable cooking expert, share your insights about {user_message}. Provide helpful culinary advice and recommendations:\n\n"

            # Chat models expose .invoke(); older completion LLMs are callable.
            llm_response = self.llm.invoke(llm_prompt) if hasattr(self.llm, 'invoke') else self.llm(llm_prompt)

            if hasattr(llm_response, 'content'):
                generated_answer = llm_response.content
            elif isinstance(llm_response, str):
                # Some completion models echo the prompt; strip it if present.
                generated_answer = llm_response.replace(llm_prompt, "").strip() if llm_prompt in llm_response else llm_response
            else:
                generated_answer = str(llm_response)

            generated_answer = generated_answer.strip()
            if not generated_answer or len(generated_answer) < 10:
                generated_answer = "I'd be happy to help with recipes! Ask me about specific ingredients or dishes."

            # Keep chat replies short: first two sentences, or a hard 300-char cut.
            if len(generated_answer) > 300:
                answer_sentences = generated_answer.split('. ')
                generated_answer = '. '.join(answer_sentences[:2]) + '.' if len(answer_sentences) > 1 else generated_answer[:300]

            logger.info(f"✅ Response generated ({len(generated_answer)} chars)")
            return generated_answer

        except Exception as error:
            logger.error(f"❌ Simple chat completion error: {str(error)}")
            return f"Sorry, I encountered an error: {str(error)}"
|
|
|
|
|
|
|
|
# Eagerly-created module-level singleton: importing this module runs the full
# LLM/retriever/memory/chain setup (and raises on failure).
llm_service = LLMService()
|
|
|