from langchain.agents import Tool, AgentExecutor, ZeroShotAgent, create_react_agent
from langchain.memory import ConversationBufferMemory
from langchain_community.llms import HuggingFacePipeline
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch
import os
from sqlalchemy.orm import Session
from app.tools.labor_cost import LaborCostTool
from app.tools.material_cost import MaterialCostTool
from app.tools.margin import MarginTool
from app.prompts import PREFIX, FORMAT_INSTRUCTIONS, SUFFIX
from dotenv import load_dotenv

# Load environment variables (e.g. MODEL_NAME) from a local .env file.
load_dotenv()

# Hugging Face model id; overridable via the MODEL_NAME env var.
MODEL_NAME = os.getenv("MODEL_NAME", "HuggingFaceH4/zephyr-7b-beta")


def create_llm():
    """Build a LangChain LLM backed by a local Hugging Face text-generation pipeline.

    Loads ``MODEL_NAME`` in fp16 with ``device_map="auto"`` when CUDA is
    available, otherwise in fp32 on CPU.

    Returns:
        HuggingFacePipeline: LangChain wrapper around the generation pipeline.
    """
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

    cuda = torch.cuda.is_available()
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        # Half precision only on GPU; CPU math stays in fp32.
        torch_dtype=torch.float16 if cuda else torch.float32,
        low_cpu_mem_usage=True,
        device_map="auto" if cuda else None,
        use_cache=True,
        # BUGFIX: the original passed quantization_config={"load_in_8bit": True}
        # on the *CPU* path. That fails twice over: quantization_config must be
        # a transformers.BitsAndBytesConfig instance (a plain dict raises), and
        # bitsandbytes 8-bit loading requires a CUDA GPU anyway. Load
        # unquantized here; opt into quantization explicitly on GPU setups.
    )

    # Generation pipeline; sampling parameters tuned for chat-style models.
    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=512,
        temperature=0.7,
        top_p=0.95,
        repetition_penalty=1.15,
        do_sample=True,
        # Silence the "pad_token_id not set" warning for causal LMs.
        pad_token_id=tokenizer.eos_token_id,
    )

    return HuggingFacePipeline(pipeline=pipe)


def create_agent(db: Session):
    """Create a ReAct-style agent executor wired to the costing tools.

    Args:
        db: SQLAlchemy session handed to each tool.

    Returns:
        AgentExecutor: executor with conversation memory, parsing-error
        recovery, and an iteration cap.
    """
    tools = [
        LaborCostTool(db=db),
        MaterialCostTool(db=db),
        MarginTool(db=db),
    ]

    llm = create_llm()

    # BUGFIX: the original built the prompt with ZeroShotAgent.create_prompt
    # (which renders the tool list directly into the template, leaving only
    # input/chat_history/agent_scratchpad as variables) and then handed it to
    # create_react_agent. create_react_agent requires {tools} and {tool_names}
    # as prompt input variables and raises ValueError when they are missing,
    # so agent construction failed at startup. Build the agent through the
    # matching ZeroShotAgent API instead.
    agent = ZeroShotAgent.from_llm_and_tools(
        llm=llm,
        tools=tools,
        prefix=PREFIX,
        format_instructions=FORMAT_INSTRUCTIONS,
        suffix=SUFFIX,
        input_variables=["input", "chat_history", "agent_scratchpad"],
    )

    # Memory key must match the {chat_history} placeholder in the prompt.
    memory = ConversationBufferMemory(memory_key="chat_history")

    return AgentExecutor.from_agent_and_tools(
        agent=agent,
        tools=tools,
        memory=memory,
        verbose=True,
        handle_parsing_errors=True,  # recover from malformed LLM output instead of crashing
        max_iterations=5,  # hard cap to avoid runaway tool loops
    )