from langchain.agents import Tool, AgentExecutor, ZeroShotAgent
from langchain.memory import ConversationBufferMemory
from langchain_community.llms import HuggingFacePipeline
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch
import os
from sqlalchemy.orm import Session
from app.tools.labor_cost import LaborCostTool
from app.tools.material_cost import MaterialCostTool
from app.tools.margin import MarginTool
from app.prompts import PREFIX, FORMAT_INSTRUCTIONS, SUFFIX
from dotenv import load_dotenv

# Load environment variables
load_dotenv()

# Get the model name from the environment, with a sensible default
MODEL_NAME = os.getenv("MODEL_NAME", "HuggingFaceH4/zephyr-7b-beta")
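
# Example .env entry (illustrative; any Hugging Face causal-LM checkpoint
# with a compatible chat format could be substituted):
#   MODEL_NAME=HuggingFaceH4/zephyr-7b-beta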

def create_llm():
    """Create a HuggingFacePipeline LLM wrapping a local Hugging Face model."""
    # Load model and tokenizer
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        # Half precision on GPU; full precision on CPU
        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
        low_cpu_mem_usage=True,
        device_map="auto" if torch.cuda.is_available() else None,
        # Reuse past key/value states during generation
        use_cache=True,
        # Note: 8-bit quantization (transformers.BitsAndBytesConfig) requires a
        # CUDA GPU, so no quantization_config is passed on CPU-only machines.
    )

    # Text-generation pipeline tuned for Zephyr (a Mistral fine-tune)
    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=512,
        temperature=0.7,
        top_p=0.95,
        repetition_penalty=1.15,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id,
    )

    # Wrap the pipeline for use with LangChain
    llm = HuggingFacePipeline(pipeline=pipe)
    return llm
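
# Usage sketch (illustrative): the wrapper behaves like any LangChain LLM,
# e.g. `create_llm().invoke("Estimate the cost of ...")` on recent LangChain
# versions, or `llm("...")` on older ones.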

def create_agent(db: Session):
    """Create a LangChain agent wired up to the cost-estimation tools."""
    # Database-backed tools
    tools = [
        LaborCostTool(db=db),
        MaterialCostTool(db=db),
        MarginTool(db=db),
    ]

    # Create LLM
    llm = create_llm()

    # Prompt template combining the shared prefix, format instructions, and suffix
    prompt = ZeroShotAgent.create_prompt(
        tools=tools,
        prefix=PREFIX,
        format_instructions=FORMAT_INSTRUCTIONS,
        suffix=SUFFIX,
        input_variables=["input", "chat_history", "agent_scratchpad"],
    )

    # Conversation memory keyed to the prompt's {chat_history} variable
    memory = ConversationBufferMemory(memory_key="chat_history")

    # Build the agent from an LLMChain over the prompt above. (The prompt
    # created by ZeroShotAgent.create_prompt already has the tool descriptions
    # rendered in, so it must be paired with ZeroShotAgent rather than
    # create_react_agent, which expects {tools}/{tool_names} variables.)
    llm_chain = LLMChain(llm=llm, prompt=prompt)
    agent = ZeroShotAgent(
        llm_chain=llm_chain,
        allowed_tools=[tool.name for tool in tools],
    )

    # Executor that runs the think/act/observe loop
    agent_executor = AgentExecutor.from_agent_and_tools(
        agent=agent,
        tools=tools,
        memory=memory,
        verbose=True,
        handle_parsing_errors=True,
        max_iterations=5,
    )
    return agent_executor
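
# Minimal smoke test (illustrative sketch, not part of the app's entry points).
# It assumes a SQLAlchemy session factory named `SessionLocal` in app.database,
# which may be named differently in this project.
if __name__ == "__main__":
    from app.database import SessionLocal  # hypothetical session factory

    db = SessionLocal()
    try:
        agent = create_agent(db)
        result = agent.invoke(
            {"input": "What is the labor cost for 10 hours of carpentry?"}
        )
        print(result["output"])
    finally:
        db.close()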