|
|
from langchain.agents import Tool, AgentExecutor, ZeroShotAgent, create_react_agent |
|
|
from langchain.memory import ConversationBufferMemory |
|
|
from langchain_community.llms import HuggingFacePipeline |
|
|
from langchain.prompts import PromptTemplate |
|
|
from langchain.chains import LLMChain |
|
|
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline |
|
|
import torch |
|
|
import os |
|
|
from sqlalchemy.orm import Session |
|
|
from app.tools.labor_cost import LaborCostTool |
|
|
from app.tools.material_cost import MaterialCostTool |
|
|
from app.tools.margin import MarginTool |
|
|
from app.prompts import PREFIX, FORMAT_INSTRUCTIONS, SUFFIX |
|
|
from dotenv import load_dotenv |
|
|
|
|
|
|
|
|
# Load variables from a local .env file (e.g. MODEL_NAME) into os.environ
# before any configuration below is read.
load_dotenv()


# Hugging Face model id used by create_llm(); overridable through the
# MODEL_NAME environment variable, defaulting to Zephyr-7B-beta.
MODEL_NAME = os.getenv("MODEL_NAME", "HuggingFaceH4/zephyr-7b-beta")
|
|
|
|
|
def create_llm():
    """Create a HuggingFacePipeline LLM backed by a local transformers model.

    Loads the tokenizer and causal-LM weights for ``MODEL_NAME``, wraps them
    in a ``text-generation`` pipeline (sampling enabled, 512 new tokens max),
    and returns the pipeline as a LangChain ``HuggingFacePipeline``.

    Returns:
        HuggingFacePipeline: LLM ready for use in chains and agents.
    """
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        # fp16 only where CUDA can run it; full precision on CPU.
        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
        low_cpu_mem_usage=True,
        # Let accelerate place/shard across GPUs; plain CPU load otherwise.
        device_map="auto" if torch.cuda.is_available() else None,
        use_cache=True,
        # BUG FIX: the original passed a raw dict ({"load_in_8bit": True}) as
        # quantization_config on the CPU-only path. transformers expects a
        # QuantizationConfigMixin instance (e.g. BitsAndBytesConfig), and
        # bitsandbytes 8-bit quantization requires CUDA anyway, so that branch
        # could never work. Load unquantized on both paths.
        quantization_config=None,
    )

    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=512,
        temperature=0.7,
        top_p=0.95,
        repetition_penalty=1.15,
        do_sample=True,
        # Avoids the "pad_token_id not set" warning for models without one.
        pad_token_id=tokenizer.eos_token_id,
    )

    return HuggingFacePipeline(pipeline=pipe)
|
|
|
|
|
def create_agent(db: Session):
    """Create a tool-using LangChain agent for cost estimation.

    Wires the labor-cost, material-cost and margin tools (each bound to the
    given DB session) to the local LLM behind a zero-shot ReAct-style prompt
    with conversation memory.

    Args:
        db: SQLAlchemy session handed to each tool for its queries.

    Returns:
        AgentExecutor: executor with memory, parsing-error recovery, and a
        5-iteration cap.
    """
    tools = [
        LaborCostTool(db=db),
        MaterialCostTool(db=db),
        MarginTool(db=db),
    ]

    llm = create_llm()

    # ZeroShotAgent.create_prompt renders the tool names/descriptions directly
    # into the template text, so the resulting prompt exposes only the
    # input/chat_history/agent_scratchpad variables.
    prompt = ZeroShotAgent.create_prompt(
        tools=tools,
        prefix=PREFIX,
        format_instructions=FORMAT_INSTRUCTIONS,
        suffix=SUFFIX,
        input_variables=["input", "chat_history", "agent_scratchpad"],
    )

    memory = ConversationBufferMemory(memory_key="chat_history")

    # BUG FIX: the original passed this prompt to create_react_agent, which
    # validates that the prompt declares {tools} and {tool_names} input
    # variables and raises ValueError for a ZeroShotAgent-style prompt (the
    # tool text is already baked into the template). Use the matching legacy
    # LLMChain + ZeroShotAgent pair instead — both are already imported at the
    # top of this file (LLMChain was otherwise unused).
    llm_chain = LLMChain(llm=llm, prompt=prompt)
    agent = ZeroShotAgent(llm_chain=llm_chain, tools=tools)

    agent_executor = AgentExecutor.from_agent_and_tools(
        agent=agent,
        tools=tools,
        memory=memory,
        verbose=True,
        handle_parsing_errors=True,
        max_iterations=5,
    )

    return agent_executor