File size: 2,801 Bytes
8223b74
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
from langchain.agents import Tool, AgentExecutor, ZeroShotAgent, create_react_agent
from langchain.memory import ConversationBufferMemory
from langchain_community.llms import HuggingFacePipeline
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch
import os
from sqlalchemy.orm import Session
from app.tools.labor_cost import LaborCostTool
from app.tools.material_cost import MaterialCostTool
from app.tools.margin import MarginTool
from app.prompts import PREFIX, FORMAT_INSTRUCTIONS, SUFFIX
from dotenv import load_dotenv

# Load environment variables from a local .env file (no-op if absent).
load_dotenv()

# Hugging Face model id, overridable via the MODEL_NAME env var.
MODEL_NAME = os.getenv("MODEL_NAME", "HuggingFaceH4/zephyr-7b-beta")

def create_llm():
    """Load MODEL_NAME and wrap it in a LangChain HuggingFacePipeline.

    On a CUDA host the model is loaded in fp16 and sharded automatically
    across available devices; on CPU it is loaded in fp32.

    Returns:
        HuggingFacePipeline: a LangChain LLM backed by a local
        text-generation pipeline (sampling enabled, 512 new tokens max).
    """
    # Load model and tokenizer
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        # fp16 is only safe/fast on GPU; CPU inference stays in fp32.
        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
        low_cpu_mem_usage=True,
        device_map="auto" if torch.cuda.is_available() else None,
        use_cache=True,
        # BUG FIX: the previous code passed
        # quantization_config={"load_in_8bit": True} on CPU-only hosts.
        # That was doubly wrong: transformers expects a BitsAndBytesConfig
        # object (not a plain dict), and bitsandbytes 8-bit quantization
        # requires a CUDA device anyway — so the CPU path could never load.
        # Load unquantized instead; low_cpu_mem_usage already limits RAM.
    )

    # Text-generation pipeline with sampling parameters tuned for chat models.
    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=512,
        temperature=0.7,
        top_p=0.95,
        repetition_penalty=1.15,
        do_sample=True,
        # Causal LMs often lack a dedicated pad token; reuse EOS to silence
        # the pipeline warning and keep batching well-defined.
        pad_token_id=tokenizer.eos_token_id,
    )

    # Wrap for LangChain so the agent can call it like any other LLM.
    return HuggingFacePipeline(pipeline=pipe)

def create_agent(db: Session):
    """Build an AgentExecutor wired to the costing tools.

    Args:
        db: SQLAlchemy session shared by all tools for DB lookups.

    Returns:
        AgentExecutor: agent with conversation memory, verbose tracing,
        parsing-error recovery, and a 5-iteration cap.
    """
    # Tools all share the caller's DB session.
    tools = [
        LaborCostTool(db=db),
        MaterialCostTool(db=db),
        MarginTool(db=db),
    ]

    llm = create_llm()

    # ZeroShotAgent.create_prompt renders the tool names and descriptions
    # directly into the template text, so the finished prompt exposes only
    # {input}, {chat_history} and {agent_scratchpad}.
    prompt = ZeroShotAgent.create_prompt(
        tools=tools,
        prefix=PREFIX,
        format_instructions=FORMAT_INSTRUCTIONS,
        suffix=SUFFIX,
        input_variables=["input", "chat_history", "agent_scratchpad"],
    )

    # String-buffer memory keyed to match the prompt's {chat_history} slot.
    memory = ConversationBufferMemory(memory_key="chat_history")

    # BUG FIX: the previous code fed this prompt to create_react_agent(),
    # which validates that the prompt declares {tools} and {tool_names}
    # placeholders and therefore raises ValueError before the agent ever
    # runs. Since the prompt was built with the legacy ZeroShotAgent API,
    # construct the matching legacy agent (LLMChain + ZeroShotAgent).
    llm_chain = LLMChain(llm=llm, prompt=prompt)
    agent = ZeroShotAgent(
        llm_chain=llm_chain,
        allowed_tools=[tool.name for tool in tools],
    )

    agent_executor = AgentExecutor.from_agent_and_tools(
        agent=agent,
        tools=tools,
        memory=memory,
        verbose=True,
        # Recover from malformed LLM output instead of crashing mid-run.
        handle_parsing_errors=True,
        max_iterations=5,
    )

    return agent_executor