jdesiree committed on
Commit
ad35ef3
·
verified ·
1 Parent(s): 836e5f7

Replaced Quantization

Browse files

Removed:

AutoModelForCausalLM (PyTorch model)
BitsAndBytesConfig (bitsandbytes quantization)
DynamicCache (PyTorch caching)

Added:

ORTModelForCausalLM (ONNX Runtime model)
ORTQuantizer (ONNX quantization)
AutoQuantizationConfig (ONNX quantization config)

Files changed (1) hide show
  1. app.py +4 -2
app.py CHANGED
@@ -29,7 +29,9 @@ from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
29
  from langchain_core.runnables import Runnable
30
  from langchain_core.runnables.utils import Input, Output
31
 
32
- from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, TextIteratorStreamer, DynamicCache
 
 
33
  import torch
34
 
35
  load_dotenv(".env")
@@ -537,7 +539,7 @@ class Educational_Agent:
537
  start_init_and_langgraph_time = time.perf_counter()
538
  current_time = datetime.now()
539
 
540
- self.llm = Phi3MiniEducationalLLM(model_path="microsoft/Phi-3-mini-4k-instruct", use_4bit=True)
541
  self.tool_decision_engine = Tool_Decision_Engine(self.llm)
542
 
543
  # Create LangGraph workflow
 
29
  from langchain_core.runnables import Runnable
30
  from langchain_core.runnables.utils import Input, Output
31
 
32
+ from transformers import AutoTokenizer, TextIteratorStreamer
33
+ from optimum.onnxruntime import ORTModelForCausalLM, ORTQuantizer
34
+ from optimum.onnxruntime.configuration import AutoQuantizationConfig
35
  import torch
36
 
37
  load_dotenv(".env")
 
539
  start_init_and_langgraph_time = time.perf_counter()
540
  current_time = datetime.now()
541
 
542
+ self.llm = Phi3MiniEducationalLLM(model_path="microsoft/Phi-3-mini-4k-instruct", use_quantization=True)
543
  self.tool_decision_engine = Tool_Decision_Engine(self.llm)
544
 
545
  # Create LangGraph workflow