Spaces:
Sleeping
Sleeping
Replaced Quantization
Browse files
Removed:
AutoModelForCausalLM (PyTorch model)
BitsAndBytesConfig (bitsandbytes quantization)
DynamicCache (PyTorch caching)
Added:
ORTModelForCausalLM (ONNX Runtime model)
ORTQuantizer (ONNX quantization)
AutoQuantizationConfig (ONNX quantization config)
app.py
CHANGED
|
@@ -29,7 +29,9 @@ from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
|
|
| 29 |
from langchain_core.runnables import Runnable
|
| 30 |
from langchain_core.runnables.utils import Input, Output
|
| 31 |
|
| 32 |
-
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, DynamicCache, TextIteratorStreamer
|
|
|
|
|
|
|
| 33 |
import torch
|
| 34 |
|
| 35 |
load_dotenv(".env")
|
|
@@ -537,7 +539,7 @@ class Educational_Agent:
|
|
| 537 |
start_init_and_langgraph_time = time.perf_counter()
|
| 538 |
current_time = datetime.now()
|
| 539 |
|
| 540 |
-
self.llm = Phi3MiniEducationalLLM(model_path="microsoft/Phi-3-mini-4k-instruct",
|
| 541 |
self.tool_decision_engine = Tool_Decision_Engine(self.llm)
|
| 542 |
|
| 543 |
# Create LangGraph workflow
|
|
|
|
| 29 |
from langchain_core.runnables import Runnable
|
| 30 |
from langchain_core.runnables.utils import Input, Output
|
| 31 |
|
| 32 |
+
from transformers import AutoTokenizer, TextIteratorStreamer
|
| 33 |
+
from optimum.onnxruntime import ORTModelForCausalLM, ORTQuantizer
|
| 34 |
+
from optimum.onnxruntime.configuration import AutoQuantizationConfig
|
| 35 |
import torch
|
| 36 |
|
| 37 |
load_dotenv(".env")
|
|
|
|
| 539 |
start_init_and_langgraph_time = time.perf_counter()
|
| 540 |
current_time = datetime.now()
|
| 541 |
|
| 542 |
+
self.llm = Phi3MiniEducationalLLM(model_path="microsoft/Phi-3-mini-4k-instruct", use_quantization=True)
|
| 543 |
self.tool_decision_engine = Tool_Decision_Engine(self.llm)
|
| 544 |
|
| 545 |
# Create LangGraph workflow
|