Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -179,7 +179,6 @@
|
|
| 179 |
|
| 180 |
|
| 181 |
|
| 182 |
-
|
| 183 |
import gradio as gr
|
| 184 |
import fitz # PyMuPDF
|
| 185 |
import torch
|
|
@@ -191,16 +190,19 @@ from langchain_community.vectorstores import FAISS
|
|
| 191 |
from langchain_core.embeddings import Embeddings
|
| 192 |
|
| 193 |
# --- ONNX & MODEL IMPORTS ---
|
| 194 |
-
from transformers import AutoTokenizer
|
| 195 |
from optimum.onnxruntime import ORTModelForFeatureExtraction, ORTModelForCausalLM
|
|
|
|
| 196 |
|
| 197 |
# ---------------------------------------------------------
|
| 198 |
# 1. Custom ONNX Embedding Class (BGE-Large)
|
| 199 |
# ---------------------------------------------------------
|
| 200 |
class OnnxBgeEmbeddings(Embeddings):
|
| 201 |
-
def __init__(self, model_name="BAAI/bge-large-en-v1.5"
|
| 202 |
print(f"🚀 Loading Embeddings: {model_name}...")
|
| 203 |
self.tokenizer = AutoTokenizer.from_pretrained(model_name)
|
|
|
|
|
|
|
| 204 |
self.model = ORTModelForFeatureExtraction.from_pretrained(model_name, export=True)
|
| 205 |
|
| 206 |
def _process_batch(self, texts):
|
|
@@ -218,26 +220,31 @@ class OnnxBgeEmbeddings(Embeddings):
|
|
| 218 |
def embed_query(self, text):
|
| 219 |
return self._process_batch(["Represent this sentence for searching relevant passages: " + text])[0]
|
| 220 |
|
| 221 |
-
# ---------------------------------------------------------
|
| 222 |
-
# 2. LLM Evaluator Class (Llama-3.2-1B ONNX)
|
| 223 |
-
# ---------------------------------------------------------
|
| 224 |
# ---------------------------------------------------------
|
| 225 |
# 2. LLM Evaluator Class (Llama-3.2-1B ONNX)
|
| 226 |
# ---------------------------------------------------------
|
| 227 |
class LLMEvaluator:
|
| 228 |
def __init__(self):
|
| 229 |
-
|
| 230 |
-
|
| 231 |
-
print(f"π Loading LLM: {self.model_id}...")
|
| 232 |
|
| 233 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 234 |
|
| 235 |
-
#
|
| 236 |
-
# The library now automatically finds 'model.onnx' in the repo
|
| 237 |
self.model = ORTModelForCausalLM.from_pretrained(
|
| 238 |
-
|
| 239 |
use_cache=True,
|
| 240 |
-
use_io_binding=False
|
| 241 |
)
|
| 242 |
|
| 243 |
def evaluate(self, context, question, student_answer):
|
|
@@ -286,6 +293,7 @@ class LLMEvaluator:
|
|
| 286 |
skip_special_tokens=True
|
| 287 |
)
|
| 288 |
return response
|
|
|
|
| 289 |
# ---------------------------------------------------------
|
| 290 |
# 3. Main Application Logic
|
| 291 |
# ---------------------------------------------------------
|
|
|
|
| 179 |
|
| 180 |
|
| 181 |
|
|
|
|
| 182 |
import gradio as gr
|
| 183 |
import fitz # PyMuPDF
|
| 184 |
import torch
|
|
|
|
| 190 |
from langchain_core.embeddings import Embeddings
|
| 191 |
|
| 192 |
# --- ONNX & MODEL IMPORTS ---
|
| 193 |
+
from transformers import AutoTokenizer
|
| 194 |
from optimum.onnxruntime import ORTModelForFeatureExtraction, ORTModelForCausalLM
|
| 195 |
+
from huggingface_hub import snapshot_download
|
| 196 |
|
| 197 |
# ---------------------------------------------------------
|
| 198 |
# 1. Custom ONNX Embedding Class (BGE-Large)
|
| 199 |
# ---------------------------------------------------------
|
| 200 |
class OnnxBgeEmbeddings(Embeddings):
|
| 201 |
+
def __init__(self, model_name="BAAI/bge-large-en-v1.5"):
|
| 202 |
print(f"🚀 Loading Embeddings: {model_name}...")
|
| 203 |
self.tokenizer = AutoTokenizer.from_pretrained(model_name)
|
| 204 |
+
# Note: export=True will re-convert on every restart.
|
| 205 |
+
# For production, you'd want to save this permanently, but this works for now.
|
| 206 |
self.model = ORTModelForFeatureExtraction.from_pretrained(model_name, export=True)
|
| 207 |
|
| 208 |
def _process_batch(self, texts):
|
|
|
|
| 220 |
def embed_query(self, text):
|
| 221 |
return self._process_batch(["Represent this sentence for searching relevant passages: " + text])[0]
|
| 222 |
|
|
|
|
|
|
|
|
|
|
| 223 |
# ---------------------------------------------------------
|
| 224 |
# 2. LLM Evaluator Class (Llama-3.2-1B ONNX)
|
| 225 |
# ---------------------------------------------------------
|
| 226 |
class LLMEvaluator:
|
| 227 |
def __init__(self):
|
| 228 |
+
self.repo_id = "onnx-community/Llama-3.2-1B-Instruct"
|
| 229 |
+
print(f"🚀 Preparing LLM: {self.repo_id}...")
|
|
|
|
| 230 |
|
| 231 |
+
# [CRITICAL FIX]
|
| 232 |
+
# Download model to a specific LOCAL directory to avoid cache symlink errors
|
| 233 |
+
print("📥 Downloading model to local directory (this fixes the filesystem error)...")
|
| 234 |
+
local_model_path = snapshot_download(
|
| 235 |
+
repo_id=self.repo_id,
|
| 236 |
+
local_dir="onnx_llama_local", # Downloads to ./onnx_llama_local/
|
| 237 |
+
local_dir_use_symlinks=False # Forces real files, not symlinks
|
| 238 |
+
)
|
| 239 |
+
print("✅ Download complete.")
|
| 240 |
+
|
| 241 |
+
self.tokenizer = AutoTokenizer.from_pretrained(local_model_path)
|
| 242 |
|
| 243 |
+
# Load from the local folder
|
|
|
|
| 244 |
self.model = ORTModelForCausalLM.from_pretrained(
|
| 245 |
+
local_model_path,
|
| 246 |
use_cache=True,
|
| 247 |
+
use_io_binding=False
|
| 248 |
)
|
| 249 |
|
| 250 |
def evaluate(self, context, question, student_answer):
|
|
|
|
| 293 |
skip_special_tokens=True
|
| 294 |
)
|
| 295 |
return response
|
| 296 |
+
|
| 297 |
# ---------------------------------------------------------
|
| 298 |
# 3. Main Application Logic
|
| 299 |
# ---------------------------------------------------------
|