Hugging Face Space (status: Sleeping)
Commit: "Upload app.py with huggingface_hub"
File changed: app.py
Unified diff for app.py (reconstructed from the garbled side-by-side diff view; change removes the ZeroGPU `spaces` dependency and replaces `device_map="auto"` with an explicit device/dtype selection):

@@ -4,7 +4,6 @@ import logging
 import re
 
 import gradio as gr
-import spaces
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
 
@@ -29,12 +28,13 @@ tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
 if tokenizer.pad_token is None:
     tokenizer.pad_token = tokenizer.eos_token
 
+device = "cuda" if torch.cuda.is_available() else "cpu"
+dtype = torch.bfloat16 if device == "cuda" else torch.float32
 model = AutoModelForCausalLM.from_pretrained(
     MODEL_ID,
-    torch_dtype=
-    device_map="auto",
+    torch_dtype=dtype,
     trust_remote_code=True,
-)
+).to(device)
 model.eval()
 logger.info("Model loaded.")
 
@@ -73,7 +73,6 @@ def _grade_relevance(question: str, sources: list[dict]) -> bool:
     return top_score >= 0.02 or overlap >= 0.35
 
 
-@spaces.GPU
 def crag_answer(message: str, history: list[dict]) -> str:
     question = message.strip()
     if not question:

NOTE(review): the value on the removed `torch_dtype=` line was truncated in the page extraction — confirm it against the original commit before relying on this reconstruction. All hunk line counts (-4,7/+4,6; -29,12/+28,13; -73,7/+73,6) check out against the reconstruction above.