saeedbenadeeb committed on
Commit
e8db7c8
·
verified ·
1 Parent(s): 103d73d

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +4 -5
app.py CHANGED
@@ -4,7 +4,6 @@ import logging
4
  import re
5
 
6
  import gradio as gr
7
- import spaces
8
  import torch
9
  from transformers import AutoModelForCausalLM, AutoTokenizer
10
 
@@ -29,12 +28,13 @@ tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
29
  if tokenizer.pad_token is None:
30
  tokenizer.pad_token = tokenizer.eos_token
31
 
 
 
32
  model = AutoModelForCausalLM.from_pretrained(
33
  MODEL_ID,
34
- torch_dtype=torch.bfloat16,
35
- device_map="auto",
36
  trust_remote_code=True,
37
- )
38
  model.eval()
39
  logger.info("Model loaded.")
40
 
@@ -73,7 +73,6 @@ def _grade_relevance(question: str, sources: list[dict]) -> bool:
73
  return top_score >= 0.02 or overlap >= 0.35
74
 
75
 
76
- @spaces.GPU
77
  def crag_answer(message: str, history: list[dict]) -> str:
78
  question = message.strip()
79
  if not question:
 
4
  import re
5
 
6
  import gradio as gr
 
7
  import torch
8
  from transformers import AutoModelForCausalLM, AutoTokenizer
9
 
 
28
  if tokenizer.pad_token is None:
29
  tokenizer.pad_token = tokenizer.eos_token
30
 
31
+ device = "cuda" if torch.cuda.is_available() else "cpu"
32
+ dtype = torch.bfloat16 if device == "cuda" else torch.float32
33
  model = AutoModelForCausalLM.from_pretrained(
34
  MODEL_ID,
35
+ torch_dtype=dtype,
 
36
  trust_remote_code=True,
37
+ ).to(device)
38
  model.eval()
39
  logger.info("Model loaded.")
40
 
 
73
  return top_score >= 0.02 or overlap >= 0.35
74
 
75
 
 
76
  def crag_answer(message: str, history: list[dict]) -> str:
77
  question = message.strip()
78
  if not question: