Rifqidits committed on
Commit
795555f
·
1 Parent(s): 96e720f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -10
app.py CHANGED
@@ -1,8 +1,7 @@
1
- # chatbot_template.py
2
  import spaces
3
 
4
  DESCRIPTION = """
5
- # Simple Chatbot Template
6
  This is a placeholder demo template for a chatbot on Hugging Face Spaces using Gradio.
7
  You can modify the backend `generate_response` function to integrate any LLM model (like LLaMA, Mistral, or others).
8
  """
@@ -13,7 +12,6 @@ LICENSE = """
13
  """
14
 
15
  # This is a dummy generation function
16
- @spaces.GPU # This allows it to run on GPU Spaces (remove if not needed)
17
  import torch
18
  import gradio as gr
19
  from transformers import AutoTokenizer, AutoModelForCausalLM
@@ -26,7 +24,10 @@ lora_path = "./tat-llm-final-e3" # Path to your fine-tuned LoRA folder
26
  # Load base model and LoRA adapter
27
  base_model = AutoModelForCausalLM.from_pretrained(base_model_id, torch_dtype=torch.float16)
28
  model = PeftModel.from_pretrained(base_model, lora_path)
29
- model.eval().cuda()
 
 
 
30
 
31
  # Load tokenizer
32
  tokenizer = AutoTokenizer.from_pretrained(lora_path)
@@ -34,22 +35,19 @@ tokenizer = AutoTokenizer.from_pretrained(lora_path)
34
  # === [2] Prompt Formatting Function ===
35
  def create_prompt(table, context, question):
36
  return f"""You are a financial assistant. Given the table and context, answer the question.
37
-
38
  Table:
39
  {table}
40
-
41
  Context:
42
  {context}
43
-
44
  Question:
45
  {question}
46
-
47
  Answer:"""
48
 
49
  # === [3] Inference Function ===
50
  def answer_question(table, context, question):
51
  prompt = create_prompt(table, context, question)
52
- inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
 
53
  with torch.no_grad():
54
  outputs = model.generate(
55
  **inputs,
@@ -57,7 +55,11 @@ def answer_question(table, context, question):
57
  do_sample=False,
58
  eos_token_id=tokenizer.eos_token_id
59
  )
60
- return tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
 
 
 
 
61
 
62
  # === [4] Gradio UI Layout ===
63
  with gr.Blocks(title="TAT-LLM Table & Text QA") as demo:
 
 
1
  import spaces
2
 
3
  DESCRIPTION = """
4
+ # Simple Chatbot Template
5
  This is a placeholder demo template for a chatbot on Hugging Face Spaces using Gradio.
6
  You can modify the backend `generate_response` function to integrate any LLM model (like LLaMA, Mistral, or others).
7
  """
 
12
  """
13
 
14
  # This is a dummy generation function
 
15
  import torch
16
  import gradio as gr
17
  from transformers import AutoTokenizer, AutoModelForCausalLM
 
24
  # Load base model and LoRA adapter
25
  base_model = AutoModelForCausalLM.from_pretrained(base_model_id, torch_dtype=torch.float16)
26
  model = PeftModel.from_pretrained(base_model, lora_path)
27
+
28
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
29
+ model = model.to(device)
30
+ model.eval()
31
 
32
  # Load tokenizer
33
  tokenizer = AutoTokenizer.from_pretrained(lora_path)
 
35
  # === [2] Prompt Formatting Function ===
36
def create_prompt(table, context, question):
    """Assemble the financial-QA prompt fed to the model.

    Args:
        table: Serialized table content (string).
        context: Free-text passage accompanying the table.
        question: The user's question.

    Returns:
        A single instruction prompt that ends in "Answer:" so the model
        continues the text with the answer.
    """
    header = "You are a financial assistant. Given the table and context, answer the question."
    # Sections are separated by single newlines; no trailing newline after
    # the final "Answer:" marker.
    return (
        f"{header}\n"
        f"Table:\n{table}\n"
        f"Context:\n{context}\n"
        f"Question:\n{question}\n"
        "Answer:"
    )
45
 
46
  # === [3] Inference Function ===
47
  def answer_question(table, context, question):
      # Builds the prompt, runs deterministic (greedy, do_sample=False)
      # generation under torch.no_grad(), then returns only the text that
      # follows the last "Answer:" marker in the decoded output.
      # NOTE(review): this diff view elides new-file line 54 of the
      # generate() call (between **inputs and do_sample) — presumably a
      # max_new_tokens= kwarg; confirm against the full app.py.
48
  prompt = create_prompt(table, context, question)
49
+ inputs = tokenizer(prompt, return_tensors="pt").to(device) # <-- use .to(device)
50
+
51
  with torch.no_grad():
52
  outputs = model.generate(
53
  **inputs,

55
  do_sample=False,
56
  eos_token_id=tokenizer.eos_token_id
57
  )
58
+
59
+ # --- Decode and clean up ---
60
+ output_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
61
+ # The prompt itself ends in "Answer:", so splitting on that marker and
62
+ # taking the last segment strips the echoed prompt from the generation.
+ answer = output_text.split("Answer:")[-1].strip()
62
+ return answer
63
 
64
  # === [4] Gradio UI Layout ===
65
  with gr.Blocks(title="TAT-LLM Table & Text QA") as demo: