Spaces:

BarudakLearning
/

terTATa

Sleeping

App Files Files Community

Rifqidits committed on Jul 9, 2025

Commit

795555f

1 Parent(s): 96e720f

Update app.py

Browse files

Files changed (1) hide show

app.py +12 -10

app.py CHANGED Viewed

@@ -1,8 +1,7 @@
-# chatbot_template.py
 import spaces
 DESCRIPTION = """
-# ✨ Simple Chatbot Template
 This is a placeholder demo template for a chatbot on Hugging Face Spaces using Gradio.
 You can modify the backend `generate_response` function to integrate any LLM model (like LLaMA, Mistral, or others).
 """
@@ -13,7 +12,6 @@ LICENSE = """
 """
 # This is a dummy generation function
-@spaces.GPU  # This allows it to run on GPU Spaces (remove if not needed)
 import torch
 import gradio as gr
 from transformers import AutoTokenizer, AutoModelForCausalLM
@@ -26,7 +24,10 @@ lora_path = "./tat-llm-final-e3"  # Path to your fine-tuned LoRA folder
 # Load base model and LoRA adapter
 base_model = AutoModelForCausalLM.from_pretrained(base_model_id, torch_dtype=torch.float16)
 model = PeftModel.from_pretrained(base_model, lora_path)
-model.eval().cuda()
 # Load tokenizer
 tokenizer = AutoTokenizer.from_pretrained(lora_path)
@@ -34,22 +35,19 @@ tokenizer = AutoTokenizer.from_pretrained(lora_path)
 # === [2] Prompt Formatting Function ===
 def create_prompt(table, context, question):
     """Build the text prompt for one table-and-text question.
 
     The result is a fixed instruction sentence followed by labelled
     ``Table:``, ``Context:`` and ``Question:`` sections, each filled with
     the corresponding argument as rendered by f-string formatting, and it
     ends with a bare ``Answer:`` line for the model to continue from.
     """
     # The literal below is the exact prompt layout; do not re-indent it,
     # because whitespace inside the triple-quoted string is part of the prompt.
     return f"""You are a financial assistant. Given the table and context, answer the question.
 Table:
 {table}
 Context:
 {context}
 Question:
 {question}
 Answer:"""
 # === [3] Inference Function ===
 def answer_question(table, context, question):
     prompt = create_prompt(table, context, question)
-    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
     with torch.no_grad():
         outputs = model.generate(
             **inputs,
@@ -57,7 +55,11 @@ def answer_question(table, context, question):
             do_sample=False,
             eos_token_id=tokenizer.eos_token_id
         )
-    return tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
 # === [4] Gradio UI Layout ===
 with gr.Blocks(title="TAT-LLM Table & Text QA") as demo:

 import spaces
 DESCRIPTION = """
+# Simple Chatbot Template
 This is a placeholder demo template for a chatbot on Hugging Face Spaces using Gradio.
 You can modify the backend `generate_response` function to integrate any LLM model (like LLaMA, Mistral, or others).
 """
 """
 # This is a dummy generation function
 import torch
 import gradio as gr
 from transformers import AutoTokenizer, AutoModelForCausalLM
 # Load base model and LoRA adapter
 base_model = AutoModelForCausalLM.from_pretrained(base_model_id, torch_dtype=torch.float16)
 model = PeftModel.from_pretrained(base_model, lora_path)
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+model = model.to(device)
+model.eval()
 # Load tokenizer
 tokenizer = AutoTokenizer.from_pretrained(lora_path)
 # === [2] Prompt Formatting Function ===
 def create_prompt(table, context, question):
     """Build the text prompt for one table-and-text question.
 
     The result is a fixed instruction sentence followed by labelled
     ``Table:``, ``Context:`` and ``Question:`` sections, each filled with
     the corresponding argument as rendered by f-string formatting, and it
     ends with a bare ``Answer:`` line for the model to continue from.
     """
     # The literal below is the exact prompt layout; do not re-indent it,
     # because whitespace inside the triple-quoted string is part of the prompt.
     # NOTE(review): answer_question splits the decoded output on "Answer:",
     # so this trailing marker must stay in sync with that split string.
     return f"""You are a financial assistant. Given the table and context, answer the question.
 Table:
 {table}
 Context:
 {context}
 Question:
 {question}
 Answer:"""
 # === [3] Inference Function ===
 def answer_question(table, context, question):
     prompt = create_prompt(table, context, question)
+    inputs = tokenizer(prompt, return_tensors="pt").to(device)  # <-- use .to(device)
     with torch.no_grad():
         outputs = model.generate(
             **inputs,
             do_sample=False,
             eos_token_id=tokenizer.eos_token_id
         )
+    # --- Decode and clean up ---
+    output_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
+    answer = output_text.split("Answer:")[-1].strip()
+    return answer
 # === [4] Gradio UI Layout ===
 with gr.Blocks(title="TAT-LLM Table & Text QA") as demo: