nitya001 committed on
Commit
a2ebcc6
·
verified ·
1 Parent(s): 20c6d29

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -18
app.py CHANGED
@@ -1,19 +1,21 @@
1
  import torch
2
- from transformers import AutoTokenizer, AutoModelForCausalLM
3
  import gradio as gr
 
 
4
 
5
  # ---------------- CONFIG ---------------- #
6
 
7
  BASE_MODEL = "TinyLlama/TinyLlama-1.1B-Chat-v1.0" # Base model
8
- LORA_REPO = "nitya001/autotrain-4n1y9-5ekvs" # Your fine-tuned LoRA repo
9
 
10
- # System prompt for behavior shaping
11
  SYSTEM_PROMPT = (
12
  "You are a helpful banking and loan support assistant. "
13
  "You answer short, clear, and factual responses about UTRs, EMIs, loan summaries, "
14
  "payment issues, and basic loan help. If unsure, respond generically."
15
  )
16
 
 
 
17
 
18
  # ---------------- LOAD TOKENIZER ---------------- #
19
 
@@ -24,34 +26,38 @@ if tokenizer.pad_token is None:
24
  tokenizer.pad_token = tokenizer.eos_token
25
 
26
 
27
- # ---------------- LOAD MODEL + LORA ---------------- #
28
-
29
- print("Loading base model + LoRA...")
30
 
31
- model = AutoModelForCausalLM.from_pretrained(
 
32
  BASE_MODEL,
33
  torch_dtype=torch.float32,
34
- device_map="cpu", # CPU runtime
35
- adapter_id=LORA_REPO, # <--- THE MAGIC LINE
 
 
 
 
 
 
 
36
  )
37
 
38
  model.eval()
39
- device = "cpu"
40
 
41
 
42
  # ---------------- CHAT FUNCTION ---------------- #
43
 
44
  def chat_fn(message, history):
45
  """
 
46
  history: list of [user, bot]
47
  """
48
 
49
- # Build conversation with system prompt
50
  conversation = f"System: {SYSTEM_PROMPT}\n"
51
-
52
  for user_msg, bot_msg in history:
53
  conversation += f"User: {user_msg}\nAssistant: {bot_msg}\n"
54
-
55
  conversation += f"User: {message}\nAssistant:"
56
 
57
  inputs = tokenizer(conversation, return_tensors="pt").to(device)
@@ -68,7 +74,7 @@ def chat_fn(message, history):
68
 
69
  full_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
70
 
71
- # Extract only latest answer
72
  if "Assistant:" in full_output:
73
  reply = full_output.split("Assistant:")[-1].strip()
74
  else:
@@ -82,13 +88,13 @@ def chat_fn(message, history):
82
 
83
  demo = gr.ChatInterface(
84
  fn=chat_fn,
85
- title="💬 TinyLoan Assistant (TinyLlama + LoRA)",
86
  description="Ask about UTR, loan summaries, EMIs, transactions, or payment issues.",
87
  examples=[
88
  "What is my latest UTR?",
89
- "Generate my loan summary",
90
- "Show my recent transactions",
91
- "My payment is stuck, what to do?"
92
  ],
93
  )
94
 
 
1
  import torch
 
2
  import gradio as gr
3
+ from transformers import AutoTokenizer, AutoModelForCausalLM
4
+ from peft import PeftModel
5
 
6
  # ---------------- CONFIG ---------------- #
7
 
8
  BASE_MODEL = "TinyLlama/TinyLlama-1.1B-Chat-v1.0" # Base model
9
+ LORA_REPO = "nitya001/autotrain-4n1y9-5ekvs" # Your AutoTrain LoRA repo
10
 
 
11
  SYSTEM_PROMPT = (
12
  "You are a helpful banking and loan support assistant. "
13
  "You answer short, clear, and factual responses about UTRs, EMIs, loan summaries, "
14
  "payment issues, and basic loan help. If unsure, respond generically."
15
  )
16
 
17
+ device = "cpu"
18
+
19
 
20
  # ---------------- LOAD TOKENIZER ---------------- #
21
 
 
26
  tokenizer.pad_token = tokenizer.eos_token
27
 
28
 
29
+ # ---------------- LOAD BASE MODEL ---------------- #
 
 
30
 
31
+ print("Loading base model...")
32
+ base_model = AutoModelForCausalLM.from_pretrained(
33
  BASE_MODEL,
34
  torch_dtype=torch.float32,
35
+ device_map=device,
36
+ )
37
+
38
+ # ---------------- LOAD LORA ADAPTER ---------------- #
39
+
40
+ print(f"Loading LoRA adapter from {LORA_REPO} ...")
41
+ model = PeftModel.from_pretrained(
42
+ base_model,
43
+ LORA_REPO,
44
  )
45
 
46
  model.eval()
 
47
 
48
 
49
  # ---------------- CHAT FUNCTION ---------------- #
50
 
51
  def chat_fn(message, history):
52
  """
53
+ Gradio ChatInterface callback.
54
  history: list of [user, bot]
55
  """
56
 
57
+ # Build conversation text
58
  conversation = f"System: {SYSTEM_PROMPT}\n"
 
59
  for user_msg, bot_msg in history:
60
  conversation += f"User: {user_msg}\nAssistant: {bot_msg}\n"
 
61
  conversation += f"User: {message}\nAssistant:"
62
 
63
  inputs = tokenizer(conversation, return_tensors="pt").to(device)
 
74
 
75
  full_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
76
 
77
+ # Extract only the latest answer after the last "Assistant:"
78
  if "Assistant:" in full_output:
79
  reply = full_output.split("Assistant:")[-1].strip()
80
  else:
 
88
 
89
  demo = gr.ChatInterface(
90
  fn=chat_fn,
91
+ title="💬 TinyLoan Assistant (TinyLlama + AutoTrain LoRA)",
92
  description="Ask about UTR, loan summaries, EMIs, transactions, or payment issues.",
93
  examples=[
94
  "What is my latest UTR?",
95
+ "Generate my loan summary.",
96
+ "Show my transactions.",
97
+ "My payment is stuck, what should I do?",
98
  ],
99
  )
100