rishabhsetiya committed
Commit 60dcea8 · verified · 1 Parent(s): eff248a

Update app.py

Files changed (1)
app.py +7 -221
app.py CHANGED
@@ -1,228 +1,15 @@
-import os
-import json
-import torch
-import math
-import pandas as pd
-import torch.nn as nn
-import torch.nn.functional as F
-from datasets import Dataset
-import transformers
-from transformers import AutoModelForCausalLM, DataCollatorForLanguageModeling, Trainer, TrainingArguments
-from peft import LoraConfig, get_peft_model
 import gradio as gr
+from fine_tuning import load_and_train, generate_answer
 
-# -----------------------------
-# ENVIRONMENT / CACHE
-# -----------------------------
-os.environ["TRANSFORMERS_CACHE"] = "/tmp/huggingface_cache"
-os.environ["HF_HOME"] = "/tmp/huggingface_cache"
-os.environ["HF_DATASETS_CACHE"] = "/tmp/huggingface_cache"
-os.environ["HF_METRICS_CACHE"] = "/tmp/huggingface_cache"
-os.environ["WANDB_MODE"] = "disabled"
 
-# -----------------------------
-# SETTINGS
-# -----------------------------
-MODEL_ID = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
-HF_TOKEN = os.getenv("HF_TOKEN")
-device = "cuda" if torch.cuda.is_available() else "cpu"
-
-# -----------------------------
-# LOAD TOKENIZER
-# -----------------------------
-tokenizer = transformers.AutoTokenizer.from_pretrained(MODEL_ID)
-
-# -----------------------------
-# LoRA / MoE Modules
-# -----------------------------
-class LoraLinear(nn.Module):
-    def __init__(self, in_features, out_features, r=8, lora_alpha=16, lora_dropout=0.05, bias=False):
-        super().__init__()
-        self.in_features = in_features
-        self.out_features = out_features
-        self.r = r
-        self.scaling = lora_alpha / r if r > 0 else 1.0
-        self.weight = nn.Parameter(torch.empty(out_features, in_features), requires_grad=False)
-        self.bias = nn.Parameter(torch.zeros(out_features), requires_grad=False) if bias else None
-
-        if r > 0:
-            self.lora_A = nn.Parameter(torch.zeros((r, in_features)))
-            self.lora_B = nn.Parameter(torch.zeros((out_features, r)))
-            nn.init.kaiming_uniform_(self.lora_A, a=math.sqrt(5))
-            nn.init.zeros_(self.lora_B)
-            self.lora_dropout = nn.Dropout(p=lora_dropout)
-        else:
-            self.lora_A, self.lora_B, self.lora_dropout = None, None, None
-
-    def forward(self, x):
-        result = F.linear(x, self.weight, self.bias)
-        if self.r > 0:
-            lora_out = self.lora_dropout(x) @ self.lora_A.T @ self.lora_B.T
-            result = result + self.scaling * lora_out
-        return result
-
-class MoELoRALinear(nn.Module):
-    def __init__(self, base_linear, r, num_experts=2, k=1, lora_alpha=16, lora_dropout=0.05):
-        super().__init__()
-        self.base_linear = base_linear
-        self.num_experts = num_experts
-        self.k = k
-        self.experts = nn.ModuleList([
-            LoraLinear(base_linear.in_features, base_linear.out_features, r=r, lora_alpha=lora_alpha, lora_dropout=lora_dropout)
-            for _ in range(num_experts)
-        ])
-        self.gate = nn.Linear(base_linear.in_features, num_experts)
-
-    def forward(self, x):
-        base_out = self.base_linear(x)
-        gate_scores = torch.softmax(self.gate(x), dim=-1)
-        expert_out = 0
-        for i, expert in enumerate(self.experts):
-            expert_out += gate_scores[..., i:i+1] * expert(x)
-        return base_out + expert_out
-
-def replace_proj_with_moe_lora(model, r=8, num_experts=2, k=1, lora_alpha=16, lora_dropout=0.05):
-    for layer in model.model.layers:
-        for proj_name in ["up_proj", "down_proj"]:
-            old = getattr(layer.mlp, proj_name)
-            moe = MoELoRALinear(
-                base_linear=old,
-                r=r,
-                num_experts=num_experts,
-                k=k,
-                lora_alpha=lora_alpha,
-                lora_dropout=lora_dropout,
-            ).to(next(old.parameters()).device)
-            setattr(layer.mlp, proj_name, moe)
-    return model
-
-# -----------------------------
-# Load / Prepare Model & Dataset
-# -----------------------------
-def preprocess(example):
-    tokens = tokenizer(example['text'], truncation=True, padding=False)
-    text = example['text']
-    assistant_index = text.find("<|assistant|>")
-    prefix_ids = tokenizer(text[:assistant_index], add_special_tokens=False)['input_ids']
-    prefix_len = len(prefix_ids)
-    labels = tokens['input_ids'].copy()
-    labels[:prefix_len] = [-100] * prefix_len
-    tokens['labels'] = labels
-    return tokens
-
-def load_model(model_id):
-    # Hardcoded dataset if file not present
-    data = [
-        {"question": "What were MakeMyTrip's total assets as of March 31, 2024?",
-         "answer": "MakeMyTrip's total assets as of March 31, 2024 were USD 1,660,077 thousand."},
-        {"question": "What was MakeMyTrip's total revenue for the year ended March 31, 2025?",
-         "answer": "MakeMyTrip's total revenue for the year ended March 31, 2025 was USD 978,336 thousand."},
-    ]
-
-    # Get the path of the current script
-    current_dir = os.path.dirname(os.path.abspath(__file__))
-
-    json_file_path = os.path.join(current_dir, 'makemytrip_qa_full.json')
-
-    # Read JSON file
-    with open(json_file_path, 'r', encoding='utf-8') as f:
-        data = json.load(f)
-
-    df = pd.DataFrame(data)
-
-    print(f"Loaded dataset containing {len(df)} questions")
-    training_data = []
-    system_prompt = "You are a helpful assistant that provides financial data from MakeMyTrip reports."
-    for index, row in df.iterrows():
-        training_data.append({"text": f"<|system|>\n{system_prompt}</s>\n<|user|>\n{row['question']}</s>\n<|assistant|>\n{row['answer']}</s>"})
-    dataset = Dataset.from_list(training_data)
-    tokenized_dataset = dataset.map(preprocess, remove_columns=["text"])
-
-    base_model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True).to(device)
-    model = replace_proj_with_moe_lora(base_model)
-    peft_config = LoraConfig(r=8, lora_alpha=16, lora_dropout=0.05, target_modules=["o_proj"], bias="none", task_type="CAUSAL_LM")
-    model = get_peft_model(model, peft_config)
-
-    trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
-    total = sum(p.numel() for p in model.parameters())
-    print(f"Trainable params: {trainable:,d} || Total params: {total:,d} || "
-          f"Trainable%: {100 * trainable / total:.4f}")
-
-    model.config.use_cache = False
-    model.gradient_checkpointing_disable()
-
-    data_collator = DataCollatorForLanguageModeling(
-        tokenizer=tokenizer,
-        mlm=False
-    )
-
-    training_args = TrainingArguments(
-        learning_rate=5e-5,
-        output_dir="./results",
-        num_train_epochs=2,
-        per_device_train_batch_size=1,  # Keep batch size small
-        gradient_accumulation_steps=4,  # Increased gradient accumulation steps
-        logging_steps=1,
-        save_steps=10,
-        save_total_limit=2,
-        fp16=True,  # fp16 and bf16 are mutually exclusive. bf16 is recommended for Ampere+ GPUs.
-        bf16=False,  # Use bf16 for better performance with 4-bit models
-    )
-
-    trainer = Trainer(
-        model=model,
-        args=training_args,
-        train_dataset=tokenized_dataset,
-        data_collator=data_collator
-    )
-    print("Training started")
-    trainer.train()
-
-    model.eval()
-    return model
-
-model = load_model(MODEL_ID)
-
-# -----------------------------
-# Gradio Interface
-# -----------------------------
-def generate_answer(prompt, max_tokens):
-    if prompt.strip() == "":
-        return "Please enter a prompt!"
-    system_prompt = "You are a helpful assistant that provides financial data from MakeMyTrip reports."
-    messages = [{"role": "system", "content": system_prompt}, {"role": "user", "content": prompt}]
-    input_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
-    inputs = tokenizer(input_text, return_tensors="pt").to(device)
-    with torch.no_grad():
-        outputs = model.generate(
-            **inputs,
-            max_new_tokens=max_tokens,
-            do_sample=True,
-            top_p=0.9,
-            temperature=0.7,
-        )
-    decoded_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    try:
-        # The response will look like "<|system|>\n...</s>\n<|user|>\n...</s>\n<|assistant|>\n...answer...</s>"
-        # We need to find the <|assistant|> token and get everything after it
-        answer_start_token = '<|assistant|>'
-        answer_start_index = decoded_output.rfind(answer_start_token)
-
-        if answer_start_index != -1:
-            generated_answer = decoded_output[answer_start_index + len(answer_start_token):].strip()
-            # The model might generate a final </s> token, which we should remove
-            if generated_answer.endswith('</s>'):
-                generated_answer = generated_answer[:-len('</s>')].strip()
-        else:
-            generated_answer = "Could not extract answer from model output."
-
-    except Exception as e:
-        generated_answer = f"An error occurred: {e}"
-
-    return generated_answer
+# Load and train model
+model, tokenizer, device = load_and_train()
+
+# Wrap for Gradio
+def gradio_generate(prompt, max_tokens):
+    return generate_answer(model, tokenizer, device, prompt, max_tokens)
 
 iface = gr.Interface(
-    fn=generate_answer,
+    fn=gradio_generate,
     inputs=[
         gr.Textbox(label="Enter your question:", lines=5, placeholder="Type your question here..."),
         gr.Slider(minimum=50, maximum=500, step=10, value=200, label="Max tokens to generate")
@@ -233,4 +20,3 @@ iface = gr.Interface(
 ).queue()
 
 iface.launch()
-
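Note on the new code: app.py now imports load_and_train and generate_answer from a fine_tuning module that is not included in this commit; the training and generation logic removed above has presumably moved there. The sketch below is hypothetical, inferred only from the call sites in the diff rather than from the actual module, and shows the minimal interface app.py now depends on:

# fine_tuning.py (hypothetical sketch; the real module is not part of this commit)
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_ID = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

def load_and_train():
    # Must return the (model, tokenizer, device) triple that app.py unpacks.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
    model = AutoModelForCausalLM.from_pretrained(MODEL_ID).to(device)
    # The MoE-LoRA wrapping, dataset preparation, and Trainer.train() call
    # removed from app.py would run here before returning.
    model.eval()
    return model, tokenizer, device

def generate_answer(model, tokenizer, device, prompt, max_tokens):
    # Signature must match the gradio_generate wrapper in the new app.py.
    if not prompt.strip():
        return "Please enter a prompt!"
    messages = [
        {"role": "system", "content": "You are a helpful assistant that provides financial data from MakeMyTrip reports."},
        {"role": "user", "content": prompt},
    ]
    text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    inputs = tokenizer(text, return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=max_tokens,
                                 do_sample=True, top_p=0.9, temperature=0.7)
    decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # Keep only the text after the last assistant marker, as the removed code did.
    marker = "<|assistant|>"
    idx = decoded.rfind(marker)
    return decoded[idx + len(marker):].strip() if idx != -1 else decoded.strip()

The contract is narrow: load_and_train() returns the triple unpacked at module import time, and generate_answer takes (model, tokenizer, device, prompt, max_tokens) in that order.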
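A side note on the removed MoELoRALinear: its k argument (the top-k routing width) is stored but never read, so the forward pass always mixes all experts densely with softmax gate scores. If sparse top-k routing was the intent, a replacement forward could look like the sketch below; this is an illustration under that assumption, not code from this commit, and topk_moe_forward is a hypothetical name:

import torch

def topk_moe_forward(self, x):
    # Drop-in replacement for MoELoRALinear.forward; assumes the same
    # self.base_linear, self.gate, self.experts, and self.k attributes.
    base_out = self.base_linear(x)
    gate_logits = self.gate(x)                          # (..., num_experts)
    topk_vals, topk_idx = torch.topk(gate_logits, self.k, dim=-1)
    topk_weights = torch.softmax(topk_vals, dim=-1)     # renormalize over the k winners
    expert_out = torch.zeros_like(base_out)
    for slot in range(self.k):
        idx = topk_idx[..., slot]                       # winning expert id per token
        weight = topk_weights[..., slot:slot + 1]
        for i, expert in enumerate(self.experts):
            mask = (idx == i).unsqueeze(-1)             # tokens routed to expert i
            expert_out = expert_out + mask * weight * expert(x)
    return base_out + expert_out

For simplicity this sketch still evaluates every expert on every token; a compute-optimal version would gather only the routed tokens per expert. Dense softmax mixing, as in the committed code, is the simpler and fully differentiable choice, which may be why k went unused.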