rishu834763 committed on
Commit
6e3b283
·
verified ·
1 Parent(s): c94c675

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -51
app.py CHANGED
@@ -3,74 +3,46 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
3
  from peft import PeftModel, PeftConfig
4
  import gradio as gr
5
 
6
- # === Load your LoRA correctly ===
7
- peft_model_id = "rishu834763/java-explainer-lora"
8
 
9
- config = PeftConfig.from_pretrained(peft_model_id)
10
- base_model_name = config.base_model_name_or_path # this will be mistralai/Mistral-7B-Instruct-v0.2
 
11
 
12
- # Load base model (with quantization if you want to fit in free tier)
13
  model = AutoModelForCausalLM.from_pretrained(
14
  base_model_name,
15
  torch_dtype=torch.bfloat16,
16
  device_map="auto",
17
- # Remove the two lines below if you have enough VRAM or a paid Space
18
- # load_in_4bit=True,
19
- # quantization_config=BitsAndBytesConfig(
20
- # load_in_4bit=True,
21
- # bnb_4bit_compute_dtype=torch.bfloat16,
22
- # bnb_4bit_use_double_quant=True,
23
- # bnb_4bit_quant_type="nf4"
24
- # ),
25
  )
26
 
27
- model = PeftModel.from_pretrained(model, peft_model_id)
28
- # Optional but recommended: merge so inference is faster and uses less VRAM
29
  model = model.merge_and_unload()
30
 
31
  tokenizer = AutoTokenizer.from_pretrained(base_model_name)
32
  if tokenizer.pad_token is None:
33
  tokenizer.pad_token = tokenizer.eos_token
34
 
35
- # Create the pipeline using YOUR model and tokenizer
36
- pipe = pipeline(
37
- "text-generation",
38
- model=model,
39
- tokenizer=tokenizer,
40
- torch_dtype=torch.bfloat16,
41
- device_map="auto",
42
- )
43
 
44
- # === This is the most important part ===
45
- def chat(message, history):
46
  messages = []
47
- for user_msg, assistant_msg in history:
48
- messages.append({"role": "user", "content": user_msg})
49
- if assistant_msg:
50
- messages.append({"role": "assistant", "content": assistant_msg})
51
  messages.append({"role": "user", "content": message})
52
 
53
- outputs = pipe(
54
- messages,
55
- max_new_tokens=512,
56
- do_sample=True,
57
- temperature=0.7,
58
- top_p=0.9,
59
- pad_token_id=tokenizer.eos_token_id
60
- )
61
- response = outputs[0]["generated_text"][-1]["content"]
62
- return response
63
 
64
- # === Build the Gradio interface ===
65
- demo = gr.ChatInterface(
66
- fn=chat,
67
- title="Java Explainer (Mistral-7B + your LoRA)",
68
- description="Ask anything about Java code → I will explain it using your fine-tuned model",
69
  examples=[
70
- "Explain this Java code: public class HelloWorld { public static void main(String[] args) { System.out.println(\"Hello, World!\"); } }",
71
- "What does synchronized keyword do in Java?"
72
- ],
73
- cache_examples=False,
74
- )
75
-
76
- demo.launch()
 
3
from peft import PeftModel, PeftConfig
import gradio as gr

# The fine-tuned LoRA adapter published on the Hugging Face Hub.
PEFT_MODEL_ID = "rishu834763/java-explainer-lora"

# The adapter's config records which base checkpoint it was trained against,
# so the base model name never has to be hard-coded here.
config = PeftConfig.from_pretrained(PEFT_MODEL_ID)
base_model_name = config.base_model_name_or_path
print(f"Loading base model: {base_model_name}")
11
 
 
12
# Load the base model across whatever devices are available.
# NOTE(review): passing load_in_4bit directly to from_pretrained is
# deprecated in recent transformers releases — the supported form is
# quantization_config=BitsAndBytesConfig(load_in_4bit=True). Kept as-is
# here because the currently pinned version still accepts it; confirm
# before upgrading transformers.
model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    load_in_4bit=True,  # remove this line only if you upgrade to Pro
)

# Attach the LoRA adapter, then merge it into the base weights so
# inference runs without the PEFT wrapper overhead.
model = PeftModel.from_pretrained(model, PEFT_MODEL_ID)
model = model.merge_and_unload()

tokenizer = AutoTokenizer.from_pretrained(base_model_name)
if tokenizer.pad_token is None:
    # Decoder-only checkpoints often ship without a pad token; reuse EOS.
    tokenizer.pad_token = tokenizer.eos_token

# Chat-capable text-generation pipeline built on the merged model.
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
 
 
 
 
 
 
 
27
 
28
def respond(message, history):
    """Chat callback for gr.ChatInterface.

    Rebuilds the conversation as a list of role/content dicts, appends the
    latest user message, runs the generation pipeline, and returns the
    assistant's reply text.

    Args:
        message: The newest user message (str).
        history: Prior turns. Depending on the Gradio version/config this is
            either a list of (user, assistant) pairs or a list of
            {"role": ..., "content": ...} dicts — both are handled.

    Returns:
        The assistant's reply string.
    """
    messages = []
    for turn in history:
        if isinstance(turn, dict):
            # Gradio "messages" format: already a role/content dict.
            messages.append({"role": turn["role"], "content": turn["content"]})
        else:
            user_msg, assistant_msg = turn
            messages.append({"role": "user", "content": user_msg})
            if assistant_msg:
                messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    output = pipe(
        messages,
        max_new_tokens=1024,
        temperature=0.6,
        do_sample=True,
        # Explicit pad token silences the "Setting pad_token_id to
        # eos_token_id" warning emitted on every call otherwise.
        pad_token_id=tokenizer.eos_token_id,
    )
    # Chat-style pipelines return the full message list; the final entry
    # is the newly generated assistant message.
    return output[0]["generated_text"][-1]["content"]
 
 
 
 
 
 
 
 
38
 
39
# Wire the chat callback into Gradio's ready-made chat UI and serve it.
demo = gr.ChatInterface(
    respond,
    title="Java Explainer – Your Own Fine-Tuned Model",
    description="This is 100% your LoRA model, not ChatGPT, not Mistral, not anything else.",
    examples=[
        "Explain this Java code in simple terms: public class Hello { public static void main(String[] args) { System.out.println(\"Hello World\"); }}",
        "What is the difference between == and .equals() in Java?",
        "Why do we mark methods as static in main?",
    ],
)
demo.queue().launch()