rishu834763 committed on
Commit
a7ff14b
·
verified ·
1 Parent(s): 00d2932

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -23
app.py CHANGED
@@ -5,21 +5,20 @@ import gradio as gr
5
 
6
PEFT_ID = "rishu834763/java-explainer-lora"

# Read the adapter config to discover which base model the LoRA was trained on.
config = PeftConfig.from_pretrained(PEFT_ID)
base = config.base_model_name_or_path

# Load the base model quantized to 4-bit so it fits on a small/free-tier GPU.
# NOTE(review): passing `load_in_4bit=True` directly is deprecated in recent
# transformers releases in favor of `quantization_config=BitsAndBytesConfig(...)`
# — confirm the pinned transformers version still accepts this kwarg.
model = AutoModelForCausalLM.from_pretrained(
    base,
    device_map="auto",           # let accelerate place layers automatically
    torch_dtype=torch.bfloat16,  # compute dtype for non-quantized modules
    load_in_4bit=True,
)

# Attach the LoRA adapter on top of the quantized base model.
# Deliberately NOT calling merge_and_unload(): per this commit, merging was
# removed — presumably because merging into a 4-bit quantized base fails;
# the adapter therefore stays attached (unmerged) at inference time.
model = PeftModel.from_pretrained(model, PEFT_ID)

# Tokenizer comes from the base model repo, not the adapter repo.
tokenizer = AutoTokenizer.from_pretrained(base)
@@ -35,38 +34,27 @@ pipe = pipeline(
35
  temperature=0.6,
36
  do_sample=True,
37
  top_p=0.9,
38
- repetition_penalty=1.1,
39
  )
40
 
41
- # ========= FIXED CHAT FUNCTION =========
42
def chat(message, history):
    """Build a chat-template message list from the Gradio history and
    generate the assistant's next reply.

    Args:
        message: The new user message (str).
        history: List of (user_msg, assistant_msg) pairs — assumes the
            tuple-format history ChatInterface passes by default; TODO
            confirm against the pinned Gradio version.

    Returns:
        The assistant's reply text (str).
    """
    messages = []
    for user_msg, assistant_msg in history:
        messages.append({"role": "user", "content": user_msg})
        # Skip empty/None assistant turns so roles keep strictly
        # alternating, which chat templates require.
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})

    # Append the new user message last.
    messages.append({"role": "user", "content": message})

    # The pipeline returns the whole conversation; the final entry is the
    # newly generated assistant turn.
    output = pipe(messages)[0]["generated_text"]
    return output[-1]["content"]
59
 
60
- # ========= GRADIO INTERFACE =========
61
# Wire the chat callback into a Gradio chat UI and launch the app.
gr.ChatInterface(
    chat,
    title="Java Explainer – Your Model (Running!)",
    description="100% your fine-tuned LoRA · No OpenAI · Instant start",
    examples=[
        "Explain this Java code: public static void main(String[] args) { System.out.println(\"Hello\"); }",
        "What does public static void main mean?",
        "Difference between String and StringBuilder?",
    ],
    # Example caching runs the model at build time and was causing a
    # startup error, so it is disabled.
    cache_examples=False,
).queue().launch()
 
5
 
6
PEFT_ID = "rishu834763/java-explainer-lora"

# Get base model name: the adapter config records which base checkpoint
# the LoRA was fine-tuned from.
config = PeftConfig.from_pretrained(PEFT_ID)
base = config.base_model_name_or_path

# Load base model in 4-bit (fits free-tier hardware).
# NOTE(review): the bare `load_in_4bit=True` kwarg is deprecated in newer
# transformers in favor of `quantization_config=BitsAndBytesConfig(...)` —
# verify the pinned transformers version still supports it.
model = AutoModelForCausalLM.from_pretrained(
    base,
    device_map="auto",           # automatic device placement via accelerate
    torch_dtype=torch.bfloat16,  # dtype for the non-quantized modules
    load_in_4bit=True,
)

# Load LoRA weights on top WITHOUT merging: merge_and_unload() was removed
# in this commit — presumably because merging into a 4-bit quantized base
# is not supported — so the adapter is kept attached at inference time.
model = PeftModel.from_pretrained(model, PEFT_ID)

# Tokenizer is loaded from the base repo, not the adapter repo.
tokenizer = AutoTokenizer.from_pretrained(base)
 
34
  temperature=0.6,
35
  do_sample=True,
36
  top_p=0.9,
 
37
  )
38
 
 
39
def chat(message, history):
    """Generate the assistant's next reply for gr.ChatInterface.

    `history` is assumed to be a list of (user, assistant) tuples as
    supplied by ChatInterface's tuple format — TODO confirm for the
    pinned Gradio version. Returns the reply text (str).
    """
    messages = []
    for user_turn, bot_turn in history:
        messages.append({"role": "user", "content": user_turn})
        # Drop empty/None assistant turns so the role sequence stays
        # strictly user/assistant alternating (required by chat templates).
        if bot_turn:
            messages.append({"role": "assistant", "content": bot_turn})

    messages.append({"role": "user", "content": message})

    conversation = pipe(messages)[0]["generated_text"]
    # The pipeline echoes the full conversation; the last element is the
    # freshly generated assistant message.
    return conversation[-1]["content"]
49
 
 
50
# Build and launch the Gradio chat UI around the chat() callback.
gr.ChatInterface(
    chat,
    title="Java Explainer – Your Model (Running!)",
    description="100% your fine-tuned LoRA · No OpenAI · Instant start",
    examples=[
        "Explain this Java code: public static void main(String[] args) { System.out.println(\"Hello\"); }",
        "What does public static void main mean?",
        "Difference between String and StringBuilder?",
    ],
    # Disabled: pre-generating cached example answers at build time was
    # triggering an error on startup.
    cache_examples=False,
).queue().launch()