rishu834763 committed on
Commit
a7ff14b
·
verified ·
1 Parent(s): 00d2932

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -23
app.py CHANGED
@@ -5,21 +5,20 @@ import gradio as gr
5
 
6
PEFT_ID = "rishu834763/java-explainer-lora"

# Read the adapter config to discover which base model the LoRA was trained on.
config = PeftConfig.from_pretrained(PEFT_ID)
base = config.base_model_name_or_path

# Load the base model quantized to 4-bit so it fits on a small/free-tier GPU.
# NOTE(review): passing `load_in_4bit=True` directly is deprecated in recent
# transformers releases in favor of `quantization_config=BitsAndBytesConfig(...)`
# — confirm the pinned transformers version still accepts this kwarg.
model = AutoModelForCausalLM.from_pretrained(
    base,
    device_map="auto",           # let accelerate place layers automatically
    torch_dtype=torch.bfloat16,  # compute dtype for non-quantized modules
    load_in_4bit=True,
)

# Attach the LoRA adapter on top of the quantized base model.
# Deliberately NOT calling merge_and_unload(): per this commit, merging was
# removed — presumably because merging into a 4-bit quantized base fails;
# the adapter therefore stays attached (unmerged) at inference time.
model = PeftModel.from_pretrained(model, PEFT_ID)

# Tokenizer comes from the base model repo, not the adapter repo.
tokenizer = AutoTokenizer.from_pretrained(base)
@@ -35,38 +34,27 @@ pipe = pipeline(
35
  temperature=0.6,
36
  do_sample=True,
37
  top_p=0.9,
38
- repetition_penalty=1.1,
39
  )
40
 
41
- # ========= FIXED CHAT FUNCTION =========
42
def chat(message, history):
    """Build a chat-template message list from the Gradio history and
    generate the assistant's next reply.

    Args:
        message: The new user message (str).
        history: List of (user_msg, assistant_msg) pairs — assumes the
            tuple-format history ChatInterface passes by default; TODO
            confirm against the pinned Gradio version.

    Returns:
        The assistant's reply text (str).
    """
    messages = []
    for user_msg, assistant_msg in history:
        messages.append({"role": "user", "content": user_msg})
        # Skip empty/None assistant turns so roles keep strictly
        # alternating, which chat templates require.
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})

    # Append the new user message last.
    messages.append({"role": "user", "content": message})

    # The pipeline returns the whole conversation; the final entry is the
    # newly generated assistant turn.
    output = pipe(messages)[0]["generated_text"]
    return output[-1]["content"]
59
 
60
- # ========= GRADIO INTERFACE =========
61
# Wire the chat callback into a Gradio chat UI and launch the app.
gr.ChatInterface(
    chat,
    title="Java Explainer – Your Model (Running!)",
    description="100% your fine-tuned LoRA · No OpenAI · Instant start",
    examples=[
        "Explain this Java code: public static void main(String[] args) { System.out.println(\"Hello\"); }",
        "What does public static void main mean?",
        "Difference between String and StringBuilder?",
    ],
    # Example caching runs the model at build time and was causing a
    # startup error, so it is disabled.
    cache_examples=False,
).queue().launch()
 
5
 
6
PEFT_ID = "rishu834763/java-explainer-lora"

# Get base model name: the adapter config records which base checkpoint
# the LoRA was fine-tuned from.
config = PeftConfig.from_pretrained(PEFT_ID)
base = config.base_model_name_or_path

# Load base model in 4-bit (fits free-tier hardware).
# NOTE(review): the bare `load_in_4bit=True` kwarg is deprecated in newer
# transformers in favor of `quantization_config=BitsAndBytesConfig(...)` —
# verify the pinned transformers version still supports it.
model = AutoModelForCausalLM.from_pretrained(
    base,
    device_map="auto",           # automatic device placement via accelerate
    torch_dtype=torch.bfloat16,  # dtype for the non-quantized modules
    load_in_4bit=True,
)

# Load LoRA weights on top WITHOUT merging: merge_and_unload() was removed
# in this commit — presumably because merging into a 4-bit quantized base
# is not supported — so the adapter is kept attached at inference time.
model = PeftModel.from_pretrained(model, PEFT_ID)

# Tokenizer is loaded from the base repo, not the adapter repo.
tokenizer = AutoTokenizer.from_pretrained(base)
 
34
  temperature=0.6,
35
  do_sample=True,
36
  top_p=0.9,
 
37
  )
38
 
 
39
def chat(message, history):
    """Generate the assistant's next reply for gr.ChatInterface.

    `history` is assumed to be a list of (user, assistant) tuples as
    supplied by ChatInterface's tuple format — TODO confirm for the
    pinned Gradio version. Returns the reply text (str).
    """
    messages = []
    for user_turn, bot_turn in history:
        messages.append({"role": "user", "content": user_turn})
        # Drop empty/None assistant turns so the role sequence stays
        # strictly user/assistant alternating (required by chat templates).
        if bot_turn:
            messages.append({"role": "assistant", "content": bot_turn})

    messages.append({"role": "user", "content": message})

    conversation = pipe(messages)[0]["generated_text"]
    # The pipeline echoes the full conversation; the last element is the
    # freshly generated assistant message.
    return conversation[-1]["content"]
49
 
 
50
# Build and launch the Gradio chat UI around the chat() callback.
gr.ChatInterface(
    chat,
    title="Java Explainer – Your Model (Running!)",
    description="100% your fine-tuned LoRA · No OpenAI · Instant start",
    examples=[
        "Explain this Java code: public static void main(String[] args) { System.out.println(\"Hello\"); }",
        "What does public static void main mean?",
        "Difference between String and StringBuilder?",
    ],
    # Disabled: pre-generating cached example answers at build time was
    # triggering an error on startup.
    cache_examples=False,
).queue().launch()