Spaces:
Runtime error
Update app.py
app.py CHANGED
@@ -3,74 +3,46 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 from peft import PeftModel, PeftConfig
 import gradio as gr
 
-
-peft_model_id = "rishu834763/java-explainer-lora"
+PEFT_MODEL_ID = "rishu834763/java-explainer-lora"
 
-config = PeftConfig.from_pretrained(peft_model_id)
-base_model_name = config.base_model_name_or_path
+config = PeftConfig.from_pretrained(PEFT_MODEL_ID)
+base_model_name = config.base_model_name_or_path
+print(f"Loading base model: {base_model_name}")
 
-# Load base model (with quantization if you want to fit in free tier)
 model = AutoModelForCausalLM.from_pretrained(
     base_model_name,
     torch_dtype=torch.bfloat16,
     device_map="auto",
-    # load_in_4bit=True,
-    # quantization_config=BitsAndBytesConfig(
-    #     load_in_4bit=True,
-    #     bnb_4bit_compute_dtype=torch.bfloat16,
-    #     bnb_4bit_use_double_quant=True,
-    #     bnb_4bit_quant_type="nf4"
-    # ),
+    load_in_4bit=True,  # remove this line only if you upgrade to Pro
 )
 
-model = PeftModel.from_pretrained(model, peft_model_id)
-# Optional but recommended: merge so inference is faster and uses less VRAM
+model = PeftModel.from_pretrained(model, PEFT_MODEL_ID)
 model = model.merge_and_unload()
 
 tokenizer = AutoTokenizer.from_pretrained(base_model_name)
 if tokenizer.pad_token is None:
     tokenizer.pad_token = tokenizer.eos_token
 
-pipe = pipeline(
-    "text-generation",
-    model=model,
-    tokenizer=tokenizer,
-    torch_dtype=torch.bfloat16,
-    device_map="auto",
-)
+pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
 
-def chat(message, history):
+def respond(message, history):
     messages = []
     for user, assistant in history:
         messages.append({"role": "user", "content": user})
         if assistant:
             messages.append({"role": "assistant", "content": assistant})
     messages.append({"role": "user", "content": message})
 
-    outputs = pipe(
-        messages,
-        max_new_tokens=512,
-        do_sample=True,
-        temperature=0.7,
-        top_p=0.9,
-        pad_token_id=tokenizer.eos_token_id
-    )
-    response = outputs[0]["generated_text"][-1]["content"]
-    return response
+    output = pipe(messages, max_new_tokens=1024, temperature=0.6, do_sample=True)
+    return output[0]["generated_text"][-1]["content"]
 
-demo = gr.ChatInterface(
-    chat,
-    description="Ask anything about Java code → I will explain it using your fine-tuned model",
+gr.ChatInterface(
+    respond,
+    title="Java Explainer – Your Own Fine-Tuned Model",
+    description="This is 100% your LoRA model, not ChatGPT, not Mistral, not anything else.",
     examples=[
-        "Explain this Java code: public class Hello { public static void main(String[] args) { System.out.println(\"Hello World\"); }}",
-        "What is the difference between == and .equals() in Java?",
-    ],
-)
-
-demo.launch()
+        "Explain this Java code in simple terms: public class Hello { public static void main(String[] args) { System.out.println(\"Hello World\"); }}",
+        "What is the difference between == and .equals() in Java?",
+        "Why do we mark methods as static in main?"
+    ]
+).queue().launch()
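A note on the quantization change: `load_in_4bit=True` requires the bitsandbytes package and a CUDA GPU, and recent transformers releases prefer passing a `BitsAndBytesConfig` rather than the bare flag. If the Space's runtime error traces to this line, one possible variant is to restore the config the old code had commented out. A minimal sketch, assuming bitsandbytes is installed and the Space has GPU hardware:

import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

# Sketch only: mirrors the config that was previously commented out in
# app.py. Requires bitsandbytes and a CUDA device to load.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
)
model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    quantization_config=bnb_config,
    device_map="auto",
)

Also worth noting: some peft versions cannot merge LoRA weights into a 4-bit quantized base, so if `merge_and_unload()` fails here, keeping the adapter unmerged is the safe fallback.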
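On the new return expression: when a recent transformers text-generation pipeline is called with a list of chat messages, `generated_text` comes back as the whole conversation with the model's turn appended, so the reply is the last element's `content`. A sketch of the output shape `respond()` relies on (the sample question is made up):

# The pipeline echoes the input conversation and appends the model's turn.
output = pipe(
    [{"role": "user", "content": "What is a Java interface?"}],
    max_new_tokens=64,
)
# output ~ [{"generated_text": [
#     {"role": "user", "content": "What is a Java interface?"},
#     {"role": "assistant", "content": "..."},
# ]}]
reply = output[0]["generated_text"][-1]["content"]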
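Since `respond()` unpacks each history entry into a (user, assistant) pair, it assumes gr.ChatInterface's default pair-style history rather than type="messages". Under that assumption, the function can be smoke-tested locally before launching the UI (the sample strings below are hypothetical):

# Each history entry is a [user_message, assistant_reply] pair.
history = [
    ["What is a Java interface?", "An interface declares methods without bodies."],
]
print(respond("Show me an example.", history))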