File size: 2,620 Bytes
012cbfb
e30f5ff
5d81a15
6a9665a
e30f5ff
 
5d81a15
44719c9
e30f5ff
012cbfb
cd50342
012cbfb
5d81a15
94da46e
d7830ff
012cbfb
5d81a15
 
e30f5ff
00d2932
012cbfb
5d81a15
cd77f87
cd50342
cd77f87
 
e30f5ff
012cbfb
44719c9
6729932
 
 
5d81a15
d7830ff
6729932
 
d7830ff
6729932
 
 
012cbfb
 
 
 
 
 
00d2932
d7830ff
cd77f87
d7830ff
 
 
 
 
 
0244928
012cbfb
 
cd50342
012cbfb
d7830ff
4525549
6729932
 
012cbfb
d7830ff
012cbfb
 
 
d7830ff
 
 
 
012cbfb
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
# app.py – FINAL, ACTUALLY WORKING VERSION (November 2025)
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from peft import PeftModel
import gradio as gr

# Hugging Face repo IDs: the base instruct model and the LoRA adapter fine-tuned on top of it.
BASE_MODEL   = "mistralai/Mistral-7B-Instruct-v0.2"
LORA_ADAPTER = "rishu834763/java-explainer-lora"

print("Loading Java Explainer (8-bit CPU) – please wait ~90 seconds...")

# NOTE(review): `load_in_8bit=True` passed directly to `from_pretrained` is deprecated in
# recent transformers in favour of `quantization_config=BitsAndBytesConfig(load_in_8bit=True)`,
# and bitsandbytes 8-bit quantization has historically required a CUDA GPU — the "8-bit CPU"
# wording in the print above should be verified against the target Space's hardware.
model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    load_in_8bit=True,
    device_map="auto",           # let accelerate decide weight placement across available devices
    torch_dtype=torch.float16,   # half precision for the modules that are not 8-bit quantized
    low_cpu_mem_usage=True,      # stream weights in shards instead of a full in-RAM fp32 copy
)

# Wrap the base model with the fine-tuned LoRA adapter weights.
model = PeftModel.from_pretrained(model, LORA_ADAPTER)

tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
# Mistral's tokenizer ships without a dedicated pad token; reuse EOS so padding works.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Text-generation pipeline over the LoRA-wrapped model. Generation defaults apply
# to every call made through `pipe` unless overridden per call.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=1024,         # cap on generated tokens only (prompt length excluded)
    temperature=0.2,             # low temperature: focused, mostly-deterministic answers
    top_p=0.95,                  # nucleus sampling cutoff
    do_sample=True,              # sampling must be enabled for temperature/top_p to take effect
    repetition_penalty=1.18,     # discourage the model from looping on its own output
    return_full_text=False,      # return only the completion, not the echoed prompt
)

# System persona prepended to every request. This is a runtime string sent to the
# model — its exact wording is part of the app's behavior.
SYSTEM_PROMPT = """You are the world's best Java teacher with 20+ years of experience.
Always give:
• Clear, step-by-step explanation
• Clean, modern, runnable Java code (Java 17+)
• Fix any bugs or bad practices
• Use records, var, streams, sealed classes, etc. when appropriate"""

def generate(instruction: str, code: str = "") -> str:
    """Answer a Java question with the fine-tuned model.

    Args:
        instruction: What the user wants (explain, fix, refactor, ...).
        code: Optional Java snippet the instruction refers to.

    Returns:
        The model's generated answer, stripped of surrounding whitespace.
    """
    instruction = instruction.strip()
    code = code.strip()
    user_input = (
        f"### Instruction:\n{instruction}\n\n### Code:\n{code}" if code else instruction
    )

    # BUG FIX: Mistral-7B-Instruct-v0.2's chat template does not accept a
    # "system" role — apply_chat_template raises "Conversation roles must
    # alternate user and assistant roles" on every call when one is present.
    # The system prompt is therefore folded into the single user turn.
    messages = [
        {"role": "user", "content": f"{SYSTEM_PROMPT}\n\n{user_input}"},
    ]

    prompt = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    return pipe(prompt)[0]["generated_text"].strip()

# Beautiful working UI (Gradio 4.100+ compatible)
with gr.Blocks(theme=gr.themes.Soft(), title="Java Explainer Pro") as demo:
    gr.Markdown("# Java Explainer Pro\nYour personal senior Java mentor is ready")

    with gr.Row():
        with gr.Column(scale=1):
            instruction = gr.Textbox(
                label="What do you want to know or fix?",
                placeholder="Explain this code · Fix this bug · Make it thread-safe · Convert to records · Best way to read JSON in Java 17",
                lines=4
            )
            code_input = gr.Code(
                label="Java Code (optional)",
                language="java",
                lines=16,
                value="// Paste your code here (