rishu834763 committed on
Commit
d7830ff
·
verified ·
1 Parent(s): 5d81a15

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +69 -30
app.py CHANGED
@@ -1,4 +1,4 @@
1
- # app.py – CPU-only, ultra-stable, works on any HF Space (free tier included)
2
  import torch
3
  from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
4
  from peft import PeftModel
@@ -7,61 +7,100 @@ import gradio as gr
7
  BASE_MODEL = "mistralai/Mistral-7B-Instruct-v0.2"
8
  LORA_ADAPTER = "rishu834763/java-explainer-lora"
9
 
10
- print("Loading Mistral-7B-Instruct-v0.2 + your Java LoRA (8-bit CPU mode)...")
11
 
12
- # 8-bit on CPU = ~7–8 GB RAM, fits perfectly on every HF Space (even free CPU)
13
  model = AutoModelForCausalLM.from_pretrained(
14
  BASE_MODEL,
15
- device_map="auto", # will automatically use CPU if no GPU
16
- load_in_8bit=True, # ← 8-bit instead of 4-bit β†’ no CUDA driver needed
17
  torch_dtype=torch.float16,
18
  low_cpu_mem_usage=True,
19
  )
20
 
21
- # Apply your LoRA (adds only ~168 MB)
22
  model = PeftModel.from_pretrained(model, LORA_ADAPTER)
23
-
24
  tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
25
  tokenizer.pad_token = tokenizer.eos_token
26
 
 
27
  pipe = pipeline(
28
  "text-generation",
29
  model=model,
30
  tokenizer=tokenizer,
31
  max_new_tokens=1024,
32
- temperature=0.3,
33
  top_p=0.95,
34
  do_sample=True,
35
- repetition_penalty=1.15,
36
  return_full_text=False,
 
 
37
  )
38
 
39
- SYSTEM_PROMPT = "You are an expert Java teacher. Always explain clearly with clean, runnable code examples."
40
-
41
- def chat(message: str, history):
42
- messages = [{"role": "system", "content": SYSTEM_PROMPT}]
43
- for user, assistant in history:
44
- messages.append({"role": "user", "content": user})
45
- if assistant:
46
- messages.append({"role": "assistant", "content": assistant})
47
- messages.append({"role": "user", "content": message})
48
 
 
 
 
 
 
 
 
 
49
  prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
50
- output = pipe(prompt)[0]["generated_text"]
 
51
  return output
52
 
53
- # UI
54
- with gr.Blocks(theme=gr.themes.Soft(), title="Java Explainer") as demo:
55
- gr.Markdown("# Java Explainer Pro\nPowered by **rishu834763/java-explainer-lora** + Mistral-7B-v0.2 (CPU 8-bit)")
56
- chatbot = gr.Chatbot(height=620)
57
- msg = gr.Textbox(placeholder="Ask anything about Java...", container=False)
58
 
59
  with gr.Row():
60
- send = gr.Button("Send", variant="primary")
61
- clear = gr.Button("Clear")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
 
63
- send.click(chat, [msg, chatbot], [msg, chatbot]).then(lambda: "", outputs=msg)
64
- msg.submit(chat, [msg, chatbot], [msg, chatbot]).then(lambda: "", outputs=msg)
65
- clear.click(lambda: None, None, chatbot, queue=False)
66
 
67
- demo.queue(max_size=100).launch(server_name="0.0.0.0", server_port=7860)
 
 
 
 
 
1
# app.py – FINAL VERSION (November 2025) – Instant output, dual input
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from peft import PeftModel
import gradio as gr

BASE_MODEL = "mistralai/Mistral-7B-Instruct-v0.2"
LORA_ADAPTER = "rishu834763/java-explainer-lora"

print("Loading your Java Explainer (8-bit CPU mode – super fast & stable)...")

# NOTE(review): `load_in_8bit=True` requires the bitsandbytes package *and* a
# CUDA device — on a CPU-only Space the original call raises at startup,
# despite the "CPU mode" comments.  Try the 8-bit path first and fall back to
# a plain float32 CPU load so the app comes up everywhere.
try:
    model = AutoModelForCausalLM.from_pretrained(
        BASE_MODEL,
        load_in_8bit=True,       # 8-bit quantized weights (GPU + bitsandbytes only)
        device_map="auto",
        low_cpu_mem_usage=True,
    )
except Exception as err:  # bitsandbytes missing / no CUDA → plain CPU load
    print(f"8-bit load failed ({err}); falling back to float32 on CPU...")
    model = AutoModelForCausalLM.from_pretrained(
        BASE_MODEL,
        # float16 on CPU is slow and poorly supported; float32 is the safe choice.
        torch_dtype=torch.float32,
        low_cpu_mem_usage=True,
    )

# Apply the LoRA adapter on top of the base weights (adds only ~168 MB).
model = PeftModel.from_pretrained(model, LORA_ADAPTER)

tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
tokenizer.pad_token = tokenizer.eos_token  # Mistral ships without a pad token

# Generation settings tuned for focused, low-repetition answers.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=1024,
    temperature=0.2,
    top_p=0.95,
    do_sample=True,
    repetition_penalty=1.18,
    return_full_text=False,
    eos_token_id=tokenizer.eos_token_id,
    pad_token_id=tokenizer.pad_token_id,
)

SYSTEM_PROMPT = """You are the world's best Java teacher.
Always respond with:
• Clear explanation
• Clean, runnable, modern Java code (Java 17+)
• Best practices (records, var, sealed classes, etc.)
• Fix any bugs or bad patterns
Never say "I can't see the code" — always assume it's provided."""
 
 
47
 
48
def generate(instruction: str, code: str = "") -> str:
    """Run the Java-explainer model on an instruction plus optional code.

    Parameters:
        instruction: free-text question or task ("Explain this", "Fix this bug", ...).
        code: optional Java source pasted by the user; gr.Code hands us None
            when its editor is empty.

    Returns:
        The model's answer with surrounding whitespace stripped.
    """
    # Guard against None from the Gradio components — the original called
    # `.strip()` directly and crashed with AttributeError on an empty editor.
    instruction = (instruction or "").strip()
    code = (code or "").strip()

    if code:
        user_input = f"### Instruction:\n{instruction}\n\n### Code (if any):\n{code}"
    else:
        user_input = instruction

    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": user_input},
    ]

    prompt = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )

    # max_new_tokens is already configured on the pipeline; passing it again
    # (as the original did) is redundant.
    output = pipe(prompt)[0]["generated_text"].strip()
    return output
60
 
61
# Beautiful dual-input UI: instruction + optional code on the left, answer on the right.
with gr.Blocks(theme=gr.themes.Soft(), title="Java Explainer Pro") as demo:
    gr.Markdown("# Java Explainer Pro\nAsk anything — explain, fix, improve, teach")

    with gr.Row():
        with gr.Column(scale=1):
            instruction = gr.Textbox(
                label="Instruction / Question",
                placeholder="e.g. Explain this code / Fix this bug / Convert to Java records / Make it thread-safe / Best way to read a file in Java 17",
                lines=6,
            )
            # NOTE(review): gr.Code does not accept a `placeholder` kwarg — the
            # original call raised TypeError at startup, so the hint is dropped
            # and the label carries it instead.
            code_input = gr.Code(
                label="Java Code (optional)",
                language="java",
                lines=12,
            )

            with gr.Row():
                submit = gr.Button("Explain / Fix / Improve", variant="primary", size="lg")
                clear = gr.Button("Clear")

        with gr.Column(scale=1):
            output = gr.Markdown(label="Answer")

    # Button click and Enter in the instruction box both trigger generation.
    submit.click(fn=generate, inputs=[instruction, code_input], outputs=output)
    instruction.submit(fn=generate, inputs=[instruction, code_input], outputs=output)

    # Reset both inputs and the answer pane in one go.
    clear.click(lambda: ("", "", ""), None, [instruction, code_input, output])

# NOTE(review): share=True is ignored (with a warning) when running inside a
# HF Space, but it is what exposes a public link on a local run, so it stays.
demo.queue(max_size=20).launch(
    server_name="0.0.0.0",
    server_port=7860,
    share=True,
)