rishu834763 committed on
Commit
0244928
Β·
verified Β·
1 Parent(s): 6729932

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -41
app.py CHANGED
@@ -3,14 +3,16 @@ import torch
3
  from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
4
  from peft import PeftModel
5
  import gradio as gr
 
 
 
 
 
6
 
7
  # ===================================
8
- # 1. Model & LoRA (your exact repo)
9
- # ===================================
10
- BASE_MODEL = "meta-llama/Meta-Llama-3-8B-Instruct" # do NOT change
11
- LORA_ADAPTER = "rishu834763/java-explainer-lora" # ← your LoRA
12
 
13
- # 4-bit quantization (fits on 1Γ—A100 40/80GB, 4090 24GB, T4 16GB with some offloading)
14
  quantization_config = BitsAndBytesConfig(
15
  load_in_4bit=True,
16
  bnb_4bit_quant_type="nf4",
@@ -22,7 +24,7 @@ print("Loading base model (Llama-3-8B-Instruct 4-bit)...")
22
  base_model = AutoModelForCausalLM.from_pretrained(
23
  BASE_MODEL,
24
  quantization_config=quantization_config,
25
- device_map="auto", # auto-offload to CPU if needed
26
  torch_dtype=torch.bfloat16,
27
  trust_remote_code=True,
28
  )
@@ -34,7 +36,7 @@ tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)
34
  tokenizer.pad_token = tokenizer.eos_token
35
 
36
  # ===================================
37
- # 2. Inference pipeline
38
  # ===================================
39
  pipe = torch.pipeline(
40
  "text-generation",
@@ -48,58 +50,32 @@ pipe = torch.pipeline(
48
  return_full_text=False,
49
  )
50
 
51
- # System prompt tuned for Java explanations
52
- SYSTEM_PROMPT = "You are an expert Java teacher. Explain concepts clearly, provide code examples, and answer in a concise but complete way."
53
 
54
  def chat(message: str, history):
55
  messages = [{"role": "system", "content": SYSTEM_PROMPT}]
56
-
57
- # Convert Gradio history β†’ Llama-3 format
58
  for user, assistant in history:
59
  messages.append({"role": "user", "content": user})
60
  if assistant:
61
  messages.append({"role": "assistant", "content": assistant})
62
-
63
  messages.append({"role": "user", "content": message})
64
 
65
- prompt = tokenizer.apply_chat_template(
66
- messages,
67
- tokenize=False,
68
- add_generation_prompt=True,
69
- )
70
-
71
  output = pipe(prompt)[0]["generated_text"]
72
  return output
73
 
74
  # ===================================
75
- # 3. Modern Gradio UI (2025)
76
- # ===================================
77
- with gr.Blocks(theme=gr.themes.Soft(), title="Java Explainer (Llama-3-8B + Your LoRA)") as demo:
78
- gr.Markdown("# πŸ§‘β€πŸ’» Java Explainer\nPowered by **rishu834763/java-explainer-lora** on Llama-3-8B-Instruct")
79
-
80
  chatbot = gr.Chatbot(height=620)
81
- msg = gr.Textbox(
82
- placeholder="Ask anything about Java (e.g. 'Explain Spring Boot @Autowired with example')",
83
- label="Your question",
84
- container=False,
85
- )
86
-
87
- with gr.Row():
88
- send = gr.Button("Send πŸš€", variant="primary")
89
- clear = gr.Button("Clear πŸ—‘οΈ")
90
 
91
  with gr.Row():
92
- retry = gr.Button("πŸ”„ Retry")
93
- undo = gr.Button("β†Ά Undo")
94
 
95
- # Events
96
  send.click(chat, [msg, chatbot], [msg, chatbot]).then(lambda: "", outputs=msg)
97
  msg.submit(chat, [msg, chatbot], [msg, chatbot]).then(lambda: "", outputs=msg)
98
  clear.click(lambda: None, None, chatbot, queue=False)
99
- retry.click(lambda h: h[:-1], chatbot, chatbot, queue=False)
100
- undo.click(lambda h: h[:-1], chatbot, chatbot, queue=False)
101
 
102
- demo.queue(max_size=64).launch(
103
- server_name="0.0.0.0",
104
- server_port=7860,
105
- )
 
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel
import gradio as gr
import os

from huggingface_hub import login

# Authenticate the Space so the gated Llama-3 base model can be downloaded.
# Fail fast with an actionable message instead of a bare KeyError when the
# HF_TOKEN secret is missing.
hf_token = os.environ.get("HF_TOKEN")
if not hf_token:
    raise RuntimeError(
        "HF_TOKEN environment variable is not set; it is required to "
        "download the gated base model."
    )
login(token=hf_token)

# ===================================
# Model identifiers
# ===================================
BASE_MODEL = "meta-llama/Meta-Llama-3-8B-Instruct"
LORA_ADAPTER = "rishu834763/java-explainer-lora"  # fine-tuned LoRA adapter
 
 
15
 
 
16
  quantization_config = BitsAndBytesConfig(
17
  load_in_4bit=True,
18
  bnb_4bit_quant_type="nf4",
 
24
# Load the base model with 4-bit quantization. device_map="auto" places
# layers across available devices (auto-offloading to CPU if needed).
base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    quantization_config=quantization_config,
    device_map="auto",
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
)
 
36
  tokenizer.pad_token = tokenizer.eos_token
37
 
38
  # ===================================
39
+ # Rest of the code stays exactly the same
40
  # ===================================
41
  pipe = torch.pipeline(
42
  "text-generation",
 
50
  return_full_text=False,
51
  )
52
 
53
SYSTEM_PROMPT = "You are an expert Java teacher. Explain concepts clearly, provide code examples, and answer concisely but completely."


def chat(message: str, history):
    """Generate an assistant reply for *message* given the chat *history*.

    Returns a ("", updated_history) pair so it can drive both components in
    the event wiring, which declares [msg, chatbot] as outputs — the original
    returned only the generated string, which mismatches the two declared
    outputs and breaks the Gradio callback.
    """
    # history may be None right after the Clear button resets the chatbot.
    history = history or []

    messages = [{"role": "system", "content": SYSTEM_PROMPT}]

    # Convert Gradio tuple-style history -> chat-template message list.
    for user_turn, assistant_turn in history:
        messages.append({"role": "user", "content": user_turn})
        if assistant_turn:
            messages.append({"role": "assistant", "content": assistant_turn})
    messages.append({"role": "user", "content": message})

    prompt = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )
    output = pipe(prompt)[0]["generated_text"]

    # Clear the textbox and append the new exchange to the chat history.
    return "", history + [(message, output)]
66
 
67
# ===================================
# Gradio UI
# ===================================
with gr.Blocks(theme=gr.themes.Soft(), title="Java Explainer") as demo:
    gr.Markdown("# Java Explainer\nPowered by **rishu834763/java-explainer-lora** + Llama-3-8B")

    chatbot = gr.Chatbot(height=620)
    msg = gr.Textbox(placeholder="Ask anything about Java...", label="Question", container=False)

    with gr.Row():
        send = gr.Button("Send", variant="primary")
        clear = gr.Button("Clear")

    # Route both the Send button and the Enter key through the same handler,
    # then blank the textbox once the reply has been rendered.
    send.click(chat, [msg, chatbot], [msg, chatbot]).then(lambda: "", outputs=msg)
    msg.submit(chat, [msg, chatbot], [msg, chatbot]).then(lambda: "", outputs=msg)
    clear.click(lambda: None, None, chatbot, queue=False)

demo.queue(max_size=64).launch(server_name="0.0.0.0", server_port=7860)