rishu834763 committed on
Commit
0244928
Β·
verified Β·
1 Parent(s): 6729932

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -41
app.py CHANGED
@@ -3,14 +3,16 @@ import torch
3
  from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
4
  from peft import PeftModel
5
  import gradio as gr
 
 
 
 
 
6
 
7
  # ===================================
8
- # 1. Model & LoRA (your exact repo)
9
- # ===================================
10
- BASE_MODEL = "meta-llama/Meta-Llama-3-8B-Instruct" # do NOT change
11
- LORA_ADAPTER = "rishu834763/java-explainer-lora" # ← your LoRA
12
 
13
- # 4-bit quantization (fits on 1Γ—A100 40/80GB, 4090 24GB, T4 16GB with some offloading)
14
  quantization_config = BitsAndBytesConfig(
15
  load_in_4bit=True,
16
  bnb_4bit_quant_type="nf4",
@@ -22,7 +24,7 @@ print("Loading base model (Llama-3-8B-Instruct 4-bit)...")
22
  base_model = AutoModelForCausalLM.from_pretrained(
23
  BASE_MODEL,
24
  quantization_config=quantization_config,
25
- device_map="auto", # auto-offload to CPU if needed
26
  torch_dtype=torch.bfloat16,
27
  trust_remote_code=True,
28
  )
@@ -34,7 +36,7 @@ tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)
34
  tokenizer.pad_token = tokenizer.eos_token
35
 
36
  # ===================================
37
- # 2. Inference pipeline
38
  # ===================================
39
  pipe = torch.pipeline(
40
  "text-generation",
@@ -48,58 +50,32 @@ pipe = torch.pipeline(
48
  return_full_text=False,
49
  )
50
 
51
- # System prompt tuned for Java explanations
52
- SYSTEM_PROMPT = "You are an expert Java teacher. Explain concepts clearly, provide code examples, and answer in a concise but complete way."
53
 
54
  def chat(message: str, history):
55
  messages = [{"role": "system", "content": SYSTEM_PROMPT}]
56
-
57
- # Convert Gradio history β†’ Llama-3 format
58
  for user, assistant in history:
59
  messages.append({"role": "user", "content": user})
60
  if assistant:
61
  messages.append({"role": "assistant", "content": assistant})
62
-
63
  messages.append({"role": "user", "content": message})
64
 
65
- prompt = tokenizer.apply_chat_template(
66
- messages,
67
- tokenize=False,
68
- add_generation_prompt=True,
69
- )
70
-
71
  output = pipe(prompt)[0]["generated_text"]
72
  return output
73
 
74
  # ===================================
75
- # 3. Modern Gradio UI (2025)
76
- # ===================================
77
- with gr.Blocks(theme=gr.themes.Soft(), title="Java Explainer (Llama-3-8B + Your LoRA)") as demo:
78
- gr.Markdown("# πŸ§‘β€πŸ’» Java Explainer\nPowered by **rishu834763/java-explainer-lora** on Llama-3-8B-Instruct")
79
-
80
  chatbot = gr.Chatbot(height=620)
81
- msg = gr.Textbox(
82
- placeholder="Ask anything about Java (e.g. 'Explain Spring Boot @Autowired with example')",
83
- label="Your question",
84
- container=False,
85
- )
86
-
87
- with gr.Row():
88
- send = gr.Button("Send πŸš€", variant="primary")
89
- clear = gr.Button("Clear πŸ—‘οΈ")
90
 
91
  with gr.Row():
92
- retry = gr.Button("πŸ”„ Retry")
93
- undo = gr.Button("β†Ά Undo")
94
 
95
- # Events
96
  send.click(chat, [msg, chatbot], [msg, chatbot]).then(lambda: "", outputs=msg)
97
  msg.submit(chat, [msg, chatbot], [msg, chatbot]).then(lambda: "", outputs=msg)
98
  clear.click(lambda: None, None, chatbot, queue=False)
99
- retry.click(lambda h: h[:-1], chatbot, chatbot, queue=False)
100
- undo.click(lambda h: h[:-1], chatbot, chatbot, queue=False)
101
 
102
- demo.queue(max_size=64).launch(
103
- server_name="0.0.0.0",
104
- server_port=7860,
105
- )
 
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel
import gradio as gr
import os

from huggingface_hub import login

# Authenticate the Space so the gated Llama-3 base model can be downloaded.
# Fail fast with an actionable message instead of a bare KeyError when the
# HF_TOKEN secret is missing.
hf_token = os.environ.get("HF_TOKEN")
if not hf_token:
    raise RuntimeError(
        "HF_TOKEN environment variable is not set; it is required to "
        "download the gated base model."
    )
login(token=hf_token)

# ===================================
# Model identifiers
# ===================================
BASE_MODEL = "meta-llama/Meta-Llama-3-8B-Instruct"
LORA_ADAPTER = "rishu834763/java-explainer-lora"  # fine-tuned LoRA adapter
 
 
15
 
 
16
  quantization_config = BitsAndBytesConfig(
17
  load_in_4bit=True,
18
  bnb_4bit_quant_type="nf4",
 
24
# Load the base model with 4-bit quantization. device_map="auto" places
# layers across available devices (auto-offloading to CPU if needed).
base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    quantization_config=quantization_config,
    device_map="auto",
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
)
 
36
  tokenizer.pad_token = tokenizer.eos_token
37
 
38
  # ===================================
39
+ # Rest of the code stays exactly the same
40
  # ===================================
41
  pipe = torch.pipeline(
42
  "text-generation",
 
50
  return_full_text=False,
51
  )
52
 
53
SYSTEM_PROMPT = "You are an expert Java teacher. Explain concepts clearly, provide code examples, and answer concisely but completely."


def chat(message: str, history):
    """Generate an assistant reply for *message* given the chat *history*.

    Returns a ("", updated_history) pair so it can drive both components in
    the event wiring, which declares [msg, chatbot] as outputs — the original
    returned only the generated string, which mismatches the two declared
    outputs and breaks the Gradio callback.
    """
    # history may be None right after the Clear button resets the chatbot.
    history = history or []

    messages = [{"role": "system", "content": SYSTEM_PROMPT}]

    # Convert Gradio tuple-style history -> chat-template message list.
    for user_turn, assistant_turn in history:
        messages.append({"role": "user", "content": user_turn})
        if assistant_turn:
            messages.append({"role": "assistant", "content": assistant_turn})
    messages.append({"role": "user", "content": message})

    prompt = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )
    output = pipe(prompt)[0]["generated_text"]

    # Clear the textbox and append the new exchange to the chat history.
    return "", history + [(message, output)]
66
 
67
# ===================================
# Gradio UI
# ===================================
with gr.Blocks(theme=gr.themes.Soft(), title="Java Explainer") as demo:
    gr.Markdown("# Java Explainer\nPowered by **rishu834763/java-explainer-lora** + Llama-3-8B")

    chatbot = gr.Chatbot(height=620)
    msg = gr.Textbox(placeholder="Ask anything about Java...", label="Question", container=False)

    with gr.Row():
        send = gr.Button("Send", variant="primary")
        clear = gr.Button("Clear")

    # Route both the Send button and the Enter key through the same handler,
    # then blank the textbox once the reply has been rendered.
    send.click(chat, [msg, chatbot], [msg, chatbot]).then(lambda: "", outputs=msg)
    msg.submit(chat, [msg, chatbot], [msg, chatbot]).then(lambda: "", outputs=msg)
    clear.click(lambda: None, None, chatbot, queue=False)

demo.queue(max_size=64).launch(server_name="0.0.0.0", server_port=7860)