rishu834763 committed on
Commit
44719c9
·
verified ·
1 Parent(s): 0244928

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -22
app.py CHANGED
@@ -1,17 +1,12 @@
1
  # app.py
2
  import torch
3
- from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
4
  from peft import PeftModel
5
  import gradio as gr
6
- import os
7
-
8
- # THIS IS THE ONLY NEW LINE YOU NEED
9
- from huggingface_hub import login
10
- login(token=os.environ["HF_TOKEN"]) # ← This authenticates the Space
11
 
12
  # ===================================
13
- BASE_MODEL = "meta-llama/Meta-Llama-3-8B-Instruct"
14
- LORA_ADAPTER = "rishu834763/java-explainer-lora" # your LoRA
15
 
16
  quantization_config = BitsAndBytesConfig(
17
  load_in_4bit=True,
@@ -20,7 +15,7 @@ quantization_config = BitsAndBytesConfig(
20
  bnb_4bit_use_double_quant=True,
21
  )
22
 
23
- print("Loading base model (Llama-3-8B-Instruct 4-bit)...")
24
  base_model = AutoModelForCausalLM.from_pretrained(
25
  BASE_MODEL,
26
  quantization_config=quantization_config,
@@ -29,16 +24,12 @@ base_model = AutoModelForCausalLM.from_pretrained(
29
  trust_remote_code=True,
30
  )
31
 
32
- print("Loading your LoRA adapter...")
33
  model = PeftModel.from_pretrained(base_model, LORA_ADAPTER)
34
-
35
  tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)
36
  tokenizer.pad_token = tokenizer.eos_token
37
 
38
- # ===================================
39
- # Rest of the code stays exactly the same
40
- # ===================================
41
- pipe = torch.pipeline(
42
  "text-generation",
43
  model=model,
44
  tokenizer=tokenizer,
@@ -50,7 +41,7 @@ pipe = torch.pipeline(
50
  return_full_text=False,
51
  )
52
 
53
- SYSTEM_PROMPT = "You are an expert Java teacher. Explain concepts clearly, provide code examples, and answer concisely but completely."
54
 
55
  def chat(message: str, history):
56
  messages = [{"role": "system", "content": SYSTEM_PROMPT}]
@@ -61,17 +52,17 @@ def chat(message: str, history):
61
  messages.append({"role": "user", "content": message})
62
 
63
  prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
64
- output = pipe(prompt)[0]["generated_text"]
65
- return output
66
 
67
  # ===================================
68
- with gr.Blocks(theme=gr.themes.Soft(), title="Java Explainer") as demo:
69
- gr.Markdown("# Java Explainer\nPowered by **rishu834763/java-explainer-lora** + Llama-3-8B")
70
  chatbot = gr.Chatbot(height=620)
71
- msg = gr.Textbox(placeholder="Ask anything about Java...", label="Question", container=False)
72
 
73
  with gr.Row():
74
- send = gr.Button("Send", variant="primary")
75
  clear = gr.Button("Clear")
76
 
77
  send.click(chat, [msg, chatbot], [msg, chatbot]).then(lambda: "", outputs=msg)
 
1
  # app.py
2
  import torch
3
+ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, pipeline # ← pipeline is here!
4
  from peft import PeftModel
5
  import gradio as gr
 
 
 
 
 
6
 
7
  # ===================================
8
+ BASE_MODEL = "meta-llama/Meta-Llama-3.1-8B-Instruct" # Open, no gate!
9
+ LORA_ADAPTER = "rishu834763/java-explainer-lora"
10
 
11
  quantization_config = BitsAndBytesConfig(
12
  load_in_4bit=True,
 
15
  bnb_4bit_use_double_quant=True,
16
  )
17
 
18
+ print("Loading Llama-3.1-8B-Instruct 4-bit + your LoRA...")
19
  base_model = AutoModelForCausalLM.from_pretrained(
20
  BASE_MODEL,
21
  quantization_config=quantization_config,
 
24
  trust_remote_code=True,
25
  )
26
 
 
27
  model = PeftModel.from_pretrained(base_model, LORA_ADAPTER)
 
28
  tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)
29
  tokenizer.pad_token = tokenizer.eos_token
30
 
31
+ # ← FIXED: pipeline from transformers, not torch
32
+ pipe = pipeline(
 
 
33
  "text-generation",
34
  model=model,
35
  tokenizer=tokenizer,
 
41
  return_full_text=False,
42
  )
43
 
44
+ SYSTEM_PROMPT = "You are an expert Java teacher. Explain concepts clearly with code examples."
45
 
46
  def chat(message: str, history):
47
  messages = [{"role": "system", "content": SYSTEM_PROMPT}]
 
52
  messages.append({"role": "user", "content": message})
53
 
54
  prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
55
+ outputs = pipe(prompt)
56
+ return outputs[0]["generated_text"]
57
 
58
  # ===================================
59
+ with gr.Blocks(theme=gr.themes.Soft()) as demo:
60
+ gr.Markdown("# Java Explainer\nPowered by your LoRA on Llama-3.1-8B-Instruct (4-bit)")
61
  chatbot = gr.Chatbot(height=620)
62
+ msg = gr.Textbox(placeholder="Ask anything about Java...", container=False)
63
 
64
  with gr.Row():
65
+ send = gr.Button("Send 🚀", variant="primary")
66
  clear = gr.Button("Clear")
67
 
68
  send.click(chat, [msg, chatbot], [msg, chatbot]).then(lambda: "", outputs=msg)