Spaces:

Shriti09
/

MicrosoftPhiQloraExample

Sleeping

App Files Files Community

Shriti09 committed on Mar 21, 2025

Commit

89ef257

verified ·

1 Parent(s): 5010915

Update app.py

Browse files

Files changed (1) hide show

app.py +18 -33

app.py CHANGED Viewed

@@ -2,7 +2,6 @@ import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
 from peft import PeftModel
 import gradio as gr
-import os
 # Use GPU if available
 device = "cuda" if torch.cuda.is_available() else "cpu"
@@ -12,16 +11,18 @@ base_model_name = "microsoft/phi-2"  # Pull from HF Hub directly
 adapter_path = "Shriti09/Microsoft-Phi-QLora"  # Update with your Hugging Face repo path
 print("🔧 Loading base model...")
-# Using the Accelerator to load the model and dispatch to the correct devices
 base_model = AutoModelForCausalLM.from_pretrained(
     base_model_name,
     torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32
 )
 print("🔧 Loading LoRA adapter...")
 adapter_model = PeftModel.from_pretrained(base_model, adapter_path)
 print("🔗 Merging adapter into base model...")
 merged_model = adapter_model.merge_and_unload()
 merged_model.eval()
@@ -29,16 +30,10 @@ merged_model.eval()
 tokenizer = AutoTokenizer.from_pretrained(base_model_name)
 print("✅ Model ready for inference!")
-# Chat function with history
-def chat_fn(message, history):
-    # Convert history to the required format for gr.Chatbot (list of dictionaries with role and content)
-    full_prompt = ""
-    for user_msg, bot_msg in history:
-        full_prompt += f"User: {user_msg}\nAI: {bot_msg}\n"
-    full_prompt += f"User: {message}\nAI:"
-    # Tokenize inputs
-    inputs = tokenizer(full_prompt, return_tensors="pt").to(device)
     with torch.no_grad():
         outputs = merged_model.generate(
@@ -50,30 +45,20 @@ def chat_fn(message, history):
             pad_token_id=tokenizer.eos_token_id
         )
-    # Decode and return only the AI's latest response
     response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    response = response.split("AI:")[-1].strip()
-    # Append to history in the correct format for gr.Chatbot (list of dictionaries)
-    history.append({"role": "user", "content": message})
-    history.append({"role": "assistant", "content": response})
-    return history, history
 # Gradio UI
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
-    gr.Markdown("<h1>🧠 Phi-2 QLoRA Chatbot</h1>")
-    # Use 'type' parameter to specify message format for gr.Chatbot()
-    chatbot = gr.Chatbot(type="messages")  # Use 'messages' type for structured messages
-    message = gr.Textbox(label="Your message:")
-    clear = gr.Button("Clear chat")
-    state = gr.State([])
-    message.submit(chat_fn, [message, state], [chatbot, state])
-    clear.click(lambda: [], None, chatbot)
-    clear.click(lambda: [], None, state)
-# Run the app without the 'concurrency_count' argument and share the app publicly
-demo.queue().launch(share=True)

 from transformers import AutoModelForCausalLM, AutoTokenizer
 from peft import PeftModel
 import gradio as gr
 # Use GPU if available
 device = "cuda" if torch.cuda.is_available() else "cpu"
 adapter_path = "Shriti09/Microsoft-Phi-QLora"  # Update with your Hugging Face repo path
 print("🔧 Loading base model...")
+# Load the base model
 base_model = AutoModelForCausalLM.from_pretrained(
     base_model_name,
     torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32
 )
 print("🔧 Loading LoRA adapter...")
+# Load the LoRA adapter
 adapter_model = PeftModel.from_pretrained(base_model, adapter_path)
 print("🔗 Merging adapter into base model...")
+# Merge adapter into the base model
 merged_model = adapter_model.merge_and_unload()
 merged_model.eval()
 tokenizer = AutoTokenizer.from_pretrained(base_model_name)
 print("✅ Model ready for inference!")
+# Text generation function
+def generate_text(prompt):
+    # Tokenize the input
+    inputs = tokenizer(prompt, return_tensors="pt").to(device)
     with torch.no_grad():
         outputs = merged_model.generate(
             pad_token_id=tokenizer.eos_token_id
         )
+    # Decode and return the generated response
     response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+    return response
 # Gradio UI
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
+    gr.Markdown("<h1>🧠 Phi-2 QLoRA Text Generator</h1>")
+    # Textbox for the user's prompt and a textbox that displays the generated text
+    prompt = gr.Textbox(label="Enter your prompt:", lines=2)
+    output = gr.Textbox(label="Generated text:", lines=5)
+    # Generate text when the prompt textbox is submitted (there is no button; submit fires on Enter)
+    prompt.submit(generate_text, prompt, output)
+# Launch the app
+demo.launch(share=True)