Spaces:

InnovisionLLC
/

example_test

Paused

App Files Files Community

Wenye He committed on Feb 17, 2025

Commit

8ff4f53

verified ·

1 Parent(s): 2d25646

Update app.py

Browse files

Files changed (1) hide show

app.py +22 -48

app.py CHANGED Viewed

@@ -1,61 +1,35 @@
 import gradio as gr
-from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
-# Choose your model – here we use GPT-2 as an example
-model_name = "gpt2"
-tokenizer = AutoTokenizer.from_pretrained(model_name)
-model = AutoModelForCausalLM.from_pretrained(model_name)
-def generate_response(user_input, chat_history):
-    """
-    This function takes the user's input and current conversation history,
-    appends the input to the history, builds the conversation string, and
-    generates a response using the local LLM.
-    """
     if chat_history is None:
         chat_history = []
-    # Append the user message to the conversation history.
-    chat_history.append(("User", user_input))
-    # Build a conversation string from the history.
-    conversation = ""
-    for speaker, message in chat_history:
-        conversation += f"{speaker}: {message}\n"
-    conversation += "AI:"  # Signal for the model to generate AI's response
-    # Tokenize the input and generate a response.
-    input_ids = tokenizer.encode(conversation, return_tensors="pt")
-    output_ids = model.generate(
-        input_ids,
-        max_length=input_ids.shape[1] + 50,  # Adjust max_length as needed
-        pad_token_id=tokenizer.eos_token_id
     )
-    generated_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
-    # Extract only the AI response (everything after the last "AI:" prompt).
-    ai_response = generated_text[len(conversation):].strip().split("\n")[0]
-    chat_history.append(("AI", ai_response))
-    # Return an empty string (to clear the input box) and updated chat history.
     return "", chat_history
-# Build the Gradio interface using Blocks for a flexible layout.
-with gr.Blocks() as demo:
-    gr.Markdown("# Local LLM Chatbot")
-    # Chatbot display widget
     chatbot = gr.Chatbot()
-    # Hidden state to hold the conversation history
     state = gr.State([])
-    # Textbox for user input
-    txt = gr.Textbox(placeholder="Enter your message and press Enter")
-    # When the textbox is submitted, generate a response.
-    txt.submit(generate_response, [txt, state], [txt, chatbot])
-# Launch the interface
-demo.launch()

 import gradio as gr
 import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer
+# Use Phi model (ensure to pass trust_remote_code if required)
+model_name = "microsoft/Phi-3-mini-4k-instruct"
+model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)
+tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+def generate_response_phi(user_input, chat_history):
     if chat_history is None:
         chat_history = []
+    # Append user message to the conversation as a dict (the Phi template expects this format)
+    chat_history.append({"role": "user", "content": user_input})
+    # Use the tokenizer's chat template to prepare inputs
+    inputs = tokenizer.apply_chat_template(
+        chat_history, add_generation_prompt=True, return_tensors="pt"
     )
+    # Generate response
+    output_ids = model.generate(**inputs, max_new_tokens=100)
+    generated_text = tokenizer.batch_decode(output_ids)[0]
+    # Extract assistant reply (assuming the template adds "<|assistant|>" marker)
+    answer = generated_text.split("<|assistant|>")[-1].strip()
+    chat_history.append({"role": "assistant", "content": answer})
     return "", chat_history
+with gr.Blocks() as phi_demo:
+    gr.Markdown("# Phi Chatbot")
     chatbot = gr.Chatbot()
     state = gr.State([])
+    txt = gr.Textbox(placeholder="Enter your message")
+    txt.submit(generate_response_phi, [txt, state], [txt, chatbot])
+phi_demo.launch()