Wenye He committed on
Commit
8ff4f53
·
verified ·
1 Parent(s): 2d25646

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -48
app.py CHANGED
@@ -1,61 +1,35 @@
1
  import gradio as gr
2
- from transformers import AutoModelForCausalLM, AutoTokenizer
3
  import torch
 
4
 
5
- # Choose your model here we use GPT-2 as an example
6
- model_name = "gpt2"
7
- tokenizer = AutoTokenizer.from_pretrained(model_name)
8
- model = AutoModelForCausalLM.from_pretrained(model_name)
9
 
10
- def generate_response(user_input, chat_history):
11
- """
12
- This function takes the user's input and current conversation history,
13
- appends the input to the history, builds the conversation string, and
14
- generates a response using the local LLM.
15
- """
16
  if chat_history is None:
17
  chat_history = []
 
 
18
 
19
- # Append the user message to the conversation history.
20
- chat_history.append(("User", user_input))
21
-
22
- # Build a conversation string from the history.
23
- conversation = ""
24
- for speaker, message in chat_history:
25
- conversation += f"{speaker}: {message}\n"
26
- conversation += "AI:" # Signal for the model to generate AI's response
27
-
28
- # Tokenize the input and generate a response.
29
- input_ids = tokenizer.encode(conversation, return_tensors="pt")
30
- output_ids = model.generate(
31
- input_ids,
32
- max_length=input_ids.shape[1] + 50, # Adjust max_length as needed
33
- pad_token_id=tokenizer.eos_token_id
34
  )
35
- generated_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
36
-
37
- # Extract only the AI response (everything after the last "AI:" prompt).
38
- ai_response = generated_text[len(conversation):].strip().split("\n")[0]
39
- chat_history.append(("AI", ai_response))
40
-
41
- # Return an empty string (to clear the input box) and updated chat history.
42
  return "", chat_history
43
 
44
- # Build the Gradio interface using Blocks for a flexible layout.
45
- with gr.Blocks() as demo:
46
- gr.Markdown("# Local LLM Chatbot")
47
-
48
- # Chatbot display widget
49
  chatbot = gr.Chatbot()
50
-
51
- # Hidden state to hold the conversation history
52
  state = gr.State([])
 
 
53
 
54
- # Textbox for user input
55
- txt = gr.Textbox(placeholder="Enter your message and press Enter")
56
-
57
- # When the textbox is submitted, generate a response.
58
- txt.submit(generate_response, [txt, state], [txt, chatbot])
59
-
60
- # Launch the interface
61
- demo.launch()
 
1
  import gradio as gr
 
2
  import torch
3
+ from transformers import AutoModelForCausalLM, AutoTokenizer
4
 
5
+ # Use Phi model (ensure to pass trust_remote_code if required)
6
+ model_name = "microsoft/Phi-3-mini-4k-instruct"
7
+ model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)
8
+ tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
9
 
10
+ def generate_response_phi(user_input, chat_history):
 
 
 
 
 
11
  if chat_history is None:
12
  chat_history = []
13
+ # Append user message to the conversation as a dict (the Phi template expects this format)
14
+ chat_history.append({"role": "user", "content": user_input})
15
 
16
+ # Use the tokenizer's chat template to prepare inputs
17
+ inputs = tokenizer.apply_chat_template(
18
+ chat_history, add_generation_prompt=True, return_tensors="pt"
 
 
 
 
 
 
 
 
 
 
 
 
19
  )
20
+ # Generate response
21
+ output_ids = model.generate(**inputs, max_new_tokens=100)
22
+ generated_text = tokenizer.batch_decode(output_ids)[0]
23
+ # Extract assistant reply (assuming the template adds "<|assistant|>" marker)
24
+ answer = generated_text.split("<|assistant|>")[-1].strip()
25
+ chat_history.append({"role": "assistant", "content": answer})
 
26
  return "", chat_history
27
 
28
+ with gr.Blocks() as phi_demo:
29
+ gr.Markdown("# Phi Chatbot")
 
 
 
30
  chatbot = gr.Chatbot()
 
 
31
  state = gr.State([])
32
+ txt = gr.Textbox(placeholder="Enter your message")
33
+ txt.submit(generate_response_phi, [txt, state], [txt, chatbot])
34
 
35
+ phi_demo.launch()