pratikshahp committed on
Commit
11d58e9
·
verified ·
1 Parent(s): b8c4ec8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +50 -38
app.py CHANGED
@@ -1,51 +1,63 @@
 
 
 
1
  from llama_index.llms.huggingface import HuggingFaceLLM
2
 
 
3
  def messages_to_prompt(messages):
4
  prompt = ""
5
  for message in messages:
6
  if message.role == 'system':
7
- prompt += f"<|system|>\n{message.content}</s>\n"
8
  elif message.role == 'user':
9
- prompt += f"<|user|>\n{message.content}</s>\n"
10
  elif message.role == 'assistant':
11
- prompt += f"<|assistant|>\n{message.content}</s>\n"
12
-
13
  # ensure we start with a system prompt, insert blank if needed
14
- if not prompt.startswith("<|system|>\n"):
15
- prompt = "<|system|>\n</s>\n" + prompt
16
-
17
  # add final assistant prompt
18
- prompt = prompt + "<|assistant|>\n"
19
-
20
  return prompt
21
 
 
22
  def completion_to_prompt(completion):
23
- return f"<|system|>\n</s>\n<|user|>\n{completion}</s>\n<|assistant|>\n"
24
-
25
- import torch
26
- from transformers import BitsAndBytesConfig
27
- from llama_index.core.prompts import PromptTemplate
28
- from llama_index.llms.huggingface import HuggingFaceLLM
29
-
30
- # quantize to save memory
31
- quantization_config = BitsAndBytesConfig(
32
- load_in_4bit=True,
33
- bnb_4bit_compute_dtype=torch.float16,
34
- bnb_4bit_quant_type="nf4",
35
- bnb_4bit_use_double_quant=True,
36
- )
37
-
38
- llm = HuggingFaceLLM(
39
- model_name="HuggingFaceH4/zephyr-7b-beta",
40
- tokenizer_name="HuggingFaceH4/zephyr-7b-beta",
41
- context_window=3900,
42
- max_new_tokens=256,
43
- model_kwargs={"quantization_config": quantization_config},
44
- generate_kwargs={"temperature": 0.7, "top_k": 50, "top_p": 0.95},
45
- messages_to_prompt=messages_to_prompt,
46
- completion_to_prompt=completion_to_prompt,
47
- device_map="auto",
48
- )
49
-
50
- response = llm.complete("What is the meaning of life?")
51
- print(str(response))
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import torch
3
+ from transformers import BitsAndBytesConfig
4
  from llama_index.llms.huggingface import HuggingFaceLLM
5
 
6
+ # Function to convert messages to prompt
7
def messages_to_prompt(messages):
    """Convert a sequence of chat messages into a Zephyr-format prompt string.

    Each message is expected to expose ``.role`` (one of ``'system'``,
    ``'user'``, ``'assistant'``) and ``.content``. Messages are wrapped in
    Zephyr's special role tokens. A blank system block is prepended when the
    conversation does not start with one, and a trailing ``<|assistant|>`` tag
    cues the model to generate its reply.

    NOTE(review): the role tokens (``<|system|>`` etc.) had been stripped from
    this version — all three branches appended identical text. Restored from
    the previous revision of this file.
    """
    prompt = ""
    for message in messages:
        if message.role == 'system':
            prompt += f"<|system|>\n{message.content}</s>\n"
        elif message.role == 'user':
            prompt += f"<|user|>\n{message.content}</s>\n"
        elif message.role == 'assistant':
            prompt += f"<|assistant|>\n{message.content}</s>\n"

    # Ensure we start with a system prompt; insert a blank one if needed.
    if not prompt.startswith("<|system|>\n"):
        prompt = "<|system|>\n</s>\n" + prompt

    # Add the final assistant tag so the model continues as the assistant.
    prompt = prompt + "<|assistant|>\n"

    return prompt
25
 
26
+ # Function to convert completion to prompt
27
def completion_to_prompt(completion):
    """Wrap a bare completion string in the Zephyr chat template.

    Produces a blank system block, the user's text, and a trailing assistant
    tag so the model generates a reply.

    NOTE(review): the role tokens had been stripped from this version;
    restored from the previous revision of this file.
    """
    return f"<|system|>\n</s>\n<|user|>\n{completion}</s>\n<|assistant|>\n"
29
+
30
+ # Load the LLM without quantization
31
@st.cache_resource
def load_llm():
    """Build and cache the Zephyr-7B HuggingFaceLLM (CPU, no quantization).

    ``st.cache_resource`` ensures the model is loaded once per Streamlit
    session rather than on every rerun of the script.
    """
    model_id = "HuggingFaceH4/zephyr-7b-beta"
    llm_kwargs = {
        "model_name": model_id,
        "tokenizer_name": model_id,
        "context_window": 3900,
        "max_new_tokens": 256,
        "generate_kwargs": {"temperature": 0.7, "top_k": 50, "top_p": 0.95},
        "messages_to_prompt": messages_to_prompt,
        "completion_to_prompt": completion_to_prompt,
        # Run on CPU (no GPU / quantization assumed in this deployment).
        "device_map": "cpu",
    }
    return HuggingFaceLLM(**llm_kwargs)
43
+
44
# ---- Streamlit app: simple text-generation front-end for the LLM ----
llm = load_llm()

st.title("LLM Text Generation App")

# Prompt input area.
user_input = st.text_area("Enter your prompt:", "")

if st.button("Generate Response"):
    # Guard clause: reject blank/whitespace-only prompts.
    if user_input.strip() == "":
        st.warning("Please enter a valid prompt.")
    else:
        # Run the model while showing a spinner, then render the result.
        with st.spinner("Generating response..."):
            response = llm.complete(user_input)
        st.write("Generated Response:")
        st.write(str(response))