pratikshahp committed on
Commit
11d58e9
·
verified ·
1 Parent(s): b8c4ec8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +50 -38
app.py CHANGED
@@ -1,51 +1,63 @@
 
 
 
1
  from llama_index.llms.huggingface import HuggingFaceLLM
2
 
 
3
  def messages_to_prompt(messages):
4
  prompt = ""
5
  for message in messages:
6
  if message.role == 'system':
7
- prompt += f"<|system|>\n{message.content}</s>\n"
8
  elif message.role == 'user':
9
- prompt += f"<|user|>\n{message.content}</s>\n"
10
  elif message.role == 'assistant':
11
- prompt += f"<|assistant|>\n{message.content}</s>\n"
12
-
13
  # ensure we start with a system prompt, insert blank if needed
14
- if not prompt.startswith("<|system|>\n"):
15
- prompt = "<|system|>\n</s>\n" + prompt
16
-
17
  # add final assistant prompt
18
- prompt = prompt + "<|assistant|>\n"
19
-
20
  return prompt
21
 
 
22
  def completion_to_prompt(completion):
23
- return f"<|system|>\n</s>\n<|user|>\n{completion}</s>\n<|assistant|>\n"
24
-
25
- import torch
26
- from transformers import BitsAndBytesConfig
27
- from llama_index.core.prompts import PromptTemplate
28
- from llama_index.llms.huggingface import HuggingFaceLLM
29
-
30
- # quantize to save memory
31
- quantization_config = BitsAndBytesConfig(
32
- load_in_4bit=True,
33
- bnb_4bit_compute_dtype=torch.float16,
34
- bnb_4bit_quant_type="nf4",
35
- bnb_4bit_use_double_quant=True,
36
- )
37
-
38
- llm = HuggingFaceLLM(
39
- model_name="HuggingFaceH4/zephyr-7b-beta",
40
- tokenizer_name="HuggingFaceH4/zephyr-7b-beta",
41
- context_window=3900,
42
- max_new_tokens=256,
43
- model_kwargs={"quantization_config": quantization_config},
44
- generate_kwargs={"temperature": 0.7, "top_k": 50, "top_p": 0.95},
45
- messages_to_prompt=messages_to_prompt,
46
- completion_to_prompt=completion_to_prompt,
47
- device_map="auto",
48
- )
49
-
50
- response = llm.complete("What is the meaning of life?")
51
- print(str(response))
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import torch
3
+ from transformers import BitsAndBytesConfig
4
  from llama_index.llms.huggingface import HuggingFaceLLM
5
 
6
+ # Function to convert messages to prompt
7
def messages_to_prompt(messages):
    """Convert a sequence of chat messages into a Zephyr-format prompt string.

    Each message is expected to expose ``.role`` (one of ``'system'``,
    ``'user'``, ``'assistant'``) and ``.content``. Messages are wrapped in
    Zephyr's special role tokens. A blank system block is prepended when the
    conversation does not start with one, and a trailing ``<|assistant|>`` tag
    cues the model to generate its reply.

    NOTE(review): the role tokens (``<|system|>`` etc.) had been stripped from
    this version — all three branches appended identical text. Restored from
    the previous revision of this file.
    """
    prompt = ""
    for message in messages:
        if message.role == 'system':
            prompt += f"<|system|>\n{message.content}</s>\n"
        elif message.role == 'user':
            prompt += f"<|user|>\n{message.content}</s>\n"
        elif message.role == 'assistant':
            prompt += f"<|assistant|>\n{message.content}</s>\n"

    # Ensure we start with a system prompt; insert a blank one if needed.
    if not prompt.startswith("<|system|>\n"):
        prompt = "<|system|>\n</s>\n" + prompt

    # Add the final assistant tag so the model continues as the assistant.
    prompt = prompt + "<|assistant|>\n"

    return prompt
25
 
26
+ # Function to convert completion to prompt
27
def completion_to_prompt(completion):
    """Wrap a bare completion string in the Zephyr chat template.

    Produces a blank system block, the user's text, and a trailing assistant
    tag so the model generates a reply.

    NOTE(review): the role tokens had been stripped from this version;
    restored from the previous revision of this file.
    """
    return f"<|system|>\n</s>\n<|user|>\n{completion}</s>\n<|assistant|>\n"
29
+
30
+ # Load the LLM without quantization
31
@st.cache_resource
def load_llm():
    """Build and cache the Zephyr-7B HuggingFaceLLM (CPU, no quantization).

    ``st.cache_resource`` ensures the model is loaded once per Streamlit
    session rather than on every rerun of the script.
    """
    model_id = "HuggingFaceH4/zephyr-7b-beta"
    llm_kwargs = {
        "model_name": model_id,
        "tokenizer_name": model_id,
        "context_window": 3900,
        "max_new_tokens": 256,
        "generate_kwargs": {"temperature": 0.7, "top_k": 50, "top_p": 0.95},
        "messages_to_prompt": messages_to_prompt,
        "completion_to_prompt": completion_to_prompt,
        # Run on CPU (no GPU / quantization assumed in this deployment).
        "device_map": "cpu",
    }
    return HuggingFaceLLM(**llm_kwargs)
43
+
44
# ---- Streamlit app: simple text-generation front-end for the LLM ----
llm = load_llm()

st.title("LLM Text Generation App")

# Prompt input area.
user_input = st.text_area("Enter your prompt:", "")

if st.button("Generate Response"):
    # Guard clause: reject blank/whitespace-only prompts.
    if user_input.strip() == "":
        st.warning("Please enter a valid prompt.")
    else:
        # Run the model while showing a spinner, then render the result.
        with st.spinner("Generating response..."):
            response = llm.complete(user_input)
        st.write("Generated Response:")
        st.write(str(response))