import streamlit as st
from llama_index.llms.huggingface import HuggingFaceLLM
# Convert chat messages to Zephyr's expected prompt format
def messages_to_prompt(messages):
    prompt = ""
    for message in messages:
        if message.role == "system":
            prompt += f"<|system|>\n{message.content}</s>\n"
        elif message.role == "user":
            prompt += f"<|user|>\n{message.content}</s>\n"
        elif message.role == "assistant":
            prompt += f"<|assistant|>\n{message.content}</s>\n"

    # ensure we start with a system prompt, insert blank if needed
    if not prompt.startswith("<|system|>\n"):
        prompt = "<|system|>\n</s>\n" + prompt

    # add final assistant prompt
    prompt = prompt + "<|assistant|>\n"
    return prompt
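# For example, a system message "Be brief." followed by the user message
# "Hi" becomes:
#   "<|system|>\nBe brief.</s>\n<|user|>\nHi</s>\n<|assistant|>\n"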
# Convert a plain completion string to the Zephyr prompt format
def completion_to_prompt(completion):
    return f"<|system|>\n</s>\n<|user|>\n{completion}</s>\n<|assistant|>\n"
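# llm.complete() routes through completion_to_prompt, wrapping the bare
# string in an empty system turn, a user turn, and the final assistant
# tag where generation continues.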
# Load the LLM on CPU, without quantization
# (cached so Streamlit does not reload the model on every rerun)
@st.cache_resource
def load_llm():
    return HuggingFaceLLM(
        model_name="HuggingFaceH4/zephyr-7b-beta",
        tokenizer_name="HuggingFaceH4/zephyr-7b-beta",
        context_window=3900,
        max_new_tokens=256,
        # do_sample is required for temperature/top_k/top_p to take effect
        generate_kwargs={"temperature": 0.7, "do_sample": True, "top_k": 50, "top_p": 0.95},
        messages_to_prompt=messages_to_prompt,
        completion_to_prompt=completion_to_prompt,
        device_map="cpu",  # use CPU
    )

llm = load_llm()
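# Note: zephyr-7b-beta has ~7B parameters, so loading it in full precision
# needs roughly 28 GB of RAM, and generation on CPU is slow; a host with
# less memory will fail at model load.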
# Streamlit app interface
st.title("LLM Text Generation App")

# Text input for the prompt
user_input = st.text_area("Enter your prompt:", "")
# Button to generate response
if st.button("Generate Response"):
    if user_input.strip():
        # Generate response based on the prompt
        with st.spinner("Generating response..."):
            response = llm.complete(user_input)
        # Display the generated response
        st.write("Generated Response:")
        st.write(str(response))
    else:
        st.warning("Please enter a valid prompt.")
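# To run locally (assuming the script is saved as app.py):
#   streamlit run app.py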