import streamlit as st
import torch
from transformers import BitsAndBytesConfig
from llama_index.llms.huggingface import HuggingFaceLLM
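
# To launch the app (assuming this file is saved as app.py):
#   streamlit run app.py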

# Convert a list of chat messages into a Zephyr-format prompt string
def messages_to_prompt(messages):
    prompt = ""
    for message in messages:
        if message.role == 'system':
            prompt += f"<|system|>\n{message.content}</s>\n"
        elif message.role == 'user':
            prompt += f"<|user|>\n{message.content}</s>\n"
        elif message.role == 'assistant':
            prompt += f"<|assistant|>\n{message.content}</s>\n"

    # ensure we start with a system prompt, insert blank if needed
    if not prompt.startswith("<|system|>\n"):
        prompt = "<|system|>\n</s>\n" + prompt

    # add final assistant prompt so the model continues as the assistant
    prompt = prompt + "<|assistant|>\n"

    return prompt

# Wrap a bare completion string in the Zephyr prompt format
def completion_to_prompt(completion):
    return f"<|system|>\n</s>\n<|user|>\n{completion}</s>\n<|assistant|>\n"
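
# For example, completion_to_prompt("Hello") yields the Zephyr-style string:
#   <|system|>
#   </s>
#   <|user|>
#   Hello</s>
#   <|assistant|>
# messages_to_prompt builds the same shape from a full chat history.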

# Load the LLM without quantization
@st.cache_resource
def load_llm():
    return HuggingFaceLLM(
        model_name="HuggingFaceH4/zephyr-7b-beta",
        tokenizer_name="HuggingFaceH4/zephyr-7b-beta",
        context_window=3900,
        max_new_tokens=256,
        # do_sample=True is required for temperature/top_k/top_p to take effect
        generate_kwargs={"do_sample": True, "temperature": 0.7, "top_k": 50, "top_p": 0.95},
        messages_to_prompt=messages_to_prompt,
        completion_to_prompt=completion_to_prompt,
        device_map="cpu"  # Use CPU
    )
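
# A quantized alternative (a sketch, not called by this app): on a CUDA GPU with
# bitsandbytes installed, the same model can be loaded in 4-bit to cut memory use.
# The NF4 settings below are a common default; the function name is illustrative.
@st.cache_resource
def load_llm_quantized():
    quantization_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=torch.float16,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_use_double_quant=True,
    )
    return HuggingFaceLLM(
        model_name="HuggingFaceH4/zephyr-7b-beta",
        tokenizer_name="HuggingFaceH4/zephyr-7b-beta",
        context_window=3900,
        max_new_tokens=256,
        model_kwargs={"quantization_config": quantization_config},
        generate_kwargs={"do_sample": True, "temperature": 0.7, "top_k": 50, "top_p": 0.95},
        messages_to_prompt=messages_to_prompt,
        completion_to_prompt=completion_to_prompt,
        device_map="auto",  # place layers on the available GPU(s)
    )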

llm = load_llm()

# Streamlit app interface
st.title("LLM Text Generation App")

# Text input for the prompt
user_input = st.text_area("Enter your prompt:", "")

# Button to generate response
if st.button("Generate Response"):
    if user_input.strip() != "":
        # Generate response based on the prompt
        with st.spinner("Generating response..."):
            response = llm.complete(user_input)
        
        # Display the generated response
        st.write("Generated Response:")
        st.write(str(response))
    else:
        st.warning("Please enter a valid prompt.")