import streamlit as st
import torch
from transformers import BitsAndBytesConfig
from llama_index.llms.huggingface import HuggingFaceLLM
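
# To launch the app (assuming this file is saved as app.py):
#   streamlit run app.py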

# Convert a list of chat messages into a Zephyr-format prompt string
def messages_to_prompt(messages):
    prompt = ""
    for message in messages:
        if message.role == 'system':
            prompt += f"<|system|>\n{message.content}</s>\n"
        elif message.role == 'user':
            prompt += f"<|user|>\n{message.content}</s>\n"
        elif message.role == 'assistant':
            prompt += f"<|assistant|>\n{message.content}</s>\n"

    # ensure we start with a system prompt, insert blank if needed
    if not prompt.startswith("<|system|>\n"):
        prompt = "<|system|>\n</s>\n" + prompt

    # add final assistant prompt so the model continues as the assistant
    prompt = prompt + "<|assistant|>\n"

    return prompt

# Wrap a bare completion string in the Zephyr prompt format
def completion_to_prompt(completion):
    return f"<|system|>\n</s>\n<|user|>\n{completion}</s>\n<|assistant|>\n"
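
# For example, completion_to_prompt("Hello") yields the Zephyr-style string:
#   <|system|>
#   </s>
#   <|user|>
#   Hello</s>
#   <|assistant|>
# messages_to_prompt builds the same shape from a full chat history.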

# Load the LLM without quantization
@st.cache_resource
def load_llm():
    return HuggingFaceLLM(
        model_name="HuggingFaceH4/zephyr-7b-beta",
        tokenizer_name="HuggingFaceH4/zephyr-7b-beta",
        context_window=3900,
        max_new_tokens=256,
        # do_sample=True is required for temperature/top_k/top_p to take effect
        generate_kwargs={"do_sample": True, "temperature": 0.7, "top_k": 50, "top_p": 0.95},
        messages_to_prompt=messages_to_prompt,
        completion_to_prompt=completion_to_prompt,
        device_map="cpu"  # Use CPU
    )
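
# A quantized alternative (a sketch, not called by this app): on a CUDA GPU with
# bitsandbytes installed, the same model can be loaded in 4-bit to cut memory use.
# The NF4 settings below are a common default; the function name is illustrative.
@st.cache_resource
def load_llm_quantized():
    quantization_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=torch.float16,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_use_double_quant=True,
    )
    return HuggingFaceLLM(
        model_name="HuggingFaceH4/zephyr-7b-beta",
        tokenizer_name="HuggingFaceH4/zephyr-7b-beta",
        context_window=3900,
        max_new_tokens=256,
        model_kwargs={"quantization_config": quantization_config},
        generate_kwargs={"do_sample": True, "temperature": 0.7, "top_k": 50, "top_p": 0.95},
        messages_to_prompt=messages_to_prompt,
        completion_to_prompt=completion_to_prompt,
        device_map="auto",  # place layers on the available GPU(s)
    )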

llm = load_llm()

# Streamlit app interface
st.title("LLM Text Generation App")

# Text input for the prompt
user_input = st.text_area("Enter your prompt:", "")

# Button to generate response
if st.button("Generate Response"):
    if user_input.strip() != "":
        # Generate response based on the prompt
        with st.spinner("Generating response..."):
            response = llm.complete(user_input)
        
        # Display the generated response
        st.write("Generated Response:")
        st.write(str(response))
    else:
        st.warning("Please enter a valid prompt.")