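"""Streamlit question-answering app backed by a fine-tuned LLaMA model.

Launch locally with: streamlit run app.py
"""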
import torch
import streamlit as st
from transformers import AutoModelForCausalLM, AutoTokenizer

# Use the GPU when available; fall back to CPU so the app also runs without CUDA
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the model and tokenizer once and cache them across Streamlit reruns,
# so they are not reloaded on every user interaction
@st.cache_resource
def load_model_and_tokenizer(name):
    model = AutoModelForCausalLM.from_pretrained(name).to(device)
    return model, AutoTokenizer.from_pretrained(name)

model_name = "khaledsayed1/llama_QA"  # Replace with your actual model if different
model, tokenizer = load_model_and_tokenizer(model_name)
# Title of the web page
st.title("Question Answering with a LLaMA Model")
# Description for the user
st.write("Enter your question below, and the model will generate an answer.")
# Sidebar for controlling model parameters
st.sidebar.header("Model Parameters")
# Get the user-controlled values from the sidebar
temperature = st.sidebar.slider("Temperature", 0.1, 1.5, 0.7, 0.1)  # must be strictly positive when sampling
top_k = st.sidebar.slider("Top-k", 1, 100, 50, 1)
top_p = st.sidebar.slider("Top-p (nucleus sampling)", 0.0, 1.0, 0.95, 0.01)
max_new_tokens = st.sidebar.slider("Max New Tokens", 1, 200, 128, 1)
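# Note: the sampling parameters above only take effect when do_sample=True is
# passed to model.generate(); otherwise transformers decodes greedily and
# ignores them (emitting a warning).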
# User input (question)
user_question = st.text_input("Your Question:", "")
# If a question is entered, process it and show the answer
if user_question:
    # Define the prompt with the user's question
    # ("السؤال" means "Question", "الإجابة" means "Answer")
    alpaca_prompt = """
السؤال: {}
الإجابة:
"""
    formatted_prompt = alpaca_prompt.format(user_question)

    # Tokenize the input and move it to the same device as the model
    inputs = tokenizer([formatted_prompt], return_tensors="pt").to(device)
    # Generate the output using the model
    outputs = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,  # Maximum number of tokens to generate
        do_sample=True,                 # Enable sampling so temperature/top-k/top-p apply
        temperature=temperature,        # Controls randomness of the output
        top_k=top_k,                    # Restricts to the top-k most likely next tokens
        top_p=top_p,                    # Nucleus sampling
        use_cache=True,                 # Reuse the attention key/value cache for faster decoding
    )
    # Decode only the newly generated tokens so the echoed prompt is not shown
    prompt_length = inputs["input_ids"].shape[1]
    decoded_output = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)

    # Clean up any leftover prompt markers ("السؤال:" = "Question:", "الإجابة:" = "Answer:")
    clean_output = decoded_output.replace("السؤال:", "").replace("الإجابة:", "").strip()

    # Split the output into sentences and format them as bullet points
    bullet_points = clean_output.split(".")
    # Display the model's answer as bullet points
    st.subheader("Model's Answer:")
    for point in bullet_points:
        if point.strip() and point.strip() != user_question:  # Skip empty fragments and an echoed question
            st.markdown(f"- {point.strip()}")  # Use markdown to display bullets