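"""Streamlit question-answering app backed by a fine-tuned LLaMA model.

Launch locally with: streamlit run app.py
"""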
import torch
import streamlit as st
from transformers import AutoModelForCausalLM, AutoTokenizer

# Use the GPU when available; fall back to CPU so the app also runs without CUDA
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the model and tokenizer once and cache them across Streamlit reruns,
# so they are not reloaded on every user interaction
@st.cache_resource
def load_model_and_tokenizer(name):
    model = AutoModelForCausalLM.from_pretrained(name).to(device)
    return model, AutoTokenizer.from_pretrained(name)

model_name = "khaledsayed1/llama_QA"  # Replace with your actual model if different
model, tokenizer = load_model_and_tokenizer(model_name)
# Title of the web page
st.title("Question Answering with a LLaMA Model")
# Description for the user
st.write("Enter your question below, and the model will generate an answer.")
# Sidebar for controlling model parameters
st.sidebar.header("Model Parameters")
# Get the user-controlled values from the sidebar
temperature = st.sidebar.slider("Temperature", 0.1, 1.5, 0.7, 0.1)  # must be strictly positive when sampling
top_k = st.sidebar.slider("Top-k", 1, 100, 50, 1)
top_p = st.sidebar.slider("Top-p (nucleus sampling)", 0.0, 1.0, 0.95, 0.01)
max_new_tokens = st.sidebar.slider("Max New Tokens", 1, 200, 128, 1)
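# Note: the sampling parameters above only take effect when do_sample=True is
# passed to model.generate(); otherwise transformers decodes greedily and
# ignores them (emitting a warning).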
# User input (question)
user_question = st.text_input("Your Question:", "")
# If a question is entered, process it and show the answer
if user_question:
    # Define the prompt with the user's question
    # ("السؤال" means "Question", "الإجابة" means "Answer")
    alpaca_prompt = """
السؤال: {}
الإجابة:
"""
    formatted_prompt = alpaca_prompt.format(user_question)

    # Tokenize the input and move it to the same device as the model
    inputs = tokenizer([formatted_prompt], return_tensors="pt").to(device)
    # Generate the output using the model
    outputs = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,  # Maximum number of tokens to generate
        do_sample=True,                 # Enable sampling so temperature/top-k/top-p apply
        temperature=temperature,        # Controls randomness of the output
        top_k=top_k,                    # Restricts to the top-k most likely next tokens
        top_p=top_p,                    # Nucleus sampling
        use_cache=True,                 # Reuse the attention key/value cache for faster decoding
    )
    # Decode only the newly generated tokens so the echoed prompt is not shown
    prompt_length = inputs["input_ids"].shape[1]
    decoded_output = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)

    # Clean up any leftover prompt markers ("السؤال:" = "Question:", "الإجابة:" = "Answer:")
    clean_output = decoded_output.replace("السؤال:", "").replace("الإجابة:", "").strip()

    # Split the output into sentences and format them as bullet points
    bullet_points = clean_output.split(".")
    # Display the model's answer as bullet points
    st.subheader("Model's Answer:")
    for point in bullet_points:
        if point.strip() and point.strip() != user_question:  # Skip empty fragments and an echoed question
            st.markdown(f"- {point.strip()}")  # Use markdown to display bullets