import torch
import streamlit as st
from transformers import AutoModelForCausalLM, AutoTokenizer

# Pick the GPU if one is available, otherwise fall back to CPU
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the model and tokenizer from Hugging Face.
# @st.cache_resource keeps them in memory across Streamlit reruns,
# so they are not reloaded on every widget interaction.
model_name = "khaledsayed1/llama_QA"  # Replace with your actual model if different

@st.cache_resource
def load_model_and_tokenizer(name):
    model = AutoModelForCausalLM.from_pretrained(name).to(device)
    tokenizer = AutoTokenizer.from_pretrained(name)
    return model, tokenizer

model, tokenizer = load_model_and_tokenizer(model_name)

# Title of the web page
st.title("Question Answering using LLaMA Model")

# Description for the user
st.write("Enter your question below, and the model will generate an answer.")

# Sidebar for controlling model parameters
st.sidebar.header("Model Parameters")

# Get the user-controlled values from the sidebar.
# Temperature starts at 0.1 because sampling requires a strictly positive value.
temperature = st.sidebar.slider("Temperature", 0.1, 1.5, 0.7, 0.1)
top_k = st.sidebar.slider("Top-k", 1, 100, 50, 1)
top_p = st.sidebar.slider("Top-p (nucleus sampling)", 0.0, 1.0, 0.95, 0.01)
max_new_tokens = st.sidebar.slider("Max New Tokens", 1, 200, 128, 1)

# User input (question)
user_question = st.text_input("Your Question:", "")

# If a question is entered, process it and show the answer
if user_question:
    # Prompt template; the Arabic labels mean "Question:" and "Answer:"
    alpaca_prompt = """السؤال: {}
الإجابة: """
    formatted_prompt = alpaca_prompt.format(user_question)

    # Tokenize the input and move it to the model's device
    inputs = tokenizer([formatted_prompt], return_tensors="pt").to(device)

    # Generate the output using the model
    outputs = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,  # Upper bound on the number of generated tokens
        do_sample=True,                 # Required for temperature/top-k/top-p to take effect
        temperature=temperature,        # Controls randomness of the output
        top_k=top_k,                    # Restrict sampling to the k most likely next tokens
        top_p=top_p,                    # Nucleus sampling: smallest token set with cumulative probability >= top_p
        use_cache=True,                 # Reuse cached key/value attention states for faster decoding
    )

    # Decode the generated tokens back to text
    decoded_output = tokenizer.batch_decode(outputs, skip_special_tokens=True)

    # Strip the prompt labels from the generated text
    clean_output = decoded_output[0].replace("السؤال:", "").replace("الإجابة:", "").strip()

    # Split the output into sentences so they can be shown as bullet points
    bullet_points = clean_output.split(".")

    # Display the model's answer as bullet points
    st.subheader("Model's Answer:")
    for point in bullet_points:
        # Skip empty fragments and an echo of the question itself
        if point.strip() and point.strip() != user_question:
            st.markdown(f"- {point.strip()}")  # Use markdown to render each bullet
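
# Usage note (the filename app.py is an assumption, not given in the source):
# launch the app from a terminal with
#
#   streamlit run app.py
#
# Streamlit re-executes this script from top to bottom on every widget
# interaction, which is why the model load above is wrapped in
# @st.cache_resource rather than executed at module level on each rerun.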