import torch
import streamlit as st
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load the fine-tuned LLaMA QA model and its tokenizer, falling back to CPU
# when CUDA is not available.
model_name = "khaledsayed1/llama_QA"
device = "cuda" if torch.cuda.is_available() else "cpu"
model = AutoModelForCausalLM.from_pretrained(model_name).to(device)
tokenizer = AutoTokenizer.from_pretrained(model_name)

st.title("Question Answering using LLaMA Model")
st.write("Enter your question below, and the model will generate an answer.")

# Sidebar controls for the generation hyperparameters.
st.sidebar.header("Model Parameters")
temperature = st.sidebar.slider("Temperature", 0.0, 1.5, 0.7, 0.1)
top_k = st.sidebar.slider("Top-k", 1, 100, 50, 1)
top_p = st.sidebar.slider("Top-p (nucleus sampling)", 0.0, 1.0, 0.95, 0.01)
max_new_tokens = st.sidebar.slider("Max New Tokens", 1, 200, 128, 1)

user_question = st.text_input("Your Question:", "")

if user_question:
    # Wrap the question in the Arabic prompt template used during fine-tuning
    # ("السؤال" = "Question", "الإجابة" = "Answer").
    alpaca_prompt = """
السؤال: {}
الإجابة:
"""
    formatted_prompt = alpaca_prompt.format(user_question)

    # Tokenize the prompt and move the tensors to the model's device.
    inputs = tokenizer([formatted_prompt], return_tensors="pt").to(device)

    # Generate an answer; do_sample=True is required for temperature,
    # top-k, and top-p to take effect.
    outputs = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=temperature,
        top_k=top_k,
        top_p=top_p,
        use_cache=True
    )

    decoded_output = tokenizer.batch_decode(outputs, skip_special_tokens=True)

    # Strip the prompt labels and split the answer into sentence-level bullets.
    clean_output = decoded_output[0].replace("السؤال:", "").replace("الإجابة:", "").strip()
    bullet_points = clean_output.split(".")

    st.subheader("Model's Answer:")
    for point in bullet_points:
        if point.strip() and point.strip() != user_question:
            st.markdown(f"- {point.strip()}")
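
# To launch the app (assuming this script is saved as app.py; the filename is
# illustrative, not prescribed by the original code):
#   streamlit run app.py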