Spaces:
Sleeping
Sleeping
import streamlit as st
from peft import LoraConfig, get_peft_model
from transformers import T5ForConditionalGeneration, T5Tokenizer
# Hub repository that provides both the model weights and the tokenizer.
MODEL_ID = "danrdoran/flan-t5-simplified-squad"


def load_model_and_tokenizer():
    """Load the LoRA-wrapped T5 model and its tokenizer from the Hugging Face hub.

    Returns:
        A ``(model, tokenizer)`` tuple ready for inference.
    """
    # The same LoRA configuration used during fine-tuning.
    lora_config = LoraConfig(
        r=8,  # Low-rank parameter
        lora_alpha=32,  # Scaling parameter
        lora_dropout=0.1,  # Dropout rate
        target_modules=["q", "v"],  # The attention layers to apply LoRA to
        bias="none",
    )
    base_model = T5ForConditionalGeneration.from_pretrained(MODEL_ID)
    # NOTE(review): get_peft_model() attaches newly initialised adapters to the
    # base model; it does not load trained adapter weights. If the hub repo
    # stores the adapters separately, PeftModel.from_pretrained() is presumably
    # what is wanted here -- confirm against the repo contents.
    model = get_peft_model(base_model, lora_config)
    model.eval()  # Inference only: keep dropout (including LoRA dropout) off.
    tokenizer = T5Tokenizer.from_pretrained(MODEL_ID)
    return model, tokenizer


def build_prompt(student_question: str) -> str:
    """Wrap the student's question in the tutoring instruction given to the model."""
    return (
        "You are a tutor. Explain the answer to this question to a young student: "
        f"'{student_question}'"
    )


def generation_kwargs(*, do_sample: bool, temperature: float, top_p: float, top_k: int) -> dict:
    """Build the keyword arguments passed to ``model.generate()``.

    Args:
        do_sample: Whether random sampling is enabled in the sidebar.
        temperature: Sampling temperature; only forwarded when sampling.
        top_p: Nucleus-sampling threshold; only forwarded when sampling.
        top_k: Top-k cutoff; only forwarded when sampling.

    Returns:
        A dict of generation options. The sampling knobs are included only when
        ``do_sample`` is true, because they have no meaning otherwise.
    """
    kwargs = {
        "do_sample": do_sample,
        "no_repeat_ngram_size": 3,  # Prevent repeating any 3-token sequence
        # The next two only influence beam-based generation; they are kept
        # explicit so the values passed to generate() stay as before.
        "length_penalty": 1.0,
        "early_stopping": False,
    }
    if do_sample:
        kwargs.update(temperature=temperature, top_p=top_p, top_k=top_k)
    return kwargs


def main() -> None:
    """Render the tutor UI and answer the student's question with the model."""
    # Streamlit re-executes the script on every widget interaction, so the
    # model is cached as a shared resource instead of being reloaded each run.
    model, tokenizer = st.cache_resource(load_model_and_tokenizer)()

    # Streamlit app UI
    st.title("AI English Tutor")
    st.write("Ask me a question, and I will help you!")

    # Sidebar for user to control model generation parameters
    st.sidebar.title("Model Parameters")
    temperature = st.sidebar.slider("Temperature", 0.1, 1.5, 1.0, 0.1)  # Default 1.0
    top_p = st.sidebar.slider("Top-p (Nucleus Sampling)", 0.0, 1.0, 0.9, 0.05)  # Default 0.9
    top_k = st.sidebar.slider("Top-k", 0, 100, 50, 1)  # Default 50
    do_sample = st.sidebar.checkbox("Enable Random Sampling", value=False)

    # Input field for the student
    student_question = st.text_input("Ask your question!").strip()
    if not student_question:
        # Nothing (or only whitespace) entered yet: do not run the model.
        return

    # Prepare the input for the model; prompts longer than 256 tokens are truncated.
    inputs = tokenizer(
        build_prompt(student_question),
        return_tensors="pt",
        truncation=True,
        max_length=256,
    )

    # NOTE: no max_length / max_new_tokens is passed, so the output length limit
    # comes from the model's generation config.
    generated_ids = model.generate(
        **inputs,  # input_ids together with the matching attention mask
        **generation_kwargs(
            do_sample=do_sample,
            temperature=temperature,
            top_p=top_p,
            top_k=top_k,
        ),
    )

    # Decode the generated response
    response = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
    st.write("Tutor's Answer:", response)


if __name__ == "__main__":
    main()