import streamlit as st
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from huggingface_hub import login

st.title("📚 AI Adaptive Learning (Local LLaMA)")

# Hugging Face authentication for gated/private models
HF_API_TOKEN = st.secrets["HF_API_TOKEN"]
login(token=HF_API_TOKEN)

# Gated model; a smaller public LLaMA-like alternative such as
# "TheBloke/vicuna-7B-1.1-HF" needs no token.
MODEL_ID = "meta-llama/Llama-2-7b-chat-hf"

@st.cache_resource  # cache the model so it is not reloaded on every rerun
def load_model():
    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, token=HF_API_TOKEN)
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        token=HF_API_TOKEN,  # `token` replaces the deprecated `use_auth_token`
        device_map="auto",   # uses GPU if available, CPU otherwise
    )
    return tokenizer, model

tokenizer, model = load_model()

# Input question
user_input = st.text_input("Ask a question:")

# Generate answer locally
if st.button("Submit") and user_input:
    # Move inputs onto the model's device (needed when device_map places it on GPU)
    inputs = tokenizer(user_input, return_tensors="pt").to(model.device)
    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=256)
    answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
    st.subheader("AI Answer:")
    st.write(answer)
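
# --- Optional: prompt formatting for chat-tuned checkpoints -----------------
# Llama-2 *chat* models were fine-tuned on an [INST] ... [/INST] instruction
# template, so wrapping the raw question in it usually improves answer quality.
# A minimal sketch, assuming a transformers release recent enough (>= 4.34)
# that the tokenizer ships a chat template; it falls back to plain encoding
# otherwise. `encode_question` is an illustrative helper, not part of any API.
def encode_question(question: str) -> torch.Tensor:
    try:
        messages = [{"role": "user", "content": question}]
        input_ids = tokenizer.apply_chat_template(messages, return_tensors="pt")
    except (AttributeError, ValueError):
        # Older transformers, or a tokenizer without a chat template
        input_ids = tokenizer(question, return_tensors="pt").input_ids
    return input_ids.to(model.device)

# Usage: outputs = model.generate(encode_question(user_input), max_new_tokens=256)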