import streamlit as st
import torch
from huggingface_hub import login
from transformers import AutoModelForCausalLM, AutoTokenizer

st.title("📚 AI Adaptive Learning (Local LLaMA)")

# Hugging Face authentication for private models
HF_API_TOKEN = st.secrets["HF_API_TOKEN"]
login(token=HF_API_TOKEN)
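# st.secrets reads from .streamlit/secrets.toml (or the Secrets settings on
# Streamlit Community Cloud). A minimal example, assuming the token is stored
# under the key HF_API_TOKEN:
#
#   # .streamlit/secrets.toml
#   HF_API_TOKEN = "hf_xxxxxxxxxxxxxxxxxxxx"
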
MODEL_ID = "meta-llama/Llama-2-7b-chat-hf" # or your private model
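# Note: meta-llama checkpoints are gated; the account behind HF_API_TOKEN must
# have accepted the model's license on huggingface.co before downloads work.
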
@st.cache_resource  # cache so the model is not reloaded on every rerun
def load_model():
    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, token=HF_API_TOKEN)
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        token=HF_API_TOKEN,
        device_map="auto",  # uses GPU if available, CPU otherwise
    )
    return tokenizer, model

tokenizer, model = load_model()

# Alternative: a smaller public LLaMA-like model that needs no authentication.
# Swap it in by setting MODEL_ID above, e.g.:
#   MODEL_ID = "TheBloke/vicuna-7B-1.1-HF"
# load_model() stays the same; device_map="auto" still picks GPU or CPU.

# Input question
user_input = st.text_input("Ask a question:")
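
# Note: Llama-2-chat models usually answer better when the question is wrapped
# in their chat format. A minimal sketch, assuming a recent transformers
# release whose tokenizer ships a chat template; it would replace the plain
# tokenizer(user_input, ...) call below:
#
#   prompt = tokenizer.apply_chat_template(
#       [{"role": "user", "content": user_input}],
#       tokenize=False,
#       add_generation_prompt=True,
#   )
#   inputs = tokenizer(prompt, return_tensors="pt").to(model.device)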

# Generate answer locally
if st.button("Submit") and user_input:
    inputs = tokenizer(user_input, return_tensors="pt").to(model.device)  # move inputs to the model's device
    outputs = model.generate(**inputs, max_new_tokens=256)
    answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
    st.subheader("AI Answer:")
    st.write(answer)
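
# To try this locally (a sketch, assuming the file is saved as app.py and that
# streamlit, torch, transformers, huggingface_hub, and accelerate are
# installed; accelerate is required for device_map="auto"):
#
#   streamlit run app.py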