import streamlit as st
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from huggingface_hub import login

st.title("📚 AI Adaptive Learning (Local LLaMA)")

# Hugging Face authentication for private models
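# Streamlit reads st.secrets from .streamlit/secrets.toml; define the token there:
#   HF_API_TOKEN = "hf_..."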
HF_API_TOKEN = st.secrets["HF_API_TOKEN"]
login(token=HF_API_TOKEN)
MODEL_ID = "meta-llama/Llama-2-7b-chat-hf"  # or your private model

@st.cache_resource
def load_model():
    # `token` replaces the deprecated `use_auth_token` argument
    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, token=HF_API_TOKEN)
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        token=HF_API_TOKEN,
        device_map="auto",  # uses GPU if available, falls back to CPU (requires `accelerate`)
    )
    return tokenizer, model

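# The first run downloads the weights (roughly 13 GB for the 7B model);
# later runs reuse the local HF cache and the Streamlit resource cache.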
tokenizer, model = load_model()
# Load model & tokenizer
MODEL_ID = "TheBloke/vicuna-7B-1.1-HF"  # smaller public LLaMA-like model
@st.cache_resource  # caches model to avoid reload on every run
def load_model():
    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        device_map="auto",  # automatically uses CPU or GPU
    )
    return tokenizer, model

tokenizer, model = load_model()

# Input question
user_input = st.text_input("Ask a question:")

# Generate an answer locally
if st.button("Submit") and user_input:
    # Move the prompt tensors to the device the model was placed on by device_map
    inputs = tokenizer(user_input, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=256)
    answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
    st.subheader("AI Answer:")
    st.write(answer)
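# Run locally with:  streamlit run app.py
# (app.py is an assumed filename; use whatever name this file is saved under)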