# HuggingFace Spaces app — the hosted Space previously showed "Runtime error".
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModel
import torch
import faiss
import numpy as np

# --- Load TinyLlama Chat Model ---
llama_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
llama_tokenizer = AutoTokenizer.from_pretrained(llama_id)
llama_model = AutoModelForCausalLM.from_pretrained(llama_id)
llama_model.eval()  # inference only; disables dropout

# --- Load Nomic Embedding Model ---
# nomic-embed-text-v1 ships custom modeling code on the Hub, so AutoModel
# needs trust_remote_code=True — without it from_pretrained raises at startup
# (the likely cause of the Space's "Runtime error").
nomic_id = "nomic-ai/nomic-embed-text-v1"
nomic_tokenizer = AutoTokenizer.from_pretrained(nomic_id)
nomic_model = AutoModel.from_pretrained(nomic_id, trust_remote_code=True)
nomic_model.eval()

# --- FAISS Setup ---
doc_store = []    # raw document strings; row i of doc_index embeds doc_store[i]
doc_index = None  # faiss.IndexFlatL2, created lazily on the first upload
def embed(texts):
    """Embed a list of strings with the Nomic model.

    Args:
        texts: non-empty list of strings.

    Returns:
        numpy.ndarray of shape (len(texts), hidden_dim).
    """
    inputs = nomic_tokenizer(texts, return_tensors="pt", padding=True, truncation=True)
    with torch.no_grad():
        outputs = nomic_model(**inputs)
    # BUG FIX: a plain .mean(dim=1) averages over PAD positions too, which
    # skews embeddings for shorter texts in a padded batch. Pool only over
    # real tokens using the attention mask.
    mask = inputs["attention_mask"].unsqueeze(-1).float()
    summed = (outputs.last_hidden_state * mask).sum(dim=1)
    counts = mask.sum(dim=1).clamp(min=1e-9)  # avoid divide-by-zero
    return (summed / counts).cpu().numpy()
def add_documents(text_block):
    """Index one document per non-empty line of *text_block*.

    Mutates the module-level doc_store / doc_index pair.

    Returns:
        A status string for the UI.
    """
    global doc_index, doc_store
    docs = [line.strip() for line in text_block.split("\n") if line.strip()]
    # BUG FIX: an empty/whitespace textbox yields docs == [] and the
    # tokenizer call inside embed([]) raises; report instead of crashing.
    if not docs:
        return "Added 0 documents."
    vectors = embed(docs).astype(np.float32)  # FAISS requires float32
    doc_store.extend(docs)
    if doc_index is None:
        # Dimension is only known after the first embedding pass.
        doc_index = faiss.IndexFlatL2(vectors.shape[1])
    doc_index.add(vectors)
    return f"Added {len(docs)} documents."
def chat_with_tinyllama(question):
    """Answer *question* using the nearest stored document as context.

    Retrieves the single closest document from the FAISS index (or a
    placeholder when nothing is indexed), then generates with TinyLlama.

    Returns:
        The generated answer string.
    """
    if doc_index is None or len(doc_store) == 0:
        context = "No documents uploaded yet."
    else:
        q_embed = embed([question]).astype(np.float32)
        _, indices = doc_index.search(q_embed, 1)  # top-1 nearest neighbour
        context = doc_store[indices[0][0]]
    # Build the prompt with the tokenizer's own chat template instead of a
    # hand-written "<|system|>…" string, so special tokens are inserted the
    # way the model was trained to see them.
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": f"Context: {context}\nQuestion: {question}"},
    ]
    input_ids = llama_tokenizer.apply_chat_template(
        messages, add_generation_prompt=True, return_tensors="pt"
    )
    output = llama_model.generate(input_ids, max_new_tokens=100, do_sample=True)
    # BUG FIX: decoding with skip_special_tokens=True strips "<|assistant|>",
    # so the original split("<|assistant|>")[-1] returned the full prompt plus
    # the answer. Slice off the prompt tokens and decode only the new ones.
    new_tokens = output[0][input_ids.shape[1]:]
    return llama_tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
# --- Gradio Interface ---
with gr.Blocks() as demo:
    # FIX: heading emoji was mojibake ("π€") from a bad re-encode of 🤖.
    gr.Markdown("# 🤖 TinyLlama + Nomic Chatbot")
    with gr.Row():
        doc_input = gr.Textbox(lines=5, label="Upload Documents (one per line)")
        upload_btn = gr.Button("Add to Knowledge Base")
    with gr.Row():
        question = gr.Textbox(lines=1, label="Ask a Question")
        response = gr.Textbox(lines=4, label="TinyLlama Response")
    # Button indexes the docs (status string replaces the textbox content);
    # pressing Enter in the question box generates an answer.
    upload_btn.click(add_documents, inputs=doc_input, outputs=doc_input)
    question.submit(chat_with_tinyllama, inputs=question, outputs=response)

demo.launch()