"""Minimal RAG demo: answer questions about a local .docx with TinyLlama.

Pipeline: load the document, split it into overlapping word chunks, embed
the chunks with a SentenceTransformer, index them in FAISS, then at query
time retrieve the top-k nearest chunks and feed them to TinyLlama as
context for answer generation.  Served through a Gradio text interface.
"""

import faiss
import gradio as gr
import numpy as np
import torch
from docx import Document
from sentence_transformers import SentenceTransformer
from transformers import AutoModelForCausalLM, AutoTokenizer


def load_docx_text(path):
    """Return the non-empty paragraphs of the .docx at *path*, newline-joined."""
    doc = Document(path)
    return "\n".join(p.text for p in doc.paragraphs if p.text.strip())


# Make sure this filename matches the uploaded file.
text_data = load_docx_text("8_laws.docx")


def chunk_text(text, chunk_size=300, overlap=50):
    """Split *text* into word chunks of *chunk_size* words, consecutive
    chunks sharing *overlap* words.

    Raises:
        ValueError: if overlap >= chunk_size (stride would be <= 0, which
            would either crash ``range`` or loop forever over duplicates).
    """
    if overlap >= chunk_size:
        raise ValueError("overlap must be smaller than chunk_size")
    words = text.split()
    stride = chunk_size - overlap
    return [" ".join(words[i:i + chunk_size]) for i in range(0, len(words), stride)]


doc_chunks = chunk_text(text_data)

# Embed every chunk once at startup; FAISS requires contiguous float32.
embedder = SentenceTransformer("all-MiniLM-L6-v2")
doc_embeddings = np.ascontiguousarray(embedder.encode(doc_chunks), dtype="float32")

# Flat (exhaustive) L2 index — fine for a document this small.
index = faiss.IndexFlatL2(doc_embeddings.shape[1])
index.add(doc_embeddings)

# TinyLlama 1.1B is small enough to run on CPU.
tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
model = AutoModelForCausalLM.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")


def retrieve_context(query, k=3):
    """Return up to *k* document chunks nearest to *query* by L2 distance."""
    query_vec = np.ascontiguousarray(embedder.encode([query]), dtype="float32")
    # Guard k > ntotal: FAISS pads missing results with index -1, and
    # doc_chunks[-1] would silently return the *last* chunk — wrong answer,
    # not an error.
    k = min(k, len(doc_chunks))
    _, indices = index.search(query_vec, k)
    return [doc_chunks[i] for i in indices[0] if i != -1]


def generate_answer(question):
    """Answer *question* grounded in the retrieved document context.

    Returns only the model's completion (the prompt is stripped), or a
    human-readable error string if any stage of the pipeline fails.
    """
    try:
        context = "\n".join(retrieve_context(question))
        prompt = f"""Use the context below to answer the question.

Context:
{context}

Question:
{question}

Answer:"""
        print("🧠 Prompt:\n", prompt)
        inputs = tokenizer(prompt, return_tensors="pt")
        # Inference only — skip autograd graph construction (memory + speed).
        with torch.no_grad():
            output = model.generate(**inputs, max_new_tokens=150)
        # BUG FIX: decode only the newly generated tokens. Previously the
        # full sequence was decoded, so the user-visible "answer" began
        # with the entire prompt (context + question) echoed back.
        prompt_len = inputs["input_ids"].shape[1]
        answer = tokenizer.decode(output[0][prompt_len:], skip_special_tokens=True)
        return answer.strip()
    except Exception as e:  # top-level UI boundary: report, don't crash the app
        print("❌ ERROR:", str(e))
        return f"An error occurred: {e}"


# Gradio interface
demo = gr.Interface(
    fn=generate_answer,
    inputs=gr.Textbox(lines=2, placeholder="Ask a question..."),
    outputs="text",
    title="📘 TinyLLaMA DOCX RAG",
    description="Ask a question about the 8 laws of health",
)

if __name__ == "__main__":
    demo.launch()