# Source: Stanley03's Hugging Face Space — "Update app.py", commit 36f67aa (verified)
import gradio as gr
from docx import Document
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModelForCausalLM
import faiss
import torch
import numpy as np
# Load .docx file
def load_docx_text(path):
    """Read a .docx file and return its non-empty paragraphs joined by newlines."""
    paragraphs = Document(path).paragraphs
    non_empty = (p.text for p in paragraphs if p.text.strip())
    return "\n".join(non_empty)
# Make sure this filename matches the uploaded file
# NOTE(review): runs at import time — a missing/renamed file crashes app startup
text_data = load_docx_text("8_laws.docx")
# Chunk text
def chunk_text(text, chunk_size=300, overlap=50):
    """Split *text* into overlapping chunks of up to *chunk_size* words.

    Consecutive chunks share *overlap* words so retrieval does not lose
    context at chunk boundaries.

    Args:
        text: Source text; split on whitespace.
        chunk_size: Maximum words per chunk.
        overlap: Words shared between consecutive chunks.

    Returns:
        List of space-joined word chunks (empty list for empty text).

    Raises:
        ValueError: If overlap >= chunk_size — the window would never
            advance (range() would get a step <= 0).
    """
    if overlap >= chunk_size:
        raise ValueError("overlap must be smaller than chunk_size")
    words = text.split()
    step = chunk_size - overlap
    return [" ".join(words[i:i + chunk_size]) for i in range(0, len(words), step)]
doc_chunks = chunk_text(text_data)
# Embed text
embedder = SentenceTransformer("all-MiniLM-L6-v2")
doc_embeddings = embedder.encode(doc_chunks)  # 2-D array: one vector per chunk
# Build FAISS index (exact L2-distance search over chunk embeddings)
index = faiss.IndexFlatL2(doc_embeddings.shape[1])
index.add(np.array(doc_embeddings))
# Load TinyLLaMA (CPU safe) — downloaded from the Hugging Face hub at startup
tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
model = AutoModelForCausalLM.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
# RAG logic
def retrieve_context(query, k=3):
    """Embed *query* and return the k nearest document chunks from the FAISS index."""
    query_embedding = np.array(embedder.encode([query]))
    _, hit_ids = index.search(query_embedding, k)
    return [doc_chunks[idx] for idx in hit_ids[0]]
def generate_answer(question):
    """Answer *question* using retrieved document context and TinyLlama.

    Returns the generated answer text, or an error-message string if any
    stage of the pipeline fails (this is the top-level boundary called by
    the Gradio UI, so it must not raise).
    """
    try:
        context = "\n".join(retrieve_context(question))
        prompt = f"""Use the context below to answer the question.
Context:
{context}
Question:
{question}
Answer:"""
        print("🧠 Prompt:\n", prompt)
        inputs = tokenizer(prompt, return_tensors="pt")
        with torch.no_grad():  # inference only — no autograd graph needed
            output = model.generate(**inputs, max_new_tokens=150)
        # BUG FIX: decode only the newly generated tokens. Decoding output[0]
        # in full echoed the entire prompt back as part of the "answer".
        new_tokens = output[0][inputs["input_ids"].shape[-1]:]
        answer = tokenizer.decode(new_tokens, skip_special_tokens=True)
        return answer.strip()
    except Exception as e:
        print("❌ ERROR:", str(e))
        return f"An error occurred: {e}"
# Gradio interface: single textbox in, generated answer out
question_box = gr.Textbox(lines=2, placeholder="Ask a question...")
demo = gr.Interface(
    fn=generate_answer,
    inputs=question_box,
    outputs="text",
    title="📘 TinyLLaMA DOCX RAG",
    description="Ask a question about the 8 laws of health",
)
demo.launch()