File size: 2,110 Bytes
da84005
 
 
6aebbe5
da84005
6aebbe5
da84005
d18d9ad
36f67aa
da84005
 
 
d18d9ad
36f67aa
da84005
 
6aebbe5
da84005
 
6aebbe5
da84005
 
 
36f67aa
da84005
 
36f67aa
 
da84005
 
 
36f67aa
6aebbe5
36f67aa
09f0eda
36f67aa
da84005
6aebbe5
 
 
da84005
d18d9ad
36f67aa
 
 
d2cf846
 
d18d9ad
 
 
d2cf846
 
 
51ef299
36f67aa
 
 
 
 
 
 
 
 
 
 
da84005
 
041ba3a
da84005
041ba3a
36f67aa
da84005
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import gradio as gr
from docx import Document
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModelForCausalLM
import faiss
import torch
import numpy as np

# Load .docx file
def load_docx_text(path):
    """Return the non-blank paragraph text of the .docx at *path*, newline-joined."""
    paragraphs = (p.text for p in Document(path).paragraphs)
    return "\n".join(text for text in paragraphs if text.strip() != "")

# Make sure this filename matches the uploaded file
# Module-level load: the whole document is read once at import time.
# NOTE(review): this raises at import if "8_laws.docx" is absent — confirm the
# deployment bundles the file next to this script.
text_data = load_docx_text("8_laws.docx")

# Chunk text
def chunk_text(text, chunk_size=300, overlap=50):
    """Split *text* into overlapping word-based chunks.

    Args:
        text: Source text; split on whitespace.
        chunk_size: Maximum number of words per chunk.
        overlap: Number of words shared between consecutive chunks;
            must be smaller than ``chunk_size``.

    Returns:
        A list of space-joined chunks of up to ``chunk_size`` words each.
        Empty or whitespace-only input yields an empty list.

    Raises:
        ValueError: If ``overlap >= chunk_size`` — previously this produced
            a non-positive stride and ``range()`` raised an opaque
            "arg 3 must not be zero" / silent-misbehavior failure.
    """
    if overlap >= chunk_size:
        raise ValueError("overlap must be smaller than chunk_size")
    words = text.split()
    step = chunk_size - overlap  # stride between chunk starts
    return [" ".join(words[i:i + chunk_size]) for i in range(0, len(words), step)]

# Pre-compute the retrieval chunks once at import time.
doc_chunks = chunk_text(text_data)

# Embed text
# Sentence embedder used for both document chunks and incoming queries.
embedder = SentenceTransformer("all-MiniLM-L6-v2")
doc_embeddings = embedder.encode(doc_chunks)

# Build FAISS index
# Exact (brute-force) L2 search; dimensionality is taken from the encoded array.
index = faiss.IndexFlatL2(doc_embeddings.shape[1])
index.add(np.array(doc_embeddings))

# Load TinyLLaMA (CPU safe)
# NOTE(review): from_pretrained downloads weights on first run — confirm
# disk/network budget for the deployment environment.
tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
model = AutoModelForCausalLM.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")

# RAG logic
def retrieve_context(query, k=3):
    """Return up to *k* document chunks most similar to *query*.

    Embeds the query with the module-level ``embedder`` and searches the
    module-level FAISS ``index``.

    Args:
        query: Natural-language question text.
        k: Number of nearest chunks requested.

    Returns:
        A list of chunk strings (may be shorter than *k*).
    """
    query_vec = embedder.encode([query])
    _, indices = index.search(np.array(query_vec), k)
    # FAISS pads the result with -1 when the index holds fewer than k
    # vectors; previously those sentinels silently aliased doc_chunks[-1].
    return [doc_chunks[i] for i in indices[0] if i >= 0]

def generate_answer(question):
    """Answer *question* with TinyLLaMA, grounded in retrieved context.

    Builds a prompt from the top retrieved chunks, generates up to 150 new
    tokens, and returns only the generated continuation. The original
    decoded the entire output sequence, so the full prompt (context and
    all) was echoed back to the user before the answer.

    The broad ``except`` is deliberate: this is the Gradio handler
    boundary, so failures are logged and surfaced as text instead of
    crashing the UI.

    Args:
        question: User's question text from the Gradio textbox.

    Returns:
        The model's answer string, or an error message on failure.
    """
    try:
        context = "\n".join(retrieve_context(question))
        prompt = f"""Use the context below to answer the question.

Context:
{context}

Question:
{question}

Answer:"""

        print("🧠 Prompt:\n", prompt)

        inputs = tokenizer(prompt, return_tensors="pt")
        # Inference only — no_grad avoids building autograd state.
        with torch.no_grad():
            output = model.generate(**inputs, max_new_tokens=150)
        # Slice off the prompt tokens so only the generated answer remains.
        new_tokens = output[0][inputs["input_ids"].shape[1]:]
        answer = tokenizer.decode(new_tokens, skip_special_tokens=True)
        return answer.strip()
    except Exception as e:
        print("❌ ERROR:", str(e))
        return f"An error occurred: {e}"

# Gradio interface
# Single-textbox web UI wired to the RAG pipeline above.
demo = gr.Interface(
    fn=generate_answer,
    inputs=gr.Textbox(lines=2, placeholder="Ask a question..."),
    outputs="text",
    title="📘 TinyLLaMA DOCX RAG",
    description="Ask a question about the 8 laws of health"
)

# Blocks and serves the app; runs at import time by design for this script.
demo.launch()