Stanley03 commited on
Commit
36f67aa
·
verified ·
1 Parent(s): 6aebbe5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -12
app.py CHANGED
@@ -6,11 +6,12 @@ import faiss
6
  import torch
7
  import numpy as np
8
 
9
- # Load docx content
10
  def load_docx_text(path):
11
  doc = Document(path)
12
  return "\n".join([p.text for p in doc.paragraphs if p.text.strip() != ""])
13
 
 
14
  text_data = load_docx_text("8_laws.docx")
15
 
16
  # Chunk text
@@ -20,25 +21,28 @@ def chunk_text(text, chunk_size=300, overlap=50):
20
 
21
  doc_chunks = chunk_text(text_data)
22
 
23
- # Embedding model and FAISS
24
  embedder = SentenceTransformer("all-MiniLM-L6-v2")
25
  doc_embeddings = embedder.encode(doc_chunks)
 
 
26
  index = faiss.IndexFlatL2(doc_embeddings.shape[1])
27
  index.add(np.array(doc_embeddings))
28
 
29
- # LLM
30
  tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
31
- model = AutoModelForCausalLM.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0", torch_dtype=torch.float16, device_map="auto")
32
 
33
- # RAG Logic
34
  def retrieve_context(query, k=3):
35
  query_vec = embedder.encode([query])
36
  _, indices = index.search(np.array(query_vec), k)
37
  return [doc_chunks[i] for i in indices[0]]
38
 
39
  def generate_answer(question):
40
- context = "\n".join(retrieve_context(question))
41
- prompt = f"""Use the context below to answer the question.
 
42
 
43
  Context:
44
  {context}
@@ -47,17 +51,24 @@ Question:
47
  {question}
48
 
49
  Answer:"""
50
- inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
51
- outputs = model.generate(**inputs, max_new_tokens=150)
52
- return tokenizer.decode(outputs[0], skip_special_tokens=True)
53
 
54
- # Gradio App
 
 
 
 
 
 
 
 
 
 
55
  demo = gr.Interface(
56
  fn=generate_answer,
57
  inputs=gr.Textbox(lines=2, placeholder="Ask a question..."),
58
  outputs="text",
59
  title="📘 TinyLLaMA DOCX RAG",
60
- description="Ask questions from the 8 Laws docx file"
61
  )
62
 
63
  demo.launch()
 
6
  import torch
7
  import numpy as np
8
 
9
# Load .docx file
def load_docx_text(path):
    """Read the .docx at *path* and return its non-blank paragraphs joined by newlines.

    Uses python-docx's ``Document``; paragraphs that are empty or
    whitespace-only are dropped.
    """
    doc = Document(path)
    # Truthiness test replaces the noisy `strip() != ""` comparison;
    # a generator expression avoids building an intermediate list.
    return "\n".join(p.text for p in doc.paragraphs if p.text.strip())
13
 
14
# NOTE: filename is hard-coded — it must match the .docx uploaded alongside app.py
text_data = load_docx_text("8_laws.docx")

# Chunk text
 
21
 
22
doc_chunks = chunk_text(text_data)

# Embed text: one vector per chunk via the MiniLM sentence-transformer.
embedder = SentenceTransformer("all-MiniLM-L6-v2")
doc_embeddings = embedder.encode(doc_chunks)

# Build FAISS index — exact (brute-force) L2 search; dimensionality is
# taken from the embedding matrix itself.
index = faiss.IndexFlatL2(doc_embeddings.shape[1])
index.add(np.array(doc_embeddings))

# Load TinyLLaMA (CPU safe): no torch_dtype/device_map, so the model loads
# with default dtype on CPU — slower but avoids GPU/fp16 requirements.
tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
model = AutoModelForCausalLM.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
35
 
36
# RAG logic
def retrieve_context(query, k=3):
    """Return the k document chunks nearest to *query* in embedding space."""
    embedded_query = np.array(embedder.encode([query]))
    _, hit_ids = index.search(embedded_query, k)
    return [doc_chunks[chunk_id] for chunk_id in hit_ids[0]]
41
 
42
def generate_answer(question):
    """Answer *question* via RAG: retrieve chunks, build a prompt, run TinyLLaMA.

    Returns the decoded generation on success, or a human-readable error
    string on any failure (shown directly in the Gradio UI).
    """
    try:
        # Join the retrieved chunks into one context block for the prompt.
        context = "\n".join(retrieve_context(question))
        prompt = f"""Use the context below to answer the question.

Context:
{context}

Question:
{question}

Answer:"""

        # Debug aid: log the fully assembled prompt to the Space logs.
        print("🧠 Prompt:\n", prompt)

        inputs = tokenizer(prompt, return_tensors="pt")
        output = model.generate(**inputs, max_new_tokens=150)
        # NOTE(review): output[0] is the full sequence, so the decoded text
        # repeats the prompt before the answer — consider slicing off the
        # input length if only the completion should be shown.
        answer = tokenizer.decode(output[0], skip_special_tokens=True)
        return answer
    except Exception as e:
        # Broad catch is deliberate: surface any failure in the UI
        # instead of crashing the app.
        print("❌ ERROR:", str(e))
        return f"An error occurred: {e}"
64
+
65
# Gradio interface: a single textbox in, plain text out, wired to the RAG pipeline.
demo = gr.Interface(
    fn=generate_answer,
    inputs=gr.Textbox(lines=2, placeholder="Ask a question..."),
    outputs="text",
    title="📘 TinyLLaMA DOCX RAG",
    description="Ask a question about the 8 laws of health"
)

demo.launch()