Upload 3 files
Browse files- TheyKnow/Gumball.docx +0 -0
- TheyKnow/RAG.py +67 -0
- TheyKnow/~$umball.docx +0 -0
TheyKnow/Gumball.docx
ADDED
|
Binary file (20.7 kB). View file
|
|
|
TheyKnow/RAG.py
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pathlib import Path

import faiss
import numpy as np
import streamlit as st
from docx import Document
from sentence_transformers import SentenceTransformer
from transformers import pipeline
|
| 7 |
+
|
| 8 |
+
# Initialize the sentence transformer for embeddings
|
| 9 |
+
embedder = SentenceTransformer('all-MiniLM-L6-v2')
|
| 10 |
+
|
| 11 |
+
# Initialize the text generation model (Flan-T5)
|
| 12 |
+
pipe = pipeline("text2text-generation", model="google/flan-t5-large")
|
| 13 |
+
|
| 14 |
+
# Path to the pre-defined .docx document
|
| 15 |
+
file_path = "/Users/estebanm/Desktop/DS-TEST/TheyKnow/Gumball.docx"
|
| 16 |
+
|
| 17 |
+
# Load and process the .docx document
|
| 18 |
+
def load_docx(file_path):
|
| 19 |
+
"""Load text from a .docx file and return a list of paragraphs."""
|
| 20 |
+
doc = Document(file_path)
|
| 21 |
+
text_chunks = [para.text for para in doc.paragraphs if para.text]
|
| 22 |
+
return text_chunks
|
| 23 |
+
|
| 24 |
+
# Create FAISS index for similarity search
|
| 25 |
+
index = None # Global index for the uploaded document
|
| 26 |
+
gumball_document = [] # To store the document content
|
| 27 |
+
|
| 28 |
+
def create_faiss_index(text_chunks):
|
| 29 |
+
"""Create a FAISS index from text chunks."""
|
| 30 |
+
global index
|
| 31 |
+
# Embed the chunks
|
| 32 |
+
document_embeddings = embedder.encode(text_chunks)
|
| 33 |
+
|
| 34 |
+
# Initialize and add embeddings to FAISS index
|
| 35 |
+
dimension = document_embeddings.shape[1]
|
| 36 |
+
index = faiss.IndexFlatL2(dimension)
|
| 37 |
+
index.add(np.array(document_embeddings))
|
| 38 |
+
|
| 39 |
+
def retrieve_relevant_text(question, top_k=3):
|
| 40 |
+
"""Retrieve the most relevant text chunks based on the question."""
|
| 41 |
+
question_embedding = embedder.encode([question])
|
| 42 |
+
distances, indices = index.search(np.array(question_embedding), top_k)
|
| 43 |
+
return [gumball_document[idx] for idx in indices[0]]
|
| 44 |
+
|
| 45 |
+
# Streamlit App
|
| 46 |
+
st.title("RAG with The Amazing World of Gumball")
|
| 47 |
+
|
| 48 |
+
# Automatically load and process the document from the specified path
|
| 49 |
+
gumball_document = load_docx(file_path)
|
| 50 |
+
create_faiss_index(gumball_document) # Create FAISS index
|
| 51 |
+
|
| 52 |
+
# Display input for question
|
| 53 |
+
question = st.text_input("Ask a question:")
|
| 54 |
+
|
| 55 |
+
# Slider to set the number of relevant passages to retrieve
|
| 56 |
+
top_k = st.slider("Number of relevant passages to retrieve:", 1, 5, 3)
|
| 57 |
+
|
| 58 |
+
if question:
|
| 59 |
+
# Retrieve relevant text from the document
|
| 60 |
+
relevant_texts = retrieve_relevant_text(question, top_k=top_k)
|
| 61 |
+
context = " ".join(relevant_texts)
|
| 62 |
+
|
| 63 |
+
# Generate answer using Flan-T5 model
|
| 64 |
+
generated_answer = pipe(f"question: {question} context: {context}", max_length=100)[0]["generated_text"]
|
| 65 |
+
|
| 66 |
+
# Display the answer and context used
|
| 67 |
+
st.write("Answer:", generated_answer)
|
TheyKnow/~$umball.docx
ADDED
|
Binary file (162 Bytes). View file
|
|
|