Spaces:

QuantumLearner
/

Space24

Sleeping

Entreprenerdly committed on Aug 10, 2024

Commit

f786bd8

verified ·

1 Parent(s): d0474e3

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -62,15 +62,23 @@ def main(message: str):
         text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
         chunks = text_splitter.split_text(paper_text)
-        # Create embeddings and vector store
         embeddings = OpenAIEmbeddings()
-        vectorstore = FAISS.from_texts(chunks, embeddings, metadatas=[{"title": selected_paper.title, "link": selected_paper.entry_id}] * len(chunks))
         # Create the conversational chain
         memory = ConversationBufferMemory(
             memory_key="chat_history",
             return_messages=True,
-            output_key="answer"  # Specify the output key to avoid errors
         )
         qa_chain = ConversationalRetrievalChain.from_llm(
@@ -95,8 +103,8 @@ def main(message: str):
             response = qa_chain({"question": message})
             answer = response["answer"]
-            # Check if source metadata is present, otherwise handle gracefully
-            sources = "\n".join([f"- {doc.metadata.get('title', 'Unknown title')} ({doc.metadata.get('link', 'No link')})" for doc in response.get("source_documents", [])])
             if sources:
                 answer += f"\n\nSources:\n{sources}"

         text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
         chunks = text_splitter.split_text(paper_text)
+        # Create embeddings and vector store, include chunk-specific metadata
         embeddings = OpenAIEmbeddings()
+        vectorstore = FAISS.from_texts(
+            chunks,
+            embeddings,
+            metadatas=[{
+                "title": selected_paper.title,
+                "link": selected_paper.entry_id,
+                "chunk": f"Chunk {i+1}/{len(chunks)}"
+            } for i in range(len(chunks))]
+        )
         # Create the conversational chain
         memory = ConversationBufferMemory(
             memory_key="chat_history",
             return_messages=True,
+            output_key="answer"
         )
         qa_chain = ConversationalRetrievalChain.from_llm(
             response = qa_chain({"question": message})
             answer = response["answer"]
+            # Handling the sources with chunk-specific metadata
+            sources = "\n".join([f"- {doc.metadata.get('title', 'Unknown title')} ({doc.metadata.get('link', 'No link')}) - {doc.metadata.get('chunk', 'No chunk info')}" for doc in response.get("source_documents", [])])
             if sources:
                 answer += f"\n\nSources:\n{sources}"