Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -62,15 +62,23 @@ def main(message: str):
|
|
| 62 |
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
|
| 63 |
chunks = text_splitter.split_text(paper_text)
|
| 64 |
|
| 65 |
-
# Create embeddings and vector store
|
| 66 |
embeddings = OpenAIEmbeddings()
|
| 67 |
-
vectorstore = FAISS.from_texts(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 68 |
|
| 69 |
# Create the conversational chain
|
| 70 |
memory = ConversationBufferMemory(
|
| 71 |
memory_key="chat_history",
|
| 72 |
return_messages=True,
|
| 73 |
-
output_key="answer"
|
| 74 |
)
|
| 75 |
|
| 76 |
qa_chain = ConversationalRetrievalChain.from_llm(
|
|
@@ -95,8 +103,8 @@ def main(message: str):
|
|
| 95 |
response = qa_chain({"question": message})
|
| 96 |
answer = response["answer"]
|
| 97 |
|
| 98 |
-
#
|
| 99 |
-
sources = "\n".join([f"- {doc.metadata.get('title', 'Unknown title')} ({doc.metadata.get('link', 'No link')})" for doc in response.get("source_documents", [])])
|
| 100 |
if sources:
|
| 101 |
answer += f"\n\nSources:\n{sources}"
|
| 102 |
|
|
|
|
| 62 |
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
|
| 63 |
chunks = text_splitter.split_text(paper_text)
|
| 64 |
|
| 65 |
+
# Create embeddings and vector store, include chunk-specific metadata
|
| 66 |
embeddings = OpenAIEmbeddings()
|
| 67 |
+
vectorstore = FAISS.from_texts(
|
| 68 |
+
chunks,
|
| 69 |
+
embeddings,
|
| 70 |
+
metadatas=[{
|
| 71 |
+
"title": selected_paper.title,
|
| 72 |
+
"link": selected_paper.entry_id,
|
| 73 |
+
"chunk": f"Chunk {i+1}/{len(chunks)}"
|
| 74 |
+
} for i in range(len(chunks))]
|
| 75 |
+
)
|
| 76 |
|
| 77 |
# Create the conversational chain
|
| 78 |
memory = ConversationBufferMemory(
|
| 79 |
memory_key="chat_history",
|
| 80 |
return_messages=True,
|
| 81 |
+
output_key="answer"
|
| 82 |
)
|
| 83 |
|
| 84 |
qa_chain = ConversationalRetrievalChain.from_llm(
|
|
|
|
| 103 |
response = qa_chain({"question": message})
|
| 104 |
answer = response["answer"]
|
| 105 |
|
| 106 |
+
# Handling the sources with chunk-specific metadata
|
| 107 |
+
sources = "\n".join([f"- {doc.metadata.get('title', 'Unknown title')} ({doc.metadata.get('link', 'No link')}) - {doc.metadata.get('chunk', 'No chunk info')}" for doc in response.get("source_documents", [])])
|
| 108 |
if sources:
|
| 109 |
answer += f"\n\nSources:\n{sources}"
|
| 110 |
|