Spaces:
Sleeping
Sleeping
Upload 7 files
Browse files- chat_handler.py +17 -0
- citation_generator.py +26 -0
- debate_simulator.py +36 -0
- document_processor.py +38 -0
- gap_analyzer.py +32 -0
- summarizer_module.py +26 -0
- translator.py +35 -0
chat_handler.py
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from langchain.chains import RetrievalQA
|
| 2 |
+
|
| 3 |
+
def chat_with_paper(llm, vectorstore, query):
    """
    Answer a user question about the paper via retrieval-augmented Q&A.

    Args:
        llm: Language model instance
        vectorstore: FAISS vector store built over the paper's chunks
        query: User's question

    Returns:
        str: Answer to the question
    """
    retriever = vectorstore.as_retriever()
    qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)
    # `Chain.run` is deprecated in modern LangChain (this project already uses
    # langchain_core / .invoke elsewhere). `invoke` returns a dict; the answer
    # string lives under "result", so the str return contract is preserved.
    return qa_chain.invoke({"query": query})["result"]
|
citation_generator.py
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from langchain.chains.combine_documents import create_stuff_documents_chain
|
| 2 |
+
from langchain_core.prompts import ChatPromptTemplate
|
| 3 |
+
|
| 4 |
+
def get_citation_prompt():
    """Build the ChatPromptTemplate used for APA-style citation generation."""
    template = """
Generate an APA-style citation based on the document content:
<context>
{context}
</context>
"""
    return ChatPromptTemplate.from_template(template)
|
| 12 |
+
|
| 13 |
+
def generate_citation(llm, documents):
    """
    Produce an APA-style citation for the supplied document chunks.

    Args:
        llm: Language model instance
        documents: List of document chunks

    Returns:
        str: APA citation text produced by the LLM
    """
    # Stuff all chunks into a single prompt context and run the chain once.
    chain = create_stuff_documents_chain(llm, get_citation_prompt())
    return chain.invoke({"context": documents})
|
debate_simulator.py
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from langchain.chains.combine_documents import create_stuff_documents_chain
|
| 2 |
+
from langchain.chains import LLMChain
|
| 3 |
+
from langchain_core.prompts import ChatPromptTemplate
|
| 4 |
+
from summarizer import get_summary_prompt
|
| 5 |
+
|
| 6 |
+
def get_debate_prompt():
    """Build the ChatPromptTemplate for the supporter-vs-critic debate."""
    template = """
Act as two researchers discussing a paper.
Supporter: Defends the core idea of the document.
Critic: Challenges its assumptions, methods, or impact.
Use the following summary as reference:
{summary}
Generate a short conversation between them.
"""
    return ChatPromptTemplate.from_template(template)
|
| 16 |
+
|
| 17 |
+
def simulate_debate(llm, documents):
    """
    Generate a short supporter-vs-critic discussion of the document.

    Args:
        llm: Language model instance
        documents: List of document chunks

    Returns:
        Debate conversation produced by the chain.
        NOTE(review): LLMChain.invoke returns a dict rather than a bare
        string; downstream translate_text explicitly handles dict input,
        so this appears intentional — confirm before changing.
    """
    # Step 1: condense the chunks into a summary to seed the debate prompt.
    # NOTE(review): get_summary_prompt is imported from `summarizer`, but the
    # uploaded module is summarizer_module.py — verify the import path.
    summarize = create_stuff_documents_chain(llm, get_summary_prompt())
    summary_text = summarize.invoke({"context": documents})

    # Step 2: feed that summary into the two-researcher debate prompt.
    debate_chain = LLMChain(llm=llm, prompt=get_debate_prompt())
    return debate_chain.invoke({"summary": summary_text})
|
document_processor.py
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import PyPDF2
|
| 2 |
+
from langchain_core.documents import Document
|
| 3 |
+
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
| 4 |
+
from langchain_community.vectorstores import FAISS
|
| 5 |
+
|
| 6 |
+
def process_pdfs(uploaded_files):
    """
    Read each uploaded PDF, concatenate its page text, and split into chunks.

    Args:
        uploaded_files: List of uploaded PDF file objects

    Returns:
        list: Document chunks (1000-char windows, 200-char overlap)
    """
    docs = []
    for uploaded in uploaded_files:
        reader = PyPDF2.PdfReader(uploaded)
        # extract_text() can return None (e.g. image-only pages); treat as "".
        full_text = "".join(page.extract_text() or "" for page in reader.pages)
        docs.append(Document(page_content=full_text, metadata={"source": uploaded.name}))

    chunker = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    return chunker.split_documents(docs)
|
| 26 |
+
|
| 27 |
+
def create_vector_store(documents, embedding):
    """
    Build a FAISS vector store indexing the given document chunks.

    Args:
        documents: List of document chunks
        embedding: Embedding model used to vectorize the chunks

    Returns:
        FAISS: Vector store ready for retrieval
    """
    store = FAISS.from_documents(documents, embedding)
    return store
|
gap_analyzer.py
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from langchain.chains.combine_documents import create_stuff_documents_chain
|
| 2 |
+
from langchain.chains import LLMChain
|
| 3 |
+
from langchain_core.prompts import ChatPromptTemplate
|
| 4 |
+
from summarizer import get_summary_prompt
|
| 5 |
+
|
| 6 |
+
def get_gap_prompt():
    """Build the ChatPromptTemplate for research-gap analysis."""
    template = """
Analyze the following summary and identify key research gaps, unanswered questions, or limitations:
{summary}
"""
    return ChatPromptTemplate.from_template(template)
|
| 12 |
+
|
| 13 |
+
def identify_research_gaps(llm, documents):
    """
    Surface research gaps, open questions, and limitations in the document.

    Args:
        llm: Language model instance
        documents: List of document chunks

    Returns:
        Gap analysis produced by the chain.
        NOTE(review): LLMChain.invoke yields a dict, not a plain str;
        translate_text downstream accepts dicts, so confirm before changing.
    """
    # Stage 1: summarize the chunks so the gap prompt works on condensed text.
    # NOTE(review): get_summary_prompt comes from `summarizer`, but the
    # uploaded module is summarizer_module.py — verify the import path.
    summarizer_chain = create_stuff_documents_chain(llm, get_summary_prompt())
    summary_text = summarizer_chain.invoke({"context": documents})

    # Stage 2: ask the LLM to analyze that summary for gaps.
    gap_chain = LLMChain(llm=llm, prompt=get_gap_prompt())
    return gap_chain.invoke({"summary": summary_text})
|
summarizer_module.py
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from langchain.chains.combine_documents import create_stuff_documents_chain
|
| 2 |
+
from langchain_core.prompts import ChatPromptTemplate
|
| 3 |
+
|
| 4 |
+
def get_summary_prompt():
    """Build the ChatPromptTemplate used for document summarization."""
    template = """
You are a helpful assistant. Summarize the following document clearly and accurately:
<context>
{context}
</context>
"""
    return ChatPromptTemplate.from_template(template)
|
| 12 |
+
|
| 13 |
+
def summarize_document(llm, documents):
    """
    Summarize the uploaded document chunks in a single LLM call.

    Args:
        llm: Language model instance
        documents: List of document chunks

    Returns:
        str: Document summary
    """
    # Stuff every chunk into one prompt context and invoke the chain once.
    chain = create_stuff_documents_chain(llm, get_summary_prompt())
    return chain.invoke({"context": documents})
|
translator.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from langchain.chains import LLMChain
|
| 2 |
+
from langchain_core.prompts import ChatPromptTemplate
|
| 3 |
+
|
| 4 |
+
def get_translate_prompt():
    """Build the ChatPromptTemplate used for translation."""
    template = """
Translate the following content into {language}, preserving meaning and academic tone:
{content}
"""
    return ChatPromptTemplate.from_template(template)
|
| 10 |
+
|
| 11 |
+
def translate_text(llm, content, language):
    """
    Translate content into the target language via an LLM chain.

    Args:
        llm: Language model instance
        content: Text to translate; a dict's values are joined with blank lines
        language: Target language name

    Returns:
        Chain output containing the translation.
        NOTE(review): LLMChain.invoke returns a dict, not a plain str —
        confirm callers expect that shape.
    """
    # Upstream chains hand over dicts (LLMChain.invoke output); flatten those
    # into a single text block, otherwise coerce whatever we got to str.
    text = (
        "\n\n".join(str(value) for value in content.values())
        if isinstance(content, dict)
        else str(content)
    )

    chain = LLMChain(llm=llm, prompt=get_translate_prompt())
    return chain.invoke({"language": language, "content": text})
|