Spaces:
Sleeping
Sleeping
Upload 7 files
Browse files- chat_handler.py +17 -0
- citation_generator.py +26 -0
- debate_simulator.py +36 -0
- document_processor.py +38 -0
- gap_analyzer.py +32 -0
- summarizer_module.py +26 -0
- translator.py +35 -0
chat_handler.py
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from langchain.chains import RetrievalQA
|
| 2 |
+
|
| 3 |
+
def chat_with_paper(llm, vectorstore, query):
    """
    Answer a user question about the paper via retrieval-augmented Q&A.

    Args:
        llm: Language model instance
        vectorstore: FAISS vector store built over the paper's chunks
        query: User's question

    Returns:
        str: Answer to the question
    """
    retriever = vectorstore.as_retriever()
    qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)
    # `Chain.run` is deprecated in modern LangChain (this project already uses
    # langchain_core / .invoke elsewhere). `invoke` returns a dict; the answer
    # string lives under "result", so the str return contract is preserved.
    return qa_chain.invoke({"query": query})["result"]
|
citation_generator.py
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from langchain.chains.combine_documents import create_stuff_documents_chain
|
| 2 |
+
from langchain_core.prompts import ChatPromptTemplate
|
| 3 |
+
|
| 4 |
+
def get_citation_prompt():
    """Build the ChatPromptTemplate used for APA-style citation generation."""
    template = """
Generate an APA-style citation based on the document content:
<context>
{context}
</context>
"""
    return ChatPromptTemplate.from_template(template)
|
| 12 |
+
|
| 13 |
+
def generate_citation(llm, documents):
    """
    Produce an APA-style citation for the supplied document chunks.

    Args:
        llm: Language model instance
        documents: List of document chunks

    Returns:
        str: APA citation text produced by the LLM
    """
    # Stuff all chunks into a single prompt context and run the chain once.
    chain = create_stuff_documents_chain(llm, get_citation_prompt())
    return chain.invoke({"context": documents})
|
debate_simulator.py
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from langchain.chains.combine_documents import create_stuff_documents_chain
|
| 2 |
+
from langchain.chains import LLMChain
|
| 3 |
+
from langchain_core.prompts import ChatPromptTemplate
|
| 4 |
+
from summarizer import get_summary_prompt
|
| 5 |
+
|
| 6 |
+
def get_debate_prompt():
    """Build the ChatPromptTemplate for the supporter-vs-critic debate."""
    template = """
Act as two researchers discussing a paper.
Supporter: Defends the core idea of the document.
Critic: Challenges its assumptions, methods, or impact.
Use the following summary as reference:
{summary}
Generate a short conversation between them.
"""
    return ChatPromptTemplate.from_template(template)
|
| 16 |
+
|
| 17 |
+
def simulate_debate(llm, documents):
    """
    Generate a short supporter-vs-critic discussion of the document.

    Args:
        llm: Language model instance
        documents: List of document chunks

    Returns:
        Debate conversation produced by the chain.
        NOTE(review): LLMChain.invoke returns a dict rather than a bare
        string; downstream translate_text explicitly handles dict input,
        so this appears intentional — confirm before changing.
    """
    # Step 1: condense the chunks into a summary to seed the debate prompt.
    # NOTE(review): get_summary_prompt is imported from `summarizer`, but the
    # uploaded module is summarizer_module.py — verify the import path.
    summarize = create_stuff_documents_chain(llm, get_summary_prompt())
    summary_text = summarize.invoke({"context": documents})

    # Step 2: feed that summary into the two-researcher debate prompt.
    debate_chain = LLMChain(llm=llm, prompt=get_debate_prompt())
    return debate_chain.invoke({"summary": summary_text})
|
document_processor.py
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import PyPDF2
|
| 2 |
+
from langchain_core.documents import Document
|
| 3 |
+
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
| 4 |
+
from langchain_community.vectorstores import FAISS
|
| 5 |
+
|
| 6 |
+
def process_pdfs(uploaded_files):
    """
    Read each uploaded PDF, concatenate its page text, and split into chunks.

    Args:
        uploaded_files: List of uploaded PDF file objects

    Returns:
        list: Document chunks (1000-char windows, 200-char overlap)
    """
    docs = []
    for uploaded in uploaded_files:
        reader = PyPDF2.PdfReader(uploaded)
        # extract_text() can return None (e.g. image-only pages); treat as "".
        full_text = "".join(page.extract_text() or "" for page in reader.pages)
        docs.append(Document(page_content=full_text, metadata={"source": uploaded.name}))

    chunker = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    return chunker.split_documents(docs)
|
| 26 |
+
|
| 27 |
+
def create_vector_store(documents, embedding):
    """
    Build a FAISS vector store indexing the given document chunks.

    Args:
        documents: List of document chunks
        embedding: Embedding model used to vectorize the chunks

    Returns:
        FAISS: Vector store ready for retrieval
    """
    store = FAISS.from_documents(documents, embedding)
    return store
|
gap_analyzer.py
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from langchain.chains.combine_documents import create_stuff_documents_chain
|
| 2 |
+
from langchain.chains import LLMChain
|
| 3 |
+
from langchain_core.prompts import ChatPromptTemplate
|
| 4 |
+
from summarizer import get_summary_prompt
|
| 5 |
+
|
| 6 |
+
def get_gap_prompt():
    """Build the ChatPromptTemplate for research-gap analysis."""
    template = """
Analyze the following summary and identify key research gaps, unanswered questions, or limitations:
{summary}
"""
    return ChatPromptTemplate.from_template(template)
|
| 12 |
+
|
| 13 |
+
def identify_research_gaps(llm, documents):
    """
    Surface research gaps, open questions, and limitations in the document.

    Args:
        llm: Language model instance
        documents: List of document chunks

    Returns:
        Gap analysis produced by the chain.
        NOTE(review): LLMChain.invoke yields a dict, not a plain str;
        translate_text downstream accepts dicts, so confirm before changing.
    """
    # Stage 1: summarize the chunks so the gap prompt works on condensed text.
    # NOTE(review): get_summary_prompt comes from `summarizer`, but the
    # uploaded module is summarizer_module.py — verify the import path.
    summarizer_chain = create_stuff_documents_chain(llm, get_summary_prompt())
    summary_text = summarizer_chain.invoke({"context": documents})

    # Stage 2: ask the LLM to analyze that summary for gaps.
    gap_chain = LLMChain(llm=llm, prompt=get_gap_prompt())
    return gap_chain.invoke({"summary": summary_text})
|
summarizer_module.py
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from langchain.chains.combine_documents import create_stuff_documents_chain
|
| 2 |
+
from langchain_core.prompts import ChatPromptTemplate
|
| 3 |
+
|
| 4 |
+
def get_summary_prompt():
    """Build the ChatPromptTemplate used for document summarization."""
    template = """
You are a helpful assistant. Summarize the following document clearly and accurately:
<context>
{context}
</context>
"""
    return ChatPromptTemplate.from_template(template)
|
| 12 |
+
|
| 13 |
+
def summarize_document(llm, documents):
    """
    Summarize the uploaded document chunks in a single LLM call.

    Args:
        llm: Language model instance
        documents: List of document chunks

    Returns:
        str: Document summary
    """
    # Stuff every chunk into one prompt context and invoke the chain once.
    chain = create_stuff_documents_chain(llm, get_summary_prompt())
    return chain.invoke({"context": documents})
|
translator.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from langchain.chains import LLMChain
|
| 2 |
+
from langchain_core.prompts import ChatPromptTemplate
|
| 3 |
+
|
| 4 |
+
def get_translate_prompt():
    """Build the ChatPromptTemplate used for translation."""
    template = """
Translate the following content into {language}, preserving meaning and academic tone:
{content}
"""
    return ChatPromptTemplate.from_template(template)
|
| 10 |
+
|
| 11 |
+
def translate_text(llm, content, language):
    """
    Translate content into the target language via an LLM chain.

    Args:
        llm: Language model instance
        content: Text to translate; a dict's values are joined with blank lines
        language: Target language name

    Returns:
        Chain output containing the translation.
        NOTE(review): LLMChain.invoke returns a dict, not a plain str —
        confirm callers expect that shape.
    """
    # Upstream chains hand over dicts (LLMChain.invoke output); flatten those
    # into a single text block, otherwise coerce whatever we got to str.
    text = (
        "\n\n".join(str(value) for value in content.values())
        if isinstance(content, dict)
        else str(content)
    )

    chain = LLMChain(llm=llm, prompt=get_translate_prompt())
    return chain.invoke({"language": language, "content": text})
|