Llama-Chat-Example-AliA / scientific_paper_agent.py
AliA1997
Integrated multi-agent workflow from llama index.
5dde853
import datasets
from langchain_core.documents import Document
from langchain_community.retrievers import BM25Retriever
def load_scientific_paper_dataset():
    """Load the summarized scientific-paper dataset as LangChain documents.

    Loads the ``train`` split of
    ``gsasikiran/Summarize-Scientific-Papers-Processed`` through
    ``datasets.load_dataset`` and flattens every row into one text blob of
    ``"Label: value"`` lines, so a keyword retriever can match on any field.

    Returns:
        A list with one ``Document`` per dataset row. ``page_content`` holds
        the labelled fields joined by newlines; ``metadata["title"]`` holds
        the paper title.
    """
    scientific_paper_dataset = datasets.load_dataset(
        "gsasikiran/Summarize-Scientific-Papers-Processed", split="train"
    )

    # Convert dataset entries into Document objects. The labels become part
    # of the searchable text, so they are spelled correctly and describe the
    # column they introduce.
    docs = [
        Document(
            page_content="\n".join([
                f"Title: {scientific_paper['title']}",
                f"Authors: {scientific_paper['authors']}",
                f"What is it: {scientific_paper['article_classification']}",
                f"Claims: {scientific_paper['claims']}",
                f"Contradictions: {scientific_paper['contradictions_and_limitations']}",
                f"Ethical Considerations: {scientific_paper['ethical_considerations']}",
                f"Summary: {scientific_paper['executive_summary']}",
                f"Subfield: {scientific_paper['field_subfield']}",
                f"Theoretical Implications: {scientific_paper['interpretation_and_theoretical_implications']}",
                f"Method to Retrieve Info: {scientific_paper['methodological_details']}",
                f"Procedures and Architectures: {scientific_paper['procedures_and_architectures']}",
                f"Context of Research: {scientific_paper['research_context']}",
                f"Research Hypothesis: {scientific_paper['research_question_and_hypothesis']}",
                f"Three Takeaways: {scientific_paper['three_takeaways']}",
                f"Type of Paper: {scientific_paper['type_of_paper']}",
            ]),
            metadata={"title": scientific_paper["title"]},
        )
        for scientific_paper in scientific_paper_dataset
    ]
    return docs
# --- Scientific Paper Retriever ---
class ScientificPaperRetriever:
    """Keyword (BM25) search over scientific-paper documents.

    Wraps a LangChain ``BM25Retriever`` and exposes a single ``run`` method
    that returns plain text rather than document objects.
    """

    def __init__(self, docs):
        """Build the BM25 retriever from documents.

        Args:
            docs: LangChain ``Document`` objects to index.
        """
        self.retriever = BM25Retriever.from_documents(docs)

    def run(self, query: str) -> str:
        """Return the text of the best-matching papers for ``query``.

        Args:
            query: Free-text search string.

        Returns:
            The ``page_content`` of up to three top-ranked documents,
            separated by blank lines, or a fixed "not found" message when
            the retriever returns nothing.
        """
        # LangChain retrievers are queried through ``invoke`` and their
        # Documents carry the text in ``page_content``; ``retrieve`` and
        # ``.text`` do not exist on these objects and raised AttributeError.
        results = self.retriever.invoke(query)
        if not results:
            return "No matching scientific paper found."
        return "\n\n".join(doc.page_content for doc in results[:3])