Spaces:
Sleeping
Sleeping
| import datasets | |
| from langchain_core.documents import Document | |
| from langchain_community.retrievers import BM25Retriever | |
def load_scientific_paper_dataset():
    """Load the processed scientific-paper dataset as a list of documents.

    Reads the ``train`` split of
    ``gsasikiran/Summarize-Scientific-Papers-Processed`` and renders each
    record as one ``Document``. The document text is a newline-separated
    sequence of ``"<label>: <value>"`` lines, one per dataset column listed
    below; the paper title is also stored in the metadata under ``"title"``.

    Returns:
        A list with one ``Document`` per dataset record, in dataset order.
    """
    # (label, dataset column) pairs, in the order they appear in the text.
    # NOTE(review): the labels are runtime text that ends up in the search
    # index, so they are kept verbatim here -- including the spellings
    # "Theorical" and "Takeways". Confirm before normalising them.
    labelled_columns = (
        ("Title", "title"),
        ("Authors", "authors"),
        ("What is it", "article_classification"),
        ("Claims", "claims"),
        ("Contradictions", "contradictions_and_limitations"),
        ("Ethical Considerations", "ethical_considerations"),
        ("Summary", "executive_summary"),
        ("Subfield", "field_subfield"),
        ("Theorical Implications", "interpretation_and_theoretical_implications"),
        ("Method to Retrieve Info", "methodological_details"),
        ("People used to get data", "procedures_and_architectures"),
        ("Context of Research", "research_context"),
        ("Research Hypothesis", "research_question_and_hypothesis"),
        ("Three Takeways", "three_takeaways"),
        ("Type of Paper", "type_of_paper"),
    )

    papers = datasets.load_dataset(
        "gsasikiran/Summarize-Scientific-Papers-Processed",
        split="train",
    )

    documents = []
    for paper in papers:
        body = "\n".join(
            f"{label}: {paper[column]}" for label, column in labelled_columns
        )
        documents.append(
            Document(page_content=body, metadata={"title": paper["title"]})
        )
    return documents
| # --- Scientific Paper Retriever --- | |
class ScientificPaperRetriever:
    """Keyword (BM25) search over scientific-paper documents.

    Wraps a ``BM25Retriever`` built from the supplied documents and exposes a
    single ``run`` method that returns the text of the top matches.
    """

    # Upper bound on how many matching documents ``run`` joins into its reply.
    _MAX_RESULTS = 3

    def __init__(self, docs):
        """Build the BM25 retriever.

        Args:
            docs: Documents to index; passed straight to
                ``BM25Retriever.from_documents``.
        """
        self.retriever = BM25Retriever.from_documents(docs)

    def run(self, query: str) -> str:
        """Return the text of the best-matching papers for ``query``.

        Args:
            query: Free-text search query.

        Returns:
            The ``page_content`` of up to three matching documents, separated
            by blank lines, or a fixed "not found" message when the retriever
            returns nothing.
        """
        # LangChain retrievers are queried through ``invoke``; they expose no
        # ``retrieve`` method, so the previous call raised AttributeError.
        results = self.retriever.invoke(query)
        if not results:
            return "No matching scientific paper found."
        # LangChain ``Document`` keeps its text in ``page_content`` (there is
        # no ``text`` attribute).
        return "\n\n".join(
            doc.page_content for doc in results[: self._MAX_RESULTS]
        )