from typing import List

import pandas as pd
from langchain_core.documents import Document


def format_docs_xml(docs: List[Document]) -> str:
    """Concatenate the source and content of each document into one string.

    Every document contributes a fragment made of a newline, its
    ``metadata["source"]`` value, a newline, its ``page_content`` and a
    trailing newline. The fragments are joined with a newline and the result
    is wrapped in three leading newlines and one trailing newline.

    NOTE(review): despite the function name, the templates as they stand
    emit no XML tags, only newline-delimited text -- confirm this is the
    intended output format.

    Args:
        docs: Documents to format, in the order they should appear.

    Returns:
        The combined text block. For an empty ``docs`` list this is just the
        four wrapping newlines.

    Raises:
        KeyError: If a document's metadata has no ``"source"`` entry.
    """
    formatted_docs = [
        f"\n{doc.metadata['source']}\n{doc.page_content}\n" for doc in docs
    ]
    # Join outside the f-string: backslashes are not allowed inside f-string
    # expressions before Python 3.12.
    joined = "\n".join(formatted_docs)
    return f"\n\n\n{joined}\n"


def get_article_info(df: pd.DataFrame, file_name: str):
    """Return the title and link recorded for ``file_name`` in ``df``.

    The file name is normalised before the lookup: backslashes become forward
    slashes, then every occurrence of ``"Articles/"`` is removed (not only a
    leading one). The result is compared for equality against the
    ``file_name`` column.

    Args:
        df: Table with ``file_name``, ``title`` and ``link`` columns.
        file_name: Path of the article file, with either ``/`` or ``\\``
            separators. Assumed to be unique in ``df``; if several rows
            match, the first one is used.

    Returns:
        A ``(title, link)`` tuple taken from the first matching row, or
        ``("IBHS Website", "https://ibhs.org")`` when no row matches.
    """
    # Once every backslash has been turned into "/", a single "Articles/"
    # replacement covers both the POSIX and the Windows spelling of the prefix.
    edited_file_name = file_name.replace("\\", "/").replace("Articles/", "")

    row = df[df["file_name"] == edited_file_name]
    if row.empty:
        # Fallback if not found
        return "IBHS Website", "https://ibhs.org"
    return row["title"].iloc[0], row["link"].iloc[0]