import dspy
from dotenv import load_dotenv

load_dotenv()

from langchain_community.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings

# Directory where the Chroma vector store was persisted.
persist_directory = "embeddings_db"

# Configure DSPy to use OpenAI's gpt-4o-mini as the language model.
lm = dspy.LM("openai/gpt-4o-mini")
dspy.configure(lm=lm)

# Load the persisted Chroma store with OpenAI embeddings and expose it as a retriever.
embedding_function = OpenAIEmbeddings(model="text-embedding-3-small")
vectordb = Chroma(
    persist_directory=persist_directory, embedding_function=embedding_function
)
retriever = vectordb.as_retriever()

def retrieve(inputs):
    """Fetch documents relevant to the question from the vector store."""
    docs = retriever.invoke(inputs["question"])
    return docs

def get_source_pages(docs):
    """Build deduplicated 'section - page' labels from each document's source path."""
    source_pages = []
    for doc in docs:
        section = doc.metadata["source"].split("/")[-2]
        page = doc.metadata["source"].split("/")[-1].split(".")[0]
        source_pages.append(f"{section} - {page}")
    source_pages = list(set(source_pages))
    return source_pages

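# A minimal illustration of the path parsing above, assuming source paths shaped
# like "<section_dir>/<page_file>.<ext>" (the directory layout is an assumption,
# not taken from this file): a metadata source of "speech/part_2/page_14.txt"
# would yield the label "part_2 - page_14".
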
class COT_RAG(dspy.Module):
    """Chain-of-thought RAG module: retrieves context, then answers with dspy.ChainOfThought."""

    def __init__(self):
        super().__init__()
        self.respond = dspy.ChainOfThought("context, question -> response")

    def forward(self, question):
        question_ = "Given the context from Cory Booker's speech, please answer the question below."
        question_ += f"\n\nQuestion: {question}\n\nStart your answer by specifying this was from Senator Booker."
        docs = retrieve({"question": question})
        # Keep the retrieved documents so callers can surface source pages later.
        self.docs = docs
        context = [doc.page_content for doc in docs]
        return self.respond(context=context, question=question_)

def answer_question(question):
    """Run the chain-of-thought RAG pipeline and return only the answer text."""
    rag = COT_RAG()
    answer = rag.forward(question)
    return answer.response

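# Minimal usage sketch (an assumption for illustration, not part of the original
# script): the sample question and the __main__ guard are made up here. Calling
# the module directly keeps the retrieved docs on rag.docs, so get_source_pages
# can report where the answer came from.
if __name__ == "__main__":
    rag = COT_RAG()
    prediction = rag.forward("What topics does the speech cover?")
    print(prediction.response)
    print("Sources:", get_source_pages(rag.docs))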