import os

import gradio as gr
import pandas as pd
from langchain.memory import ConversationBufferMemory
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.schema import Document

from utils import make_html_source, make_pairs, get_llm, reset_textbox
from prompt import PROMPT_INTERPRATE_INTENTION, ANSWER_PROMPT
# Load environment variables from a local .env file when python-dotenv is available
try:
    from dotenv import load_dotenv

    load_dotenv()
except Exception:
    pass
# Load your OpenAI API key
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
assert OPENAI_API_KEY, "Please set your OpenAI API key"
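# Embedding model plus the prebuilt FAISS index loaded from the local
# "faiss_index" folder; allow_dangerous_deserialization is required because
# FAISS.load_local unpickles the docstore stored alongside the index.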
embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)
new_vector_store = FAISS.load_local(
    "faiss_index", embeddings, allow_dangerous_deserialization=True
)
retriever = new_vector_store.as_retriever()
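# NOTE: the retriever wrapper is not used below (chat() queries the vector store
# directly), but it exposes the standard retriever interface if needed, e.g.
# (sketch) docs = retriever.get_relevant_documents("decisions on MDI projects")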
QUESTIONS = [
    "Give me 10 bullet points to summarize the key decisions of the 94th meeting.",
    "Summarize the policy decisions of the 94th meeting.",
    "Give me an example of a decision that applied a penalty to a country.",
    "Show me the decisions related to end users",
    "Give me a policy on MDI (Metered Dose Inhalers)",
]
llm = get_llm()
memory = ConversationBufferMemory(
    return_messages=True, output_key="answer", input_key="question"
)
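# make_qa_chain composes a minimal LCEL pipeline: a dict of input selectors is
# piped into the answer prompt and then into the chat model, so the chain can
# be streamed with astream_log (see chat() below).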
def make_qa_chain():
    final_inputs = {
        "context": lambda x: x["context"],
        "question": lambda x: x["question"],
    }
    return final_inputs | ANSWER_PROMPT | llm
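# For meeting-scoped questions the app bypasses the FAISS index and reloads
# every decision of the requested meeting straight from the Excel export.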
def load_documents_meeting(meeting_number):
    # Step 1: Load the Excel data
    excel_file_path = "data/mfls.xlsx"
    df = pd.read_excel(excel_file_path)
    df["meeting_number"] = df["Meeting"].apply(lambda x: x.split(" ")[0][:-2])
    df_meeting = df[df["meeting_number"] == meeting_number].copy()

    # Step 2: Combine title and content into a single text field
    def combine_title_and_content(row):
        return f"{row['Meeting']} {row['Issues']} {row['Content']}"

    df_meeting["combined"] = df_meeting.apply(combine_title_and_content, axis=1)

    # Step 3: Wrap each row in a Document (embedding happens in the vector store)
    documents = [
        Document(
            page_content=row["combined"],
            metadata={
                "Issues": row["Issues"],
                "Title": row["Title"],
                "meeting_number": row["Meeting"].split(" ")[0][:-2],
                "Agencies": row["Agencies"],
                "project": row["Projects"],
            },
        )
        for _, row in df_meeting.iterrows()
    ]
    return documents
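# chat() is the Gradio streaming callback: it classifies the question's intent,
# gathers source documents, streams the model's answer token by token, and
# finally saves the exchange into the conversation memory.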
async def chat(
    query: str,
    history: list = [],
):
    """Take a query and a message history and run the pipeline (intent
    detection, retrieval, answering), yielding tuples of (messages in Gradio
    format, messages in LangChain format, source documents as HTML)."""
    source_string = ""
    gradio_format = make_pairs([a.content for a in history]) + [(query, "")]
    qa_chain = make_qa_chain()

    # Reset memory and replay the history into it
    memory.clear()
    for message in history:
        memory.chat_memory.add_message(message)
    inputs = {"question": query}

    ## INTENT
    intent = await llm.abatch([PROMPT_INTERPRATE_INTENTION.format_prompt(query=query)])
    intent = intent[0].content
    print("intent", intent)
    ## RETRIEVER
    if intent.split(" ")[0] == "meeting":
        # Meeting-scoped question: load every decision of that meeting from the Excel file
        meeting_number = intent.split(" ")[-1]
        sources = load_documents_meeting(meeting_number)
    else:
        # General question: similarity search over the FAISS index
        sources = new_vector_store.search(query, search_type="similarity", k=5)
    source_string = "\n\n".join([make_html_source(doc, i) for i, doc in enumerate(sources, 1)])
    ## RAG
    inputs_rag = {"question": query, "context": sources}
    result = qa_chain.astream_log(inputs_rag)

    reformulated_question_path_id = "/logs/ChatOpenAI/streamed_output_str/-"
    retriever_path_id = "/logs/VectorStoreRetriever/final_output"
    final_answer_path_id = "/streamed_output/-"
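    # astream_log yields RunLogPatch objects whose .ops are JSON-patch operations;
    # each op's "path" identifies which step of the run emitted a value. The
    # retriever path only fires for chains that include a VectorStoreRetriever
    # (here the sources are passed in directly, so it is handled defensively).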
    async for op in result:
        op = op.ops[0]
        # print(op["path"])
        if op["path"] == reformulated_question_path_id:  # reformulated question
            new_token = op["value"]  # str
        elif op["path"] == retriever_path_id:  # documents
            sources = op["value"]["documents"]  # List[Document]
            source_string = "\n\n".join(
                [make_html_source(doc, i) for i, doc in enumerate(sources, 1)]
            )
        elif op["path"] == final_answer_path_id:  # final answer
            new_token = op["value"].content  # str
            answer_yet = gradio_format[-1][1]
            gradio_format[-1] = (query, answer_yet + new_token)
        yield gradio_format, history, source_string

    memory.save_context(inputs, {"answer": gradio_format[-1][1]})
    yield gradio_format, memory.load_memory_variables({})["history"], source_string
### GRADIO UI
theme = gr.themes.Soft(
    primary_hue="sky",
    font=[gr.themes.GoogleFont("Poppins"), "ui-sans-serif", "system-ui", "sans-serif"],
)
demo_name = "UNEP Q&A"

with gr.Blocks(title=demo_name, theme=theme, css_paths=os.getcwd() + "/style.css") as demo:
    gr.Markdown(f"<h1><center>{demo_name}</center></h1>")
    with gr.Row():
        with gr.Column(scale=2):
            chatbot = gr.Chatbot(
                value=[("", "Hello! How can I help you today?")],
                elem_id="chatbot",
                label=f"{demo_name} chatbot",
                show_label=False,
            )
            state = gr.State([])
            with gr.Row():
                ask = gr.Textbox(
                    show_label=False,
                    placeholder="Input your question then press enter",
                )
        with gr.Column(scale=1, variant="panel"):
            with gr.Tabs() as tabs:
                with gr.TabItem("Examples", id=0):
                    example_hidden = gr.Textbox(visible=False)
                    examples_questions = gr.Examples(
                        QUESTIONS,
                        [example_hidden],
                        run_on_click=False,
                        elem_id="examples",
                        api_name="examples",
                    )
                with gr.TabItem("Sources", id=1):
                    gr.Markdown("### Sources")
                    sources_textbox = gr.HTML(show_label=False, elem_id="sources-textbox")
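    # Submitting a question first switches to the Sources tab, then streams
    # chat() so the answer and its source list update together.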
    ask.submit(lambda: gr.update(selected=1), outputs=[tabs]).then(
        fn=chat,
        inputs=[
            ask,
            state,
        ],
        outputs=[chatbot, state, sources_textbox],
    )
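    # Example questions are routed through the hidden textbox: clicking an
    # example fills it, and its change event triggers the same chat pipeline.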
    example_hidden.change(lambda: gr.update(selected=1), outputs=[tabs]).then(
        fn=chat,
        inputs=[
            example_hidden,
            state,
        ],
        outputs=[chatbot, state, sources_textbox],
    )
    ask.submit(reset_textbox, [], [ask])
demo.queue()
demo.launch(ssr_mode=False)