| | import gradio as gr |
| | import os |
| | from utils.document_parsing import DocParsing |
| | from utils.retrieval import Retrieval |
| | from utils.llm_generation import LLMGeneration |
| | import json |
| |
|
| |
|
# Sentence-transformers model used to embed document chunks for retrieval.
embedding_model_name = "sentence-transformers/all-MiniLM-L6-v2"

# Shared retriever instance; its vector store is rebuilt for each uploaded PDF.
retriever = Retrieval(model_name=embedding_model_name)

# OpenAI chat model used to generate the answers.
llm_model_name = "gpt-4o-mini"

# Placeholder only. NOTE(review): process_inputs assigns a *local*
# llm_generator, so this module-level value is never replaced — confirm
# whether the global was meant to be reused across requests.
llm_generator = None
| |
|
| |
|
def set_api_key(api_key: str) -> None:
    """
    Set the OpenAI API key as the OPENAI_API_KEY environment variable.

    Leading/trailing whitespace is stripped before storing: a key pasted
    with a stray newline or spaces would otherwise be saved verbatim and
    fail authentication downstream.

    Parameters:
        api_key (str): The OpenAI API key to be set.

    Returns:
        None: This function does not return any value.

    Raises:
        gr.Error: If the provided API key is empty or consists only of
            whitespace characters.
    """
    key = api_key.strip()
    if not key:
        raise gr.Error("Please provide a valid API key")
    os.environ["OPENAI_API_KEY"] = key
| |
|
| |
|
def process_inputs(api_key: str, pdf_file, questions: str) -> str:
    """
    Run the full Q&A pipeline over an uploaded PDF.

    Validates the inputs, parses the PDF into chunks, indexes the chunks
    in the retriever's vector store, then answers each question with the
    LLM using the top retrieved chunks as context.

    Parameters:
        api_key (str): The OpenAI API key for accessing the LLM model.
        pdf_file (File): The uploaded PDF file (Gradio file object).
        questions (str): The list of questions, one per line.

    Returns:
        str: JSON string mapping each question to its generated answer.

    Raises:
        gr.Error: If the API key, the PDF file, or the question list is
            missing/invalid.
    """
    set_api_key(api_key)

    # Fail fast on cheap validation BEFORE the expensive PDF parsing and
    # vector-store build (the original checked questions only afterwards).
    if pdf_file is None:
        raise gr.Error("Please upload a pdf file")
    if not questions.strip():
        raise gr.Error("Please provide valid set of questions")

    # Drop blank lines so stray newlines don't become empty queries.
    questions_list = [q.strip() for q in questions.strip().split("\n") if q.strip()]

    # Parse the PDF into embedding-ready chunks and index them.
    doc_handler = DocParsing(file_path=pdf_file.name, model_name=embedding_model_name)
    docs = doc_handler.process_pdf()
    retriever.create_vector_store(chunks=docs)

    # Fresh generator per request; picks up the just-set API key.
    llm_generator = LLMGeneration(llm_model_name=llm_model_name)

    output_dict = {}
    for question in questions_list:
        # Top-10 most similar chunks serve as context for the answer.
        similar_chunks = retriever.search(query=question, k=10)
        output_dict[question] = llm_generator.generate_answer(question, similar_chunks)

    return json.dumps(output_dict, indent=4)
| |
|
| |
|
# --- Gradio UI definition ---
with gr.Blocks() as demo:
    gr.Markdown("# AskMYPDF Q&A App")
    gr.Markdown(
        "Enter your OPENAI API key, upload a PDF, and list your questions below."
    )

    # Inputs: masked API-key field, upload restricted to .pdf files, and a
    # multi-line textbox expecting one question per line.
    api_key_input = gr.Textbox(label="API Key", type="password")
    pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"])
    questions_input = gr.Textbox(
        label="List of Questions (one per line)",
        lines=5,
        placeholder="Question 1\nQuestion 2\n...",
    )

    submit_button = gr.Button("Submit")
    # Displays the JSON string returned by process_inputs.
    output = gr.Textbox(label="Output")

    # Wire the button to the pipeline entry point.
    submit_button.click(
        fn=process_inputs,
        inputs=[api_key_input, pdf_input, questions_input],
        outputs=output,
    )

# Launch the app only when run as a script (not when imported).
if __name__ == "__main__":
    demo.launch()
| |
|