"""Gradio front end that turns an uploaded PDF chapter into flashcard JSON.

The PDF is uploaded through the Gemini File API, a chat session is started
with the file as the first user turn, and the model is asked (via ``prompt``)
to emit flashcards inside fenced ``json`` blocks.  Every fenced block found in
the replies is parsed and merged into one list shown in the UI.
"""
import os
import re
import json
import time

import gradio as gr
import google.generativeai as genai

# Fails fast with KeyError at import time when GEMINI_API_KEY is not set.
genai.configure(api_key=os.environ["GEMINI_API_KEY"])

# NOTE(review): the prompt text below is reproduced exactly as found. Its
# Markdown structure (headings, bullets, the fenced example) looks like it lost
# its original line breaks -- confirm against the intended prompt.
prompt = """You are an expert Chartered Accountancy Foundation course tutor, skilled in creating effective learning materials, especially flashcards. **Task:** Your goal is to generate comprehensive and exam-focused notes from the provided PDF chapter of a Chartered Accountancy Foundation course. These notes should be structured to aid students in grasping key concepts, definitions, formulas, and problem-solving techniques necessary to clear the examination. The notes should not contain any fillers, and should be to the point. **Input:** A PDF document containing a chapter from a Chartered Accountancy Foundation course textbook. This chapter may include: * Textual explanations of concepts and theories. * Definitions of key terms. * Formulas and equations. * Flowcharts and diagrams illustrating processes or relationships. * Example problems and solutions. * End-of-chapter questions and practice tests. **Instructions for Flashcard Generation:** 1. **Comprehensive Coverage:** Extract all *relevant* information for exam preparation. Do not miss key definitions, formulas, core concepts, or steps in processes explained in flowcharts and diagrams. Utilize information from example problems and end-of-chapter questions to generate application-based flashcards where appropriate. 2. **Notes Structure (Front & Back):** * **Front:** Pose a question, term, concept, or a part of a problem. Aim to trigger recall. Use concise and clear language. For flowcharts and diagrams, consider using a cropped image or a description on the front and the explanation/full diagram on the back. Questions from the PDF can directly be used on the front. * **Back:** Provide the answer, definition, explanation, formula, steps to solve the problem, or elaborate on the concept presented on the front. 
Keep the back concise but complete enough for understanding. For flowcharts and diagrams, the back can contain a textual explanation and/or the full diagram itself if it fits well. 3. **Output Format:** Return the notes in JSON format as an array of objects. Each object should have "front" and "back" keys containing the text for each side of the flashcard. 4. **Short and Long Answers:** Try to cover all the cases, both the short and long answers. 5. **Iteration and Completeness:** Generate as many comprehensive and relevant flashcards as possible from the PDF chapter. After outputting the JSON, if you believe more relevant notes can be generated from the PDF, print the word "CONTINUE" on a new line. This signals that you have not exhausted all possible flashcards. **Example Output:** ``` [{"front": "", "back": ""}, ...] CONTINUE [only if more notes can be generated and the output limit is reaching] ``` """

# Upper bound on follow-up "CONTINUE" turns, so a model that keeps returning
# fenced JSON cannot keep the request (and the API bill) running forever.
MAX_CONTINUATIONS = 20

# Matches the payload of every ```json ... ``` fenced block in a reply.
_JSON_BLOCK_RE = re.compile(r"```json(.*?)```", re.DOTALL)


def upload_to_gemini(path, mime_type=None):
    """Upload the file at *path* via ``genai.upload_file`` and return the handle.

    The display name and URI of the uploaded file are printed for tracing.
    """
    file = genai.upload_file(path, mime_type=mime_type)
    print(f"Uploaded file '{file.display_name}' as: {file.uri}")
    return file


def wait_for_files_active(files):
    """Block until every file in *files* has left the ``PROCESSING`` state.

    Each file is re-fetched every 10 seconds while it is still processing.

    Raises:
        Exception: if a file ends in any state other than ``ACTIVE``.
    """
    print("Waiting for file processing...")
    for uploaded in files:
        name = uploaded.name
        current = genai.get_file(name)
        while current.state.name == "PROCESSING":
            print(".", end="", flush=True)
            time.sleep(10)
            current = genai.get_file(name)
        if current.state.name != "ACTIVE":
            raise Exception(f"File {current.name} failed to process")
    print("...all files ready")
    print()


# Create the model
generation_config = {
    "temperature": 1,
    "top_p": 0.95,
    "top_k": 64,
    "max_output_tokens": 8192,
    "response_mime_type": "text/plain",
}

model = genai.GenerativeModel(
    model_name="gemini-2.0-pro-exp-02-05",
    generation_config=generation_config,
)


def send_message_with_retry(chat_session, content, max_retries=3, initial_backoff=30):
    """Send *content* on *chat_session*, retrying with exponential backoff.

    Args:
        chat_session: object exposing ``send_message(content=...)``.
        content: message payload forwarded unchanged.
        max_retries: number of retries after the first attempt.
        initial_backoff: seconds slept before the first retry; doubled on
            each subsequent retry.

    Returns:
        The response of the first successful ``send_message`` call, or
        ``None`` when ``max_retries`` is negative (no attempt is made).

    Raises:
        Exception: whatever the final attempt raised, re-raised unchanged.
    """
    for attempt in range(max_retries + 1):
        try:
            return chat_session.send_message(content=content)
        except Exception as e:
            # Broad catch is deliberate: this is the retry boundary and the
            # last failure is re-raised untouched.
            if attempt == max_retries:
                raise
            backoff_time = initial_backoff * (2**attempt)
            print(f"Error during LLM call: {e}. Retrying in {backoff_time} seconds (Attempt {attempt + 1}/{max_retries + 1})...")
            time.sleep(backoff_time)
    # Only reachable when the loop body never ran (max_retries < 0).
    return None


def _parse_flashcards(text):
    """Return the flashcards found in every fenced ``json`` block of *text*.

    Blocks that are not valid JSON are reported and skipped.  A block holding
    a JSON array contributes its elements; a block holding a single JSON
    object contributes that object; any other JSON value is reported and
    skipped instead of being spliced into the result.
    """
    cards = []
    for block in _JSON_BLOCK_RE.findall(text):
        try:
            parsed = json.loads(block)
        except json.JSONDecodeError as e:
            print(f"JSONDecodeError: {e} for match: {block}")
            continue
        if isinstance(parsed, list):
            cards.extend(parsed)
        elif isinstance(parsed, dict):
            # A lone object would otherwise be extended key-by-key.
            cards.append(parsed)
        else:
            print(f"Skipping JSON block that is neither an array nor an object: {block}")
    return cards


def extract_notes(filepath):
    """Generate flashcards for the PDF at *filepath*.

    Uploads the PDF, waits for it to become active, sends ``prompt`` and then
    keeps sending "CONTINUE" while the latest reply still contains a fenced
    ``json`` block, up to ``MAX_CONTINUATIONS`` follow-up turns.

    Args:
        filepath: path of the uploaded PDF as handed over by the file widget.

    Returns:
        list: the merged contents of all fenced ``json`` blocks.

    Raises:
        gr.Error: if no file was provided.
    """
    if not filepath:
        raise gr.Error("Please upload a PDF file before submitting.")

    files = [
        upload_to_gemini(filepath, mime_type="application/pdf"),
    ]
    wait_for_files_active(files)

    chat_session = model.start_chat(
        history=[
            {
                "role": "user",
                "parts": [files[0]],
            },
        ]
    )

    response = send_message_with_retry(chat_session, prompt)
    reply_texts = [response.text]
    continuations = 0
    while "```json" in response.text:
        if continuations >= MAX_CONTINUATIONS:
            print(f"Stopping after {MAX_CONTINUATIONS} continuation requests.")
            break
        response = send_message_with_retry(chat_session, "CONTINUE")
        reply_texts.append(response.text)
        continuations += 1

    return _parse_flashcards("\n".join(reply_texts))


with gr.Blocks() as demo:
    with gr.Row():
        file_upload_section = gr.File(label="Upload file", file_count="single", type="filepath", file_types=[".pdf"])
    submit_btn = gr.Button("Submit")
    output_json_result = gr.JSON(label="Output. Copy and paste this to the App.")
    submit_btn.click(extract_notes, file_upload_section, output_json_result)

if __name__ == "__main__":
    # NOTE(review): both credentials are None when the environment variables
    # are unset -- confirm how the installed gradio version treats that.
    demo.queue().launch(auth=(os.environ.get("GRADIO_USERNAME"), os.environ.get("GRADIO_PASSWORD")))