Spaces:

Zaherrr
/

OOP_KG_Transform_POC

Sleeping

App Files Files Community

Zaherrr committed on Oct 11, 2024

Commit

b2ad712

verified ·

1 Parent(s): e224c53

Upload 8 files

Browse files

Files changed (8) hide show

any_to_image.py +179 -0
database_operations.py +39 -0
gemini_image_to_json.py +41 -0
graph_visualization.py +60 -0
main.py +282 -0
openai_image_to_json.py +95 -0
requirements.txt +99 -0
utils.py +42 -0

any_to_image.py ADDED Viewed

	@@ -0,0 +1,179 @@

+from PIL import Image
+import fitz  # PyMuPDF
+import os
+import pyheif
def pdf_to_images(pdf_path):
    """Render every page of a PDF as a PIL image.

    Args:
        pdf_path: Filesystem path of the PDF to render.

    Returns:
        A list with one "RGB" ``PIL.Image.Image`` per page, in page order.
        An empty list is returned (and a message printed) when the file
        does not exist.
    """
    if not os.path.exists(pdf_path):
        print(f"The file {pdf_path} does not exist.")
        return []

    images = []
    pdf_document = fitz.open(pdf_path)
    try:
        for page_num in range(len(pdf_document)):
            page = pdf_document.load_page(page_num)
            pix = page.get_pixmap()
            images.append(
                Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
            )
    finally:
        # Release the document even when rendering one of the pages fails.
        pdf_document.close()
    return images
def heic_to_image(heic_path):
    """Decode a HEIC file into a PIL image.

    A path ending in upper-case ``.HEIC`` is first renamed on disk to the
    lower-case ``.heic`` spelling, and the renamed file is the one decoded.

    Args:
        heic_path: Filesystem path of the HEIC file.

    Returns:
        The decoded ``PIL.Image.Image``. An empty list is returned (and a
        message printed) when the file is missing or cannot be decoded.
    """
    if not os.path.exists(heic_path):
        print(f"The file {heic_path} does not exist.")
        return []

    upper_suffix = ".HEIC"
    if heic_path.endswith(upper_suffix):
        # Swap only the extension for its lower-case form and rename the file.
        new_file_path = heic_path[: -len(upper_suffix)] + ".heic"
        os.rename(heic_path, new_file_path)
        print(f"Renamed: {heic_path} to {new_file_path}")
        heic_path = new_file_path

    try:
        decoded = pyheif.read(heic_path)
        # Build the PIL image straight from the raw decoded buffer.
        return Image.frombytes(
            decoded.mode,
            decoded.size,
            decoded.data,
            "raw",
            decoded.mode,
            decoded.stride,
        )
    except Exception as e:
        print(f"An error occurred while processing the HEIC file: {e}")
        return []
def process_image(file_path):
    """Turn an uploaded file into the path of an image the UI can display.

    The extension is matched case-insensitively:

    * ``.pdf``  - every page is rendered to ``data_processed/<name>_page_<n>.png``
      and the first page is also written next to the upload as ``<name>.png``,
      which is the path returned.
    * ``.png`` / ``.jpg`` / ``.jpeg`` - returned as is; a file whose extension
      is not lower-case is first renamed on disk to the lower-case spelling.
    * ``.heic`` - decoded, written to ``data_processed/<name>.png`` and next
      to the upload as ``<name>.png``, which is the path returned.

    Args:
        file_path: Path of the uploaded file.

    Returns:
        ``(image_path, "")`` on success, or
        ``(None, "No image uploaded or invalid file")`` when the file type is
        unsupported or the file could not be converted.
    """
    invalid_file = (None, "No image uploaded or invalid file")
    supported = (".pdf", ".png", ".jpg", ".jpeg", ".heic")

    lowered = file_path.lower()
    extension = next((ext for ext in supported if lowered.endswith(ext)), None)
    if extension is None:
        return invalid_file

    # Path without its extension; used to build the sibling ".png" path.
    stem = file_path[: -len(extension)]
    output_dir = "data_processed"

    if extension == ".pdf":
        images = pdf_to_images(file_path)
        if not images:
            return invalid_file
        os.makedirs(output_dir, exist_ok=True)
        label_prefix = os.path.basename(stem)
        for i, img in enumerate(images, start=1):
            image_path = os.path.join(output_dir, f"{label_prefix}_page_{i}.png")
            img.save(image_path, "PNG")
            print(f"Saved {image_path}")
        # Save next to the path assigned by gradio so that its cache can be
        # reused. Only the first page is processed in the UI for now.
        # TODO: Accommodate multiple pages
        png_path = stem + ".png"
        images[0].save(png_path, "PNG")
        print(f"Saved {png_path}")
        return png_path, ""

    if extension == ".heic":
        image = heic_to_image(file_path)
        # heic_to_image signals failure with an empty list instead of raising.
        if not isinstance(image, Image.Image):
            return invalid_file
        os.makedirs(output_dir, exist_ok=True)
        image_path = os.path.join(output_dir, f"{os.path.basename(stem)}.png")
        image.save(image_path, "PNG")
        print(f"Saved {image_path}")
        # Write the PNG under a ".png" name: later steps re-save the file and
        # pick the format from its extension.
        png_path = stem + ".png"
        image.save(png_path, "PNG")
        print(f"Saved {png_path}")
        return png_path, ""

    # Remaining cases: .png / .jpg / .jpeg
    if file_path.endswith(extension):
        print(
            f"file_path from the image processing function for compatible images: {file_path}"
        )
        return file_path, ""

    # Same type but with upper/mixed-case extension: normalise it on disk.
    new_file_path = stem + extension
    os.rename(file_path, new_file_path)
    print(f"Renamed: {file_path} to {new_file_path}")
    return new_file_path, ""
# Example usage
if __name__ == "__main__":
    # Manual check: render a sample PDF and write each page out as a PNG.
    pdf_path = "data/Augustin REMY Mindmap OOP .pdf"
    images = pdf_to_images(pdf_path)
    if images:
        output_dir = "data_processed"
        os.makedirs(output_dir, exist_ok=True)
        for page_number, page_image in enumerate(images, start=1):
            image_path = os.path.join(output_dir, f"page_{page_number}.png")
            page_image.save(image_path, "PNG")
            print(f"Saved {image_path}")

database_operations.py ADDED Viewed

	@@ -0,0 +1,39 @@

+from neo4j import GraphDatabase
class Neo4jDatabase:
    """Small wrapper around a Neo4j driver for storing labelled graphs.

    Each graph is identified by a ``label_prefix``: the label shared by all
    of its nodes. Nodes written by :meth:`dump_to_neo4j` also carry the
    ``Node`` label, and edges are ``RELATION`` relationships whose ``type``
    property holds the edge type from the input.
    """

    def __init__(self, uri, username, password):
        """Open a driver for *uri* using basic username/password auth."""
        self.driver = GraphDatabase.driver(uri, auth=(username, password))

    def close(self):
        """Close the underlying driver."""
        self.driver.close()

    @staticmethod
    def _quote_label(label_prefix):
        """Return *label_prefix* as a backtick-quoted Cypher identifier.

        Labels cannot be passed as query parameters, so they have to be
        spliced into the query text. Quoting (with embedded backticks
        doubled) keeps labels containing spaces, punctuation or a leading
        digit from breaking or altering the query.
        """
        return "`" + str(label_prefix).replace("`", "``") + "`"

    def dump_to_neo4j(self, nodes, edges, label_prefix):
        """Create the given nodes and edges under *label_prefix*.

        Args:
            nodes: Iterable of dicts with ``id`` and ``label`` keys.
            edges: Iterable of dicts with ``source``, ``target`` and ``type``
                keys; ``source``/``target`` are matched against node ids.
            label_prefix: Label identifying the graph.
        """
        label = self._quote_label(label_prefix)
        create_node = f"CREATE (n:{label}:Node {{id: $id, label: $label}})"
        create_edge = (
            f"MATCH (a:{label}:Node {{id: $source}}), (b:{label}:Node {{id: $target}}) "
            "CREATE (a)-[r:RELATION {type: $type}]->(b)"
        )
        with self.driver.session() as session:
            for node in nodes:
                session.run(create_node, id=node['id'], label=node['label'])
            for edge in edges:
                session.run(
                    create_edge,
                    source=edge['source'],
                    target=edge['target'],
                    type=edge['type'],
                )

    def check_existing_graph(self, label_prefix):
        """Return True when at least one node carries *label_prefix*."""
        label = self._quote_label(label_prefix)
        with self.driver.session() as session:
            result = session.run(f"MATCH (n:{label}) RETURN count(n) as count")
            count = result.single()["count"]
        return count > 0

    def get_graph_data(self, label_prefix):
        """Read back the graph stored under *label_prefix*.

        Returns:
            ``{"nodes": [{"id", "label"}, ...],
            "edges": [{"source", "target", "type"}, ...]}``. The edge
            ``type`` is the ``type`` property written by
            :meth:`dump_to_neo4j`; the relationship type name is used only
            when that property is absent.
        """
        label = self._quote_label(label_prefix)
        with self.driver.session() as session:
            # Materialise the node rows before issuing the next query.
            nodes = [
                {"id": record["id"], "label": record["label"]}
                for record in session.run(
                    f"MATCH (n:{label}) RETURN n.id AS id, n.label AS label"
                )
            ]
            edges = [
                {"source": record["source"], "target": record["target"], "type": record["type"]}
                for record in session.run(
                    f"MATCH (a:{label})-[r]->(b:{label}) "
                    "RETURN a.id AS source, b.id AS target, coalesce(r.type, type(r)) AS type"
                )
            ]
        return {"nodes": nodes, "edges": edges}

    def delete_graph(self, label_prefix):
        """Delete every node labelled *label_prefix* and its relationships."""
        label = self._quote_label(label_prefix)
        with self.driver.session() as session:
            session.run(f"MATCH (n:{label}) DETACH DELETE n")

gemini_image_to_json.py ADDED Viewed

	@@ -0,0 +1,41 @@

+import google.generativeai as genai
+from dotenv import load_dotenv
+import os
# Load the .env file before the API key is read from the environment.
load_dotenv()
# Google API key taken from the environment (None when it is not set).
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
genai.configure(api_key=GOOGLE_API_KEY)
# gemini-1.5-pro only gives 50 requests per day. Check https://ai.google.dev/pricing for more details.
# Previously tried model, kept for reference:
# model = genai.GenerativeModel('gemini-1.5-pro',
# Module-level model shared by every call to fetch_gemini_response.
model = genai.GenerativeModel(
    "gemini-1.5-flash",
    # `response_mime_type` asks the model for JSON output; no
    # `response_schema` is passed, the expected shape is described in PROMPT.
    generation_config={
        "response_mime_type": "application/json",
        "temperature": 0.0,
    },
)
# Options that were tried and left out of generation_config:
#  "response_schema": Recipe, 'max_output_tokens':4000})
# Instruction text sent together with the mind-map image; it spells out the
# nodes/edges JSON layout the model is asked to return.
PROMPT = """
You are responsible for extracting the entities (nodes) and relationships (edges) from the images of mind maps. The mind maps are for Object Oriented Programming.
Don't make up facts, just extracts them. Do not create new entity types that aren't mentioned in the image, and at the same time don't miss anything.
Give the output in JSON format with this schema:
{
  "nodes": [{"id": "1", "label": string},{"id": "2", "label": string}],"edges": [{"source": SOURCE_ID, "target": TARGET_ID, "type": "->"},{"source": SOURCE_ID, "target": TARGET_ID, "type": "->"}]
}
Now extract the entities and relationships from this image:
"""
def fetch_gemini_response(mind_map_image):
    """Ask the configured Gemini model to extract a graph from an image.

    Args:
        mind_map_image: Image of the mind map; sent to the model right after
            the module-level ``PROMPT``.

    Returns:
        The text of the model's (non-streamed) response.
    """
    print("fetching gemini response")
    request_parts = [PROMPT, mind_map_image]
    return model.generate_content(request_parts, stream=False).text

graph_visualization.py ADDED Viewed

	@@ -0,0 +1,60 @@

+import json
+from pyvis.network import Network
def create_graph(nodes, edges, physics_enabled=True):
    """Build a pyvis network for the given nodes and edges.

    Args:
        nodes: Iterable of dicts with ``id`` and ``label`` keys. The node
            whose label is ``'OOP'`` is drawn blue, every other node green.
        edges: Iterable of dicts with ``source``, ``target`` and ``type``
            keys; ``type`` is used as the edge tooltip.
        physics_enabled: Whether the physics simulation is switched on.

    Returns:
        The configured ``pyvis.network.Network``.
    """
    net = Network(
        notebook=True,
        height='100vh',
        width='100vw',
        bgcolor='#222222',
        font_color='white',
        cdn_resources='remote',
    )

    for node in nodes:
        net.add_node(
            node['id'],
            label=node['label'],
            title=node['label'],
            color='blue' if node['label'] == 'OOP' else 'green'
        )

    for edge in edges:
        net.add_edge(edge['source'], edge['target'], title=edge['type'])

    # The solver settings live in the same options document as everything
    # else: set_options() replaces the network's whole options object, so
    # settings applied through a separate force_atlas_2based() call before it
    # would be thrown away.
    options = {
        "nodes": {
            "physics": physics_enabled
        },
        "edges": {
            "smooth": True
        },
        "interaction": {
            "hover": True,
            "zoomView": True
        },
        "physics": {
            "enabled": physics_enabled,
            "solver": "forceAtlas2Based",
            "forceAtlas2Based": {
                "gravitationalConstant": -50,
                "centralGravity": 0.01,
                "springLength": 100,
                "springConstant": 0.08,
                "damping": 0.4
            },
            "stabilization": {
                "enabled": True,
                "iterations": 200
            }
        }
    }
    net.set_options(json.dumps(options))
    return net
def visualize_graph(json_data, physics_enabled=True):
    """Render graph data as an HTML ``<iframe>`` snippet.

    Args:
        json_data: Either a JSON string or an already-parsed dict with
            ``nodes`` and ``edges`` keys.
        physics_enabled: Forwarded to :func:`create_graph`.

    Returns:
        A string holding one ``<iframe>`` element whose ``srcdoc`` attribute
        contains the full generated page.
    """
    # Imported locally under a distinct name so the file's import block does
    # not need to change.
    from html import escape

    data = json.loads(json_data) if isinstance(json_data, str) else json_data
    net = create_graph(data['nodes'], data['edges'], physics_enabled)

    page = net.generate_html()
    page = page.replace('<div id="mynetwork"', '<div id="mynetwork" style="height: 100vh; width: 100%;"')

    # Escape the page for use as an attribute value. Rewriting quote
    # characters instead would change any label or script that contains an
    # apostrophe.
    embedded_page = escape(page, quote=True)
    return (
        '<iframe style="width: 100%; height: 100vh; border: none; margin: 0; padding: 0;" '
        f'srcdoc="{embedded_page}"></iframe>'
    )

main.py ADDED Viewed

	@@ -0,0 +1,282 @@

+import gradio as gr
+import json
+import os
+from PIL import Image
+from database_operations import Neo4jDatabase
+from graph_visualization import visualize_graph
+from utils import extract_label_prefix, strip_keys, format_json, validate_json
+from models.gemini_image_to_json import fetch_gemini_response
+from models.openai_image_to_json import openaiprocess_image_to_json
+from any_to_image import pdf_to_images, process_image
# Initialize Neo4j database.
# The connection settings can be overridden with the NEO4J_URI,
# NEO4J_USERNAME and NEO4J_PASSWORD environment variables; the fallbacks are
# the values that were previously hard-coded here.
db = Neo4jDatabase(
    os.getenv("NEO4J_URI", "bolt://localhost:7687"),
    os.getenv("NEO4J_USERNAME", "neo4j"),
    os.getenv("NEO4J_PASSWORD", "password123"),
)
def dump_to_neo4j_with_confirmation(json_content, file_path, history, previous_states):
    """Store the edited graph, unless one already exists for this file.

    Args:
        json_content: JSON text with ``nodes`` and ``edges``.
        file_path: Path of the current image; the graph's label prefix is
            derived from it.
        history: Newline-separated log of earlier actions.
        previous_states: Dict mapping label prefix to a list of earlier
            graph snapshots.

    Returns:
        ``(message, history, previous_states, pending_label)``.
        ``pending_label`` is the label prefix when a graph already exists and
        an overwrite has to be confirmed, otherwise ``None``.
    """
    invalid_json = "Invalid JSON data. Please check your input."

    if not file_path:
        return "No image uploaded or invalid file", history, previous_states, None

    try:
        json_data = json.loads(json_content)
    except (json.JSONDecodeError, TypeError):
        return invalid_json, history, previous_states, None
    # Valid JSON that is not an object (a list, a number, ...) cannot hold a graph.
    if not isinstance(json_data, dict):
        return invalid_json, history, previous_states, None

    json_data = strip_keys(json_data)
    if 'nodes' not in json_data or 'edges' not in json_data:
        return invalid_json, history, previous_states, None

    label_prefix = extract_label_prefix(file_path)
    if db.check_existing_graph(label_prefix):
        return f"A graph with label prefix '{label_prefix}' already exists in the database. Do you want to overwrite it?", history, previous_states, label_prefix

    db.dump_to_neo4j(json_data['nodes'], json_data['edges'], label_prefix)
    result = f"Data successfully dumped into the database with label prefix '{label_prefix}'."
    new_history = f"{history}\n[NEW ENTRY] {result}" if history else f"[NEW ENTRY] {result}"
    # An empty snapshot list marks a graph that did not exist before this dump.
    previous_states[label_prefix] = []
    return result, new_history, previous_states, None
def confirm_overwrite(confirmation, gradio_state, json_content, file_path, history, previous_states):
    """Replace the stored graph once the user has typed ``yes``.

    The graph currently in the database is snapshotted into
    ``previous_states`` (at most the three most recent snapshots are kept per
    label prefix) before it is deleted and replaced.

    Args:
        confirmation: Text typed by the user; anything other than ``yes``
            (ignoring case and surrounding whitespace) cancels.
        gradio_state: Unused; kept so existing event wiring keeps working.
        json_content: JSON text with ``nodes`` and ``edges``.
        file_path: Path of the current image; the label prefix is derived
            from it.
        history: Newline-separated log of earlier actions.
        previous_states: Dict mapping label prefix to a list of snapshots.

    Returns:
        ``(message, history, previous_states, "")``; the empty string clears
        the confirmation input.
    """
    invalid_json = "Invalid JSON data. Please check your input."

    if (confirmation or "").strip().lower() != 'yes':
        return "Operation cancelled. The existing graph was not overwritten.", history, previous_states, ""

    # Parse and check the replacement first, so that bad input can never
    # leave the database with the old graph already deleted.
    try:
        json_data = json.loads(json_content)
    except (json.JSONDecodeError, TypeError):
        return invalid_json, history, previous_states, ""
    if not isinstance(json_data, dict):
        return invalid_json, history, previous_states, ""
    json_data = strip_keys(json_data)
    if 'nodes' not in json_data or 'edges' not in json_data:
        return invalid_json, history, previous_states, ""

    label_prefix = extract_label_prefix(file_path)
    snapshots = previous_states.setdefault(label_prefix, [])
    snapshots.append(db.get_graph_data(label_prefix))
    # Keep only the three most recent snapshots.
    del snapshots[:-3]

    db.delete_graph(label_prefix)
    db.dump_to_neo4j(json_data['nodes'], json_data['edges'], label_prefix)
    result = f"Data successfully overwritten in the database with label prefix '{label_prefix}'."
    new_history = f"{history}\n[OVERWRITE] {result}" if history else f"[OVERWRITE] {result}"
    return result, new_history, previous_states, ""
def revert_last_action(history, previous_states):
    """Undo the most recent action recorded in *history*.

    The label prefix is read from between the first pair of single quotes on
    the last history line. If a snapshot exists for it, the stored graph is
    replaced by the newest snapshot; if the snapshot list is empty, the graph
    was newly added and is simply deleted.

    Args:
        history: Newline-separated log of earlier actions.
        previous_states: Dict mapping label prefix to a list of snapshots.

    Returns:
        ``(message, history, previous_states)``.
    """
    if not history:
        return "No actions to revert.", history, previous_states

    unable = ("Unable to revert the last action.", history, previous_states)

    last_action = history.split('\n')[-1]
    quoted_parts = last_action.split("'")
    # Without a quote there is no label to extract from the line.
    if len(quoted_parts) < 2:
        return unable
    label_prefix = quoted_parts[1]
    if label_prefix not in previous_states:
        return unable

    snapshots = previous_states[label_prefix]
    db.delete_graph(label_prefix)
    if snapshots:
        snapshot = snapshots[-1]
        db.dump_to_neo4j(snapshot['nodes'], snapshot['edges'], label_prefix)
        new_history = history + f"\n[REVERT] Reverted overwrite of graph with label prefix '{label_prefix}'"
        # Drop the snapshot only after it has been restored.
        snapshots.pop()
    else:
        new_history = history + f"\n[REVERT] Deleted newly added graph with label prefix '{label_prefix}'"
        del previous_states[label_prefix]
    return f"Reverted last action: {last_action}", new_history, previous_states
+def update_graph_from_edited_json(json_content, physics_enabled):
+    try:
+        json_data = json.loads(json_content)
+        json_data = strip_keys(json_data)
+        validate_json(json_data)
+        return visualize_graph(json_data, physics_enabled), ""
+    except json.JSONDecodeError as e:
+        return None, f"Invalid JSON format: {str(e)}"
+    except ValueError as e:
+        return None, f"Invalid graph structure: {str(e)}"
+    except Exception as e:
+        return None, f"An unexpected error occurred: {str(e)}"
def fetch_kg(image_file_path, model_choice_state):
    """Ask the selected model to extract a knowledge graph from an image.

    Args:
        image_file_path: Path of the mind-map image.
        model_choice_state: ``'Gemini'`` or ``'OpenAI'``.

    Returns:
        ``(formatted_json, "")`` on success, or ``("", error_message)`` when
        no image is given, the model choice is unknown, or the model call or
        its output cannot be processed.
    """
    if not image_file_path:
        return "", "No image uploaded or invalid file"
    # Checked up front so an unexpected choice yields a message instead of an
    # unassigned-variable error further down.
    if model_choice_state not in ('Gemini', 'OpenAI'):
        return "", f"Unknown model choice: {model_choice_state!r}"

    try:
        # The context manager closes the image file once the model has been called.
        with Image.open(image_file_path) as mind_map_image:
            if model_choice_state == 'Gemini':
                print('model choice is gemini')
                kg_json_text = fetch_gemini_response(mind_map_image)
            else:
                print('model choice is openai')
                kg_json_text = openaiprocess_image_to_json(mind_map_image)
        json_data = json.loads(kg_json_text)
        return format_json(json_data), ""
    except (json.JSONDecodeError, TypeError) as e:
        return "", f"The model did not return valid JSON: {e}"
    except Exception as e:
        # UI boundary: report the failure in the error box instead of raising.
        return "", f"Failed to extract the knowledge graph: {e}"
def input_file_handler(file_path):
    """Convert an uploaded file into a displayable image path.

    Args:
        file_path: Path of the uploaded file; may be empty.

    Returns:
        The ``(image_path, error)`` pair produced by ``process_image``, or
        ``("", "No image uploaded or invalid file")`` when no path is given.
    """
    if not file_path:
        return "", "No image uploaded or invalid file"
    image_path, error = process_image(file_path)
    return image_path, error
# Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("## Image to Knowledge Graph Transformation")

    # --- Layout -----------------------------------------------------------
    with gr.Row():
        file_input = gr.File(label="Upload File", file_count="single",
                             type="filepath",
                             file_types=[".pdf", ".png", ".jpeg", ".jpg", ".heic"])
        # Hidden until a file has been uploaded and converted to an image.
        image_file = gr.Image(label="Input Image", type="filepath", visible=False)
        json_editor = gr.Textbox(label="Edit JSON", lines=15, placeholder="JSON data will appear here after image upload")
    with gr.Row():
        with gr.Column():
            with gr.Row():
                CCW_rotate_button = gr.Button('Rotate Image Counter-Clockwise')
                CW_rotate_button = gr.Button('Rotate Image Clockwise')
        with gr.Column():
            model_call = gr.Button('Transform Image into KG representation', scale=2)
    with gr.Row():
        physics_button = gr.Checkbox(value=True, label="Enable Graph Physics")
        model_choice = gr.Radio(label="Select Model", choices=["OpenAI", "Gemini"], value="Gemini", interactive=True)
    graph_output = gr.HTML(label="Graph Output")
    error_output = gr.Textbox(label="Error Messages", interactive=False)
    update_button = gr.Button("Update Graph")
    dump_button = gr.Button("Dump to Neo4j")
    revert_button = gr.Button("Revert Last Action")
    history_block = gr.Textbox(label="History", placeholder="Graphs pushed to the Database", interactive=False, lines=5, max_lines=50)

    # --- Session state ----------------------------------------------------
    history_state = gr.State("")
    previous_states = gr.State({})
    # Label prefix of the graph waiting for an overwrite decision, or None.
    # This has to be ONE shared component: every gr.State() call creates a
    # separate state, so inline instances never see each other's value.
    pending_label = gr.State(None)

    confirmation_output = gr.Textbox(label="Confirmation Message", visible=False, interactive=False)
    confirmation_input = gr.Textbox(label="Type 'yes' to confirm overwrite", visible=False, interactive=True)
    confirm_button = gr.Button("Confirm Overwrite", visible=False)

    # --- Upload: convert the file, then swap the file picker for the image --
    file_input.upload(
        fn=input_file_handler,
        inputs=[file_input],
        outputs=[image_file, error_output]
    ).then(
        lambda image_file: (
            gr.Image(value=image_file, visible=True),
            gr.File(visible=False)
        ),
        inputs=[image_file],
        outputs=[image_file, file_input]
    )
    # Clearing the image brings the file picker back.
    image_file.clear(
        lambda file_input, image_file: (
            gr.File(visible=True),
            gr.Image(visible=False)
            ),
        inputs=[file_input, image_file],
        outputs=[file_input, image_file]
    )

    # --- Rotation ---------------------------------------------------------
    def _rotate_image_in_place(image_path, degrees):
        """Rotate the image file at *image_path* by *degrees* and overwrite it."""
        if not image_path:
            return None
        # Close the source file before writing back to the same path.
        with Image.open(image_path) as image:
            rotated = image.rotate(degrees, expand=True)
        rotated.save(image_path)
        return image_path

    # NOTE: despite its name this handler is wired to the clockwise button;
    # the name is kept so existing references keep working.
    def rotate_image_to_left(image_path):
        return _rotate_image_in_place(image_path, -90)

    CW_rotate_button.click(
        fn=rotate_image_to_left,
        inputs=[image_file],
        outputs=[image_file]
    )

    # NOTE: wired to the counter-clockwise button (see the note above).
    def rotate_image_to_right(image_path):
        return _rotate_image_in_place(image_path, 90)

    CCW_rotate_button.click(
        fn=rotate_image_to_right,
        inputs=[image_file],
        outputs=[image_file]
    )

    # --- Dump to Neo4j ----------------------------------------------------
    dump_button.click(
        dump_to_neo4j_with_confirmation,
        inputs=[json_editor, image_file, history_state, previous_states],
        outputs=[confirmation_output, history_state, previous_states, pending_label]
    ).then(
        # Always show the message; show the confirmation controls only when
        # an existing graph is waiting for an overwrite decision.
        lambda message, label_prefix: (
            gr.Textbox(value=message, visible=True),
            gr.Textbox(visible=label_prefix is not None),
            gr.Button(visible=label_prefix is not None)
        ),
        inputs=[confirmation_output, pending_label],
        outputs=[confirmation_output, confirmation_input, confirm_button]
    ).then(
        lambda history: history,
        inputs=[history_state],
        outputs=[history_block]
    )

    # --- Overwrite confirmation (button click or Enter in the textbox) ----
    gr.on(
        triggers=[confirm_button.click, confirmation_input.submit],
        fn=confirm_overwrite,
        inputs=[confirmation_input, pending_label, json_editor, image_file, history_state, previous_states],
        outputs=[confirmation_output, history_state, previous_states, confirmation_input]
    ).then(
        # Show the outcome, hide the confirmation controls again and clear
        # the pending label.
        lambda message: (
            gr.Textbox(value=message, visible=True),
            gr.Textbox(value='', visible=False),
            gr.Button(visible=False),
            None
        ),
        inputs=[confirmation_output],
        outputs=[confirmation_output, confirmation_input, confirm_button, pending_label]
    ).then(
        lambda history: history,
        inputs=[history_state],
        outputs=[history_block]
    )

    # --- Revert -----------------------------------------------------------
    revert_button.click(
        revert_last_action,
        inputs=[history_state, previous_states],
        outputs=[confirmation_output, history_state, previous_states]
    ).then(
        lambda message: gr.Textbox(value=message, visible=True),
        inputs=[confirmation_output],
        outputs=[confirmation_output]
    ).then(
        lambda history: history,
        inputs=[history_state],
        outputs=[history_block]
    )

    # --- Graph rendering: on demand and whenever physics is toggled -------
    update_button.click(
        update_graph_from_edited_json,
        inputs=[json_editor, physics_button],
        outputs=[graph_output, error_output]
    )
    physics_button.change(
        update_graph_from_edited_json,
        inputs=[json_editor, physics_button],
        outputs=[graph_output, error_output]
    )

    # --- Model call -------------------------------------------------------
    model_call.click(
        fn=fetch_kg,
        inputs=[image_file, model_choice],
        outputs=[json_editor, error_output]
    )

if __name__ == "__main__":
    demo.launch()

openai_image_to_json.py ADDED Viewed

	@@ -0,0 +1,95 @@

+import base64
+import requests
+from io import BytesIO
+from PIL import Image
+import os
+from dotenv import load_dotenv
+import json
# Load the .env file so the key below can be read from the environment
load_dotenv()
# OpenAI API key from the environment (None when OPENAI_API_KEY is not set)
api_key = os.environ.get('OPENAI_API_KEY')
# Function to encode the image
def encode_image(image):
    """Return *image* as a base64-encoded JPEG string.

    Args:
        image: A PIL image (anything exposing ``mode``, ``convert`` and
            ``save`` in the same way works).

    Returns:
        The JPEG bytes of the image, base64-encoded and decoded to ``str``.
    """
    # JPEG cannot store an alpha channel or a palette, so every mode other
    # than plain RGB or greyscale is converted to RGB before saving.
    if image.mode not in ("RGB", "L"):
        image = image.convert("RGB")
    buffered = BytesIO()
    image.save(buffered, format="JPEG")
    return base64.b64encode(buffered.getvalue()).decode("utf-8")
def openaiprocess_image_to_json(image):
    """Ask the OpenAI chat completions API to extract a graph from an image.

    Args:
        image: Image of the mind map; it is sent as a base64 JPEG data URL.

    Returns:
        The text content of the first choice in the API response. It is
        returned as is (not parsed); the prompt asks for text that can be fed
        to ``json.loads``.

    Raises:
        ValueError: When the response contains no choices. The API's own
            error message is appended when the response carries one.
    """
    print(f'fetching openai response')
    # Encode the image
    base64_image = encode_image(image)
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }
    PROMPT = '''
    You are responsible for extracting the entities (nodes) and relationships (edges) from the images of mind maps. The mind maps are for Object Oriented Programming.
    Don't make up facts, just extracts them. Do not create new entity types that aren't mentioned in the image, and at the same time don't miss anything.
    Give the output in JSON format as follows:
    {
    "nodes": [
        {"id": "1", "label": string},
        {"id": "2", "label": string},...
        ],
    "edges": [
        {"source": SOURCE_ID, "target": TARGET_ID, "type": "->"},
        {"source": SOURCE_ID, "target": TARGET_ID, "type": "->"},...
        ]
    }
    Only return valid python dictionary, dont include (line jump)n in it, dont include spaces, only a dictionary. Do not include any other text outside the Dictionary structure. Make sure that i will get a valid Python dictionary.
    make sure that what you return as json_string i can use it in python in this function: json.loads(json_string)
    Now extract the entities and relationships from this image:
    '''
    payload = {
        "model": "gpt-4o",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                    "type": "text",
                    "text": PROMPT
                    },
                    {
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:image/jpeg;base64,{base64_image}"
                    }
                    }
                ]
            }
        ]
    }
    # Send the request to the OpenAI API. The timeout keeps a stalled
    # connection from blocking the caller indefinitely.
    response = requests.post(
        "https://api.openai.com/v1/chat/completions",
        headers=headers,
        json=payload,
        timeout=120,
    )
    # Parse the response
    response_data = response.json()
    print(response_data)

    # Extract the graph text from the response
    choices = response_data.get("choices") if isinstance(response_data, dict) else None
    if not choices:
        message = "No valid response from OpenAI API"
        # Surface the API's explanation when the response includes one.
        error = response_data.get("error") if isinstance(response_data, dict) else None
        if isinstance(error, dict) and error.get("message"):
            message = f"{message}: {error['message']}"
        raise ValueError(message)
    return choices[0]["message"]["content"]

requirements.txt ADDED Viewed

	@@ -0,0 +1,99 @@

+aiofiles==23.2.1
+annotated-types==0.7.0
+anyio==4.4.0
+asttokens==2.4.1
+cachetools==5.5.0
+certifi==2024.8.30
+cffi==1.17.1
+charset-normalizer==3.3.2
+click==8.1.7
+contourpy==1.3.0
+cycler==0.12.1
+decorator==5.1.1
+exceptiongroup==1.2.2
+executing==2.1.0
+fastapi==0.114.1
+ffmpy==0.4.0
+filelock==3.16.0
+fonttools==4.53.1
+fsspec==2024.9.0
+google-ai-generativelanguage==0.6.9
+google-api-core==2.19.2
+google-api-python-client==2.145.0
+google-auth==2.34.0
+google-auth-httplib2==0.2.0
+google-generativeai==0.8.1
+googleapis-common-protos==1.65.0
+gradio==4.44.0
+gradio_client==1.3.0
+grpcio==1.66.1
+grpcio-status==1.66.1
+h11==0.14.0
+httpcore==1.0.5
+httplib2==0.22.0
+httpx==0.27.2
+huggingface-hub==0.24.7
+idna==3.8
+importlib_resources==6.4.5
+ipython==8.27.0
+jedi==0.19.1
+Jinja2==3.1.4
+jsonpickle==3.3.0
+kiwisolver==1.4.7
+markdown-it-py==3.0.0
+MarkupSafe==2.1.5
+matplotlib==3.9.2
+matplotlib-inline==0.1.7
+mdurl==0.1.2
+neo4j==5.24.0
+networkx==3.3
+numpy==2.1.1
+orjson==3.10.7
+packaging==24.1
+pandas==2.2.2
+parso==0.8.4
+pexpect==4.9.0
+pillow==10.4.0
+prompt_toolkit==3.0.47
+proto-plus==1.24.0
+protobuf==5.28.1
+ptyprocess==0.7.0
+pure_eval==0.2.3
+pyasn1==0.6.1
+pyasn1_modules==0.4.1
+pycparser==2.22
+pydantic==2.9.1
+pydantic_core==2.23.3
+pydub==0.25.1
+Pygments==2.18.0
+pyheif==0.8.0
+PyMuPDF==1.24.10
+PyMuPDFb==1.24.10
+pyparsing==3.1.4
+python-dateutil==2.9.0.post0
+python-dotenv==1.0.1
+python-multipart==0.0.9
+pytz==2024.2
+pyvis==0.3.2
+PyYAML==6.0.2
+requests==2.32.3
+rich==13.8.1
+rsa==4.9
+ruff==0.6.4
+semantic-version==2.10.0
+shellingham==1.5.4
+six==1.16.0
+sniffio==1.3.1
+stack-data==0.6.3
+starlette==0.38.5
+tomlkit==0.12.0
+tqdm==4.66.5
+traitlets==5.14.3
+typer==0.12.5
+typing_extensions==4.12.2
+tzdata==2024.1
+uritemplate==4.1.1
+urllib3==2.2.3
+uvicorn==0.30.6
+wcwidth==0.2.13
+websockets==12.0

utils.py ADDED Viewed

	@@ -0,0 +1,42 @@

+import os
+import json
def extract_label_prefix(file_name):
    """Derive a graph label prefix from a file name.

    The directory and the final extension are dropped, every character that
    is not a letter, digit or underscore (spaces, hyphens, dots,
    brackets, ...) becomes ``_``, and a leading digit is prefixed with ``_``
    so that the result can be used as an identifier.

    Args:
        file_name: File name or path.

    Returns:
        The sanitised label prefix.
    """
    import re  # local import: only this helper needs it

    stem = os.path.splitext(os.path.basename(file_name))[0]
    label_prefix = re.sub(r"\W", "_", stem)
    if label_prefix[:1].isdigit():
        label_prefix = "_" + label_prefix
    return label_prefix
def strip_keys(d):
    """Return a copy of *d* with whitespace stripped from every dict key.

    Dicts and lists are walked recursively; any other value is returned
    unchanged. Only keys are stripped, values are left alone.
    """
    if isinstance(d, list):
        return [strip_keys(item) for item in d]
    if not isinstance(d, dict):
        return d

    cleaned = {}
    for key, value in d.items():
        stripped_key = key.strip()
        cleaned[stripped_key] = strip_keys(value)
    return cleaned
def format_json(json_data):
    """Format graph data as JSON text with one node or edge per line.

    Args:
        json_data: Dict with ``nodes`` and ``edges`` lists.

    Returns:
        A JSON string in which each node and each edge sits on its own
        line, indented by four spaces inside its array.
    """
    def render_section(items):
        # One compact JSON object per line, comma-separated; an empty list
        # renders as an opening bracket followed by the closing line.
        rows = ",\n".join(f"    {json.dumps(item)}" for item in items)
        return "[\n" + (rows + "\n" if rows else "") + "  ]"

    nodes_section = render_section(json_data['nodes'])
    edges_section = render_section(json_data['edges'])
    return "{\n  \"nodes\": " + nodes_section + ",\n  \"edges\": " + edges_section + "\n}"
def validate_json(json_data):
    """Check that *json_data* describes a well-formed graph.

    Args:
        json_data: Parsed JSON value to check.

    Returns:
        None when the data is valid.

    Raises:
        ValueError: When the top level is not a dict with ``nodes`` and
            ``edges`` lists, a node is not a dict with ``id`` and ``label``,
            an edge is not a dict with ``source``, ``target`` and ``type``,
            an edge type is not ``"->"``, or an edge refers to an id that no
            node has.
    """
    if not isinstance(json_data, dict) or 'nodes' not in json_data or 'edges' not in json_data:
        raise ValueError("JSON must contain 'nodes' and 'edges' keys")

    nodes = json_data['nodes']
    edges = json_data['edges']
    if not isinstance(nodes, list) or not isinstance(edges, list):
        raise ValueError("'nodes' and 'edges' must be lists")

    # Kept as a list rather than a set: ids come from arbitrary JSON and are
    # not guaranteed to be hashable.
    node_ids = []
    for node in nodes:
        # The dict check matters: `'id' in "some string"` would be a
        # substring test and could wrongly succeed.
        if not isinstance(node, dict) or 'id' not in node or 'label' not in node:
            raise ValueError("Each node must have 'id' and 'label' properties")
        node_ids.append(node['id'])

    for edge in edges:
        if not isinstance(edge, dict) or 'source' not in edge or 'target' not in edge or 'type' not in edge:
            raise ValueError("Each edge must have 'source', 'target', and 'type' properties")
        if edge['type'] != "->":
            raise ValueError("Edge type must be '->' strictly")
        for endpoint in ('source', 'target'):
            if edge[endpoint] not in node_ids:
                raise ValueError(f"Edge {endpoint} {edge[endpoint]!r} does not match any node id")