Spaces:

Carbaz
/

python_c-extensions

Sleeping

App Files Files Community

Carbaz commited on May 23

Commit

3aa82ed

verified ·

1 Parent(s): 26bead7

Sync from GitHub

Browse files

Files changed (1) hide show

python_c_ext_generator.ipynb +474 -0

python_c_ext_generator.ipynb ADDED Viewed

	@@ -0,0 +1,474 @@

+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "4a6ab9a2-28a2-445d-8512-a0dc8d1b54e9",
+   "metadata": {},
+   "source": [
+    "# Python C extension generator\n",
+    "\n",
+    "Use an LLM to generate high-performance Python C extension code from Python code.\n",
+    "\n",
+    "Python C extension modules allow you to integrate compiled C modules into Python applications.\n",
+    "\n",
+    "* [Python C Extensions](https://docs.python.org/3.13/extending/index.html)\n",
+    "* [Python's C API](https://docs.python.org/3.13/c-api/index.html)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e610bf56-a46e-4aff-8de1-ab49d62b1ad3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Imports.\n",
+    "\n",
+    "import os\n",
+    "import sys\n",
+    "from time import perf_counter\n",
+    "from timeit import timeit\n",
+    "\n",
+    "from dotenv import load_dotenv\n",
+    "from openai import OpenAI\n",
+    "from pydantic import BaseModel"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4f672e1c-87e9-4865-b760-370fa605e614",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Load environment variables from '.env' file.\n",
+    "\n",
+    "load_dotenv(override=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8aa149ed-9298-4d69-8fe2-8f5de0f667da",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Initialize client and set the default LLM model to use.\n",
+    "\n",
+    "OPENAI_MODEL = \"gpt-5.1-codex-mini\"\n",
+    "\n",
+    "openai = OpenAI()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c6f37bf0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Define Pydantic model class for GPT response parsing.\n",
+    "\n",
+    "class Extension_codes(BaseModel):\n",
+    "    \"\"\"Pydantic model of a response containing the generated C code, the 'setup.py' code and an usage example.\"\"\"\n",
+    "    c_code: str\n",
+    "    setup: str\n",
+    "    usage: str"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "cb6ce77a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Define a function to print the optimization codes.\n",
+    "\n",
+    "def print_optimization(optimization):\n",
+    "    \"\"\"Print the optimization codes.\"\"\"\n",
+    "    print(f\"C CODE:\\n{optimization.c_code}\")\n",
+    "    print(\"---------------------------\")\n",
+    "    print(f\"setup.py:\\n{optimization.setup}\")\n",
+    "    print(\"---------------------------\")\n",
+    "    print(f\"USAGE:\\n{optimization.usage}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "71e1ba8c-5b05-4726-a9f3-8d8c6257350b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Define a function to write outputs to a file with a given filename.\n",
+    "\n",
+    "def write_file(data, filename):\n",
+    "    \"\"\"Write data to a file with the specified filename.\"\"\"\n",
+    "    with open(filename, \"w\") as file:\n",
+    "        file.write(data)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f13c9c97",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Define a function to write the optimization codes to files.\n",
+    "\n",
+    "def write_optimization(optimization, module_name):\n",
+    "    \"\"\"Write the optimization codes to files.\"\"\"\n",
+    "    write_file(optimization.c_code, f\"{module_name}.c\")\n",
+    "    write_file(optimization.setup, \"setup.py\")\n",
+    "    write_file(optimization.usage, \"usage_example.py\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6896636f-923e-4a2c-9d6c-fac07828a201",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Define system message for the LLM with instructions for generating the C extension code.\n",
+    "\n",
+    "system_message = \"\"\"\n",
+    "You are an assistant that reimplements Python code in high performance C extensions for Python.\n",
+    "Your responses must always be a JSON with the following structure:\n",
+    "\n",
+    "{\n",
+    "    \"c_code\": \"Optimized C extension for Python code\",\n",
+    "    \"setup\": \"The 'setup.py' code to compile the C extension for Python\",\n",
+    "    \"usage\": \"An example of usage of the C extension for Python code with time measurement and comparing with the original Python code\"\n",
+    "}\n",
+    "\n",
+    "Use comments sparingly and do not provide any explanation other than occasional comments.\n",
+    "The C extension for Python needs to produce an identical output in the fastest possible time.\n",
+    "Make sure the C extension for Python code is correct and can be compiled with 'python setup.py build' and used in Python.\n",
+    "The usage example must include a time measurement and a comparison with the original Python code.\n",
+    "Do not include any additional text or explanation outside the JSON structure.\n",
+    "Make sure the JSON is correctly formatted.\n",
+    "\"\"\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8e7b3546-57aa-4c29-bc5d-f211970d04eb",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Define user prompt template and function to fill it.\n",
+    "\n",
+    "def user_prompt_for(python_code, module_name):\n",
+    "    user_prompt = f\"\"\"\n",
+    "    Reimplement this Python code as a C extension for Python with the fastest possible implementation that produces identical output in the least time.\n",
+    "    Respond only with C extension for Python code, do not explain your work other than a few code comments.\n",
+    "    The module name, used to import, must be \"{module_name}\", the generated C file will be named \"{module_name}.c\".\n",
+    "    Pay attention to number types to ensure no int overflows.\n",
+    "    Remember to #include all necessary C headers such as <Python.h> or <stdint.h>\n",
+    "\n",
+    "    The target architecture is {sys.platform}, take that in mind while generating the C code, specially\n",
+    "    when choosing types to use, and use the appropriate compiler flags.\n",
+    "    Make sure to use the Python C API correctly and manage memory properly to avoid leaks or crashes.\n",
+    "\n",
+    "    Here is the Python code to reimplement:\n",
+    "\n",
+    "    {python_code}\"\"\"\n",
+    "    return user_prompt"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c6190659-f54c-4951-bef4-4960f8e51cc4",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Define function to create the messages for the LLM.\n",
+    "\n",
+    "def messages_for(python_code, module_name):\n",
+    "    \"\"\"Create the messages for the LLM given the Python code and the desired module name.\"\"\"\n",
+    "    return [\n",
+    "        {\"role\": \"system\", \"content\": system_message},\n",
+    "        {\"role\": \"user\", \"content\": user_prompt_for(python_code, module_name)}]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3c57bc55",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Test the messages function and print the messages.\n",
+    "\n",
+    "for message in messages_for(\"print('Hello World')\", \"say_hello\"):\n",
+    "    print(f\"{message['role'].upper()}: {message['content']}\")\n",
+    "    print(\"--------------------------------\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e7d2fea8-74c6-4421-8f1e-0e76d5b201b9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Define optimization function using OpenAI's GPT model.\n",
+    "\n",
+    "def optimize_gpt(python_code, module_name, model=OPENAI_MODEL):\n",
+    "    \"\"\"Optimize the given Python code by generating a C extension for Python with the specified module name using the specified LLM model.\"\"\"\n",
+    "    response = openai.responses.parse(\n",
+    "        model=model,\n",
+    "        input=messages_for(python_code, module_name),\n",
+    "        text_format=Extension_codes).output_parsed\n",
+    "    return response"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "c05b263a",
+   "metadata": {},
+   "source": [
+    "# Try it with a math function that calculates ***π*** using the Leibniz formula.\n",
+    "\n",
+    "This formula approximates *π* iteratively using an alternating series:\n",
+    "the more iterations, the higher the precision, at the cost of more computation.\n",
+    "* [Leibniz formula for π](https://en.wikipedia.org/wiki/Leibniz_formula_for_%CF%80)\n",
+    "\n",
+    "This is a good candidate to get a noticeable improvement by coding and compiling it into a Python C extension. \n",
+    "\n",
+    "> NOTE:\n",
+    ">\n",
+    "> We are creating an importable module, not an executable program, so the code to be optimized must contain only declarations such as `def` or `class`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a1cbb778-fa57-43de-b04b-ed523f396c38",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Define the Python function to be converted to a C extension and its module name.\n",
+    "\n",
+    "module_name = \"calculate_pi\"\n",
+    "\n",
+    "calculate_pi_code = f\"\"\"\n",
+    "def leibniz_pi(iterations):\n",
+    "    result = 1.0\n",
+    "    for i in range(1, iterations+1):\n",
+    "        j = i * 4 - 1\n",
+    "        result -= (1/j)\n",
+    "        j = i * 4 + 1\n",
+    "        result += (1/j)\n",
+    "    return result * 4\n",
+    "\"\"\"\n",
+    "\n",
+    "# Define a function to test the performance of the calculus function.\n",
+    "\n",
+    "def test_pi_calculation(calculus_function ,iterations=100_000_000):\n",
+    "    \"\"\"Test the performance of the given calculus function.\"\"\"\n",
+    "    start_time = perf_counter()\n",
+    "    result = calculus_function(iterations)\n",
+    "    end_time = perf_counter()\n",
+    "    print(f\"Result: {result:.12f}\")\n",
+    "    print(f\"Execution Time: {(end_time - start_time):.6f} seconds\")\n",
+    "\n",
+    "# Execute function declaration.\n",
+    "exec(calculate_pi_code)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7fe1cd4b-d2c5-4303-afed-2115a3fef200",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Run original python code and time it.\n",
+    "\n",
+    "test_pi_calculation(leibniz_pi, 100_000_000)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4c0be0f2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Measure the average execution time of the original Python code over several runs.\n",
+    "# (Increase 'iterations' for better timing)\n",
+    "\n",
+    "print(\"Timing...\")\n",
+    "iterations = 5\n",
+    "average = timeit(lambda: leibniz_pi(100_000_000), number=iterations) / iterations\n",
+    "print(f\"Python average execution time: {average:.6f} seconds\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "105db6f9-343c-491d-8e44-3a5328b81719",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Request code optimization using GPT.\n",
+    "\n",
+    "optimization = optimize_gpt(calculate_pi_code, module_name)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "378981c7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Print generated extension code.\n",
+    "\n",
+    "print_optimization(optimization)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ae9a4a64",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Write the generated code to files.\n",
+    "# (Will overwrite existing files)\n",
+    "\n",
+    "write_optimization(optimization, module_name)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "bf8f8018-f64d-425c-a0e1-d7862aa9592d",
+   "metadata": {},
+   "source": [
+    "# Compiling C Extension and executing\n",
+    "\n",
+    "The Python setup command may fail inside JupyterLab; if it does, run it directly from the command line.\n",
+    "\n",
+    "Two cells are marked WINDOWS ONLY; they handle the fact that Windows ships with two command-line shells:\n",
+    "the old CMD (MS-DOS style) and the newer PowerShell (Unix style).\n",
+    "\n",
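+    "Which one is used is controlled by the COMSPEC environment variable.\\\n",
+    "*(Setting this variable is harmless on UNIX systems, which simply ignore it; however, skip the restore cell there, because COMSPEC is normally unset and assigning `None` back to `os.environ` raises a `TypeError`)*\n",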
+    "\n",
+    "Most of the shell commands used here are Unix style, but the build command requires CMD, so\n",
+    "we switch to CMD before compiling and restore the previous setting afterwards."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "22a9130e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Clean previous builds.\n",
+    "# (Only needed before re-running the compile cell)\n",
+    "# (May report an error if no previous build exists)\n",
+    "\n",
+    "!rm -r build/"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "816e7c9d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# [WINDOWS ONLY]\n",
+    "# Set COMSPEC to cmd.exe to avoid issues with some C compilers on Windows.\n",
+    "# (Remember to restore original COMSPEC after compilation and testing)\n",
+    "preset_comspec = os.environ.get(\"COMSPEC\")\n",
+    "os.environ[\"COMSPEC\"] = \"C:\\\\Windows\\\\System32\\\\cmd.exe\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4194e40c-04ab-4940-9d64-b4ad37c5bb40",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Compile the C extension.\n",
+    "# (Will fail if no C compiler is installed)\n",
+    "# (In case of errors, try directly on the command line)\n",
+    "\n",
+    "!python setup.py build_ext --inplace"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8db12c4d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# [WINDOWS ONLY]\n",
+    "# Restore original COMSPEC.\n",
+    "\n",
+    "os.environ[\"COMSPEC\"] = preset_comspec"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a8f5169f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Run the usage example to test the compiled C extension.\n",
+    "exec(optimization.usage)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a1972472",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Import newly created C extension and compare performance with original Python code.\n",
+    "\n",
+    "from calculate_pi import leibniz_pi as c_leibniz_pi\n",
+    "\n",
+    "print(\"Testing original Python code:\")\n",
+    "test_pi_calculation(leibniz_pi, 100_000_000)\n",
+    "print(\"Testing C extension code:\")\n",
+    "test_pi_calculation(c_leibniz_pi, 100_000_000)\n"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "ai-c-extension-generator-J3XBQkYw",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.13.13"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}