| import gradio as gr |
| |
| |
| from langchain_openai import ChatOpenAI |
| |
| |
| |
| import mysql.connector |
| |
| import os |
|
|
| |
| title = "CarbonQuery" |
|
|
| long_desc = """**What is ‘CarbonQuery’?** |
| ***It is an AI-powered search tool, designed to provide a platform for users to ask questions in English about projects listed on CarbonSpace. It is aimed at facilitating convenient access to information on carbon projects and directing users towards further exploration in CarbonSpace.*** |
| |
| **Guidelines for ‘CarbonQuery’: The AI-powered Search Tool for CarbonSpace:** |
| |
| 1. Users may ask questions relevant to CarbonSpace that are similar in nature and complexity to the sample questions provided. |
| |
| 2. ‘CarbonQuery’ can respond to questions limited to the following attributes: |
| - Project ID (i.e. Registry ID) |
| - Project Name |
| - Verifier Validator Name |
| - Crediting Start Date |
| - Crediting End Date |
| - Estimated Emission Reduction |
| - Generated Credits |
| - Retired Credits |
| - Buffer Credit Amount |
| - Region (Africa, Asia, North America, South America, Europe, Middle East, Oceania) |
| - Country |
| - State |
| - Registry Name (Verra, Gold Standard, Climate Action Reserve, Clean Development Mechanism, American Carbon Registry) |
| - Project Status (E.g. Under Development, Under Validation, Rejected, Registered, Registration Requested, Verification Approved, On Hold) |
| - Project Sectoral Scope (Nature Based, Energy, Household, Industrial, Waste Management, Transport, Other) |
| """ |
|
|
| def carbon_space(question): |
| llm = ChatOpenAI( |
| model="gpt-4o-mini", |
| temperature=0.0001, |
| openai_api_key = os.getenv('OPEN_AI') |
| ) |
|
|
| db = mysql.connector.connect( |
| host="3.130.217.226", |
| user="csp_db_ext", |
| password= os.getenv('DB'), |
| database="scrapped_csp_db" |
| ) |
|
|
| db_cursor = db.cursor() |
| PROMPT_1 = f""" |
| You are an intelligent agent who has knowledge on SQL and databases. Our database has a table called |
| "Search_Tool_Data_V3". Users raise questions about this table. |
| You need to elaborate on the questions raised by users. The "Search_Tool_Data_V3" table has the |
| following columns. |
| project_id - primary key of the table, |
| registry_id - main id of the project, |
| project_name - name of the project, |
| verifier_validator_name - name of the verifier/validator, |
| crediting_start_date - start date of crediting, |
| crediting_end_date - end date of crediting, |
| estimated_emission_reduction - estimated credits, |
| generated_credits - issued credits, |
| retired_credits - retired credits, |
| buffer_credit_amount - buffer credits, |
| region_name - region is the part of the world that project exists. |
| Available regions in this table are: |
| Africa |
| Asia |
| North America |
| South America |
| Europe |
| Middle East |
| Oceania |
| Also, users may consider synonyms as well for regions. Ex: Latin America |
| country_name - name of the country where the project exists. Some of the available countries are: |
| Turkey |
| India |
| Lao |
| Kenya |
| Zambia |
| South Africa |
| Congo |
| Madagascar |
| Ethiopia |
| China |
| Eritrea |
| Bangladesh |
| Gambia |
| Myanmar |
| Honduras |
| Rwanda |
| Uganda |
| Indonesia |
| Sierra Leone |
| Malawi |
| Mozambique |
| Burkina Faso |
| Vietnam |
| Thailand |
| Nigeria |
| Nepal |
| Nicaragua |
| Guatemala |
| Togo |
| Philippines |
| Panama |
| Guinea |
| Colombia |
| Canada |
| Papua New Guinea |
| Argentina |
| Burundi |
| United republic of Tanzania |
| Mexico |
| Switzerland |
| Bolivia |
| Aruba |
| Somalia |
| Peru |
| Sri Lanka |
| Cameroon |
| Benin |
| Mongolia |
| Timor-Leste |
| Zimbabwe |
| Brazil |
| Pakistan |
| Mali |
| Egypt |
| Senegal |
| Georgia |
| New Zealand |
| Bulgaria |
| Chile |
| Lesotho |
| Congo, The Democratic Republic of The |
| Germany |
| Sudan |
| Ghana |
| United States |
| Cambodia |
| Taiwan |
| Dominican Republic |
| Iraq |
| Costa Rica |
| Morocco |
| Haiti |
| Romania |
| Russian Federation |
| United Kingdom |
| Paraguay |
| Tanzania |
| Netherlands |
| Uruguay |
| El Salvador |
| Tajikistan |
| Namibia |
| Australia |
| Malaysia |
| South Korea |
| Turkmenistan |
| Oman |
| Liberia |
| Singapore |
| Belize |
| Comoros |
| Central African Republic |
| Kazakhstan |
| Mauritania |
| Niger |
| United Arab Emirates |
| Iceland |
| Mauritius |
| Israel |
| Ecuador |
| Angola |
| Guinea-Bissau |
| France |
| Qatar |
| Republic of Korea |
| Cuba |
| Kuwait |
| Fiji |
| Cape Verde |
| Saudi Arabia |
| Jordan |
| Armenia |
| Iran |
| Tunisia |
| Cyprus |
| Uzbekistan |
| Azerbaijan |
| Albania |
| Democratic People's Republic of Korea |
| Republic of Moldova |
| Bhutan |
| Jamaica |
| Democratic Republic of the Congo |
| state_province_name - state or province where the project exists, |
| registry_name - name of the registry. Existing registries are: |
| Verra |
| Gold Standard |
| Climate Action Reserve |
| Clean Development Mechanism |
| American Carbon Registry |
| project_status_name - status of the project. Some of the available statues are: |
| Under Development |
| Under Validation |
| Minor Corrections |
| Rejected |
| Registered |
| Verification Approved |
| Registration Requested |
| On Hold |
| sectoral_scopes - comma separated names of project sectoral scopes. Existing sectoral scopes are: |
| Nature Based |
| Energy |
| Household |
| Industrial |
| Waste Management |
| Transport |
| Other |
| |
| This is the question raised by the user "{question}". Give a binary answer "YES" or "NO" whether this |
| question relates to this table or not. No full stops at the end. |
| An example for irrelevant question is "how are you". |
| """ |
| routing_direction = (llm.invoke(PROMPT_1)).content |
| if routing_direction == "YES": |
| PROMPT_2 = f""" |
| You are an intelligent agent who has knowledge on SQL and databases. Our database has a table called |
| "Search_Tool_Data_V3". Users raise questions about this table. |
| You need to elaborate on the questions raised by users. |
| The "Search_Tool_Data_V3" table has the following columns: |
| project_id - primary key of the table, |
| registry_id - main id of the project, |
| project_name - name of the project, |
| verifier_validator_name - name of the verifier/validator, |
| crediting_start_date - start date of crediting, |
| crediting_end_date - end date of crediting, |
| estimated_emission_reduction - estimated credits, |
| generated_credits - issued credits, |
| retired_credits - retired credits, |
| buffer_credit_amount - buffer credits, |
| region_name - region is the part of the world that project exists. |
| Available regions in this table are: |
| Africa |
| Asia |
| North America |
| South America |
| Europe |
| Middle East |
| Oceania |
| country_name - name of the country where the project exists. |
| Some of the available countries are: |
| Turkey, |
| India, |
| Kenya, |
| South Africa, etc |
| state_province_name - state or province where the project exists, |
| registry_name - name of the registry. Existing registries are: |
| Verra |
| Gold Standard |
| Climate Action Reserve |
| Clean Development Mechanism |
| American Carbon Registry |
| project_status_name - status of the project. Some of the available statues are: |
| Under Development |
| Under Validation |
| Minor Corrections |
| Rejected |
| Registered |
| Verification Approved |
| Registration Requested |
| On Hold |
| sectoral_scopes - comma separated names of project sectoral scopes. Existing sectoral scopes are: |
| Nature Based |
| Energy |
| Household |
| Industrial |
| Waste Management |
| Transport |
| Other |
| |
| This is the question raised by the user "{question}". |
| Provide a more descriptive question based on the given information. |
| """ |
| background_knowledge = (llm.invoke(PROMPT_2)).content |
| print('----background_knowledge----') |
| print(background_knowledge) |
| PROMPT_3 = f""" |
| You are an intelligent agent who has knowledge on SQL and databases. |
| Our database has a table called "Search_Tool_Data_V3". |
| Users raise questions about this table. |
| You need to elaborate on the questions raised by users. |
| The "Search_Tool_Data_V3" table has the following columns: |
| project_id - primary key of the table, |
| registry_id - main id of the project, |
| project_name - name of the project, |
| verifier_validator_name - name of the verifier/validator, |
| crediting_start_date - start date of crediting, |
| crediting_end_date - end date of crediting, |
| estimated_emission_reduction - estimated credits, |
| generated_credits - issued credits, |
| retired_credits - retired credits, |
| buffer_credit_amount - buffer credits, |
| region_name - region is the part of the world where the project exists. |
| Available regions in this table are: |
| Africa |
| Asia |
| North America |
| South America |
| Europe |
| Middle East |
| Oceania |
| country_name - name of the country where the project exists. Some of the available countries are: |
| Turkey, |
| India, |
| Kenya, |
| South Africa, etc |
| state_province_name - state or province where the project exists, |
| registry_name - name of the registry. Existing registries are: |
| Verra |
| Gold Standard |
| Climate Action Reserve |
| Clean Development Mechanism |
| American Carbon Registry |
| project_status_name - status of the project. Some of the available statues are: |
| Under Development |
| Under Validation |
| Minor Corrections |
| Rejected |
| Registered |
| Verification Approved |
| Registration Requested |
| On Hold |
| sectoral_scopes - comma separated names of project sectoral scopes. Existing sectoral scope names are: |
| Nature Based |
| Energy |
| Household |
| Industrial |
| Waste Management |
| Transport |
| Other |
| |
| This is the question raised by the user "{question}". Provide a more descriptive question based on the given information. |
| If you think this question doesn't relate to this table please tell us. Give only the modified question. |
| The modified question is |
| """ |
| new_question = (llm.invoke(PROMPT_3)).content |
| print('----new_question----') |
| print(new_question) |
| PROMPT_4 = f""" |
| [QUESTION]: {background_knowledge} |
| [TABLE_NAME]: Search_Tool_Data_V3 |
| [TABLE_COLUMNS]: project_id, registry_id, project_name, verifier_validator_name, crediting_start_date, crediting_end_date, estimated_emission_reduction, |
| generated_credits, retired_credits, buffer_credit_amount, region_name, country_name, state_province_name, registry_name, |
| project_status_name, sectoral_scopes |
| [COLUMN_DEFINITIONS]: project_id - primary key of the table, registry_id - main id of the project, project_name - name of the project, verifier_validator_name - name of the verifier/validator, |
| crediting_start_date - start date of crediting, crediting_end_date - end date of crediting, estimated_emission_reduction - estimated credits, |
| generated_credits - issued credits, retired_credits - retired credits, buffer_credit_amount - buffer credits, |
| region_name - region is the part of the world where the project exists. Available regions are in this table = Africa, Asia, |
| North America, South America, Europe, Middle East, and Oceania, |
| country_name - name of the country where the project exists, state_province_name - state or province where the project exists, |
| registry_name - name of the registry. existing registries are Verra, Gold Standard, Climate Action Reserve, Clean Development |
| Mechanism, American Carbon Registry, project_status_name - status of the project, |
| sectoral_scopes - comma separated names of project sectoral scopes. Available sectoral scope names are in this table = Nature Based, Energy, Household, Industrial, Waste Management, Transport, Other |
| [TASK]: You are a SQL expert. Give me a list of TABLE_COLUMNS that are referred in the QUESTION |
| Here are some examples for you: |
| [EX1] |
| [INPUT]: How many credits have been issued in Africa? |
| [OUTPUT]: [TABLE_COLUMNS]: registry_id, generated_credits, region_name |
| [EX2] |
| [INPUT]: What is the highest amount of buffer credits in Gold Standard projects ? |
| [OUTPUT]: [TABLE_COLUMNS]: buffer_credit_amount, registry_name |
| [EX3] |
| [INPUT]: How many projects have started crediting before 2022? |
| [OUTPUT]: [TABLE_COLUMNS]: registry_id, generated_credits, crediting_start_date |
| [EX4] |
| [INPUT]: What is the largest energy project? |
| [OUTPUT]: [TABLE_COLUMNS]: registry_id, project_name, generated_credits, sectoral_scopes |
| [EX5] |
| [INPUT]: How many projects are under validation? |
| [OUTPUT]: [TABLE_COLUMNS]: registry_id, project_status_name |
| |
| Now give the answer. |
| [OUTPUT]: [TABLE_COLUMNS]: |
| """ |
| columns = (llm.invoke(PROMPT_4)).content |
| print('----selected columns----') |
| print(columns) |
|
|
|
|
| PROMPT_5 = f""" |
| [QUESTION]: {new_question} |
| [TABLE_NAME]: Search_Tool_Data_V3 |
| [REQUIRED_COLUMNS]: {columns} |
| [TASK]: You are a SQL expert. Give me the SQL query for the above table based on QUESTION. |
| [EX1] |
| [INPUT]: |
| [QUESTION]:How many credits have been issued in Oceania? |
| [TABLE_NAME]: Search_Tool_Data_V3 |
| [REQUIRED_COLUMNS]: generated_credits, region_name |
| [OUTPUT]: SELECT SUM(generated_credits) AS total_credits_issued FROM Search_Tool_Data_V3 WHERE region_name = 'Oceania'; |
| [EX2] |
| [INPUT]: |
| [QUESTION]:How many Transport projects are in Verra? |
| [TABLE_NAME]: Search_Tool_Data_V3 |
| [REQUIRED_COLUMNS]: registry_id, sectoral_scopes, registry_name |
| [OUTPUT]: SELECT COUNT(registry_id) AS count FROM Search_Tool_Data_V3 WHERE sectoral_scopes LIKE '%Transport%' AND registry_name = 'Verra'; |
| [EX3] |
| [INPUT]: |
| [QUESTION]:Which projects have crediting periods more than 90 years? |
| [TABLE_NAME]: Search_Tool_Data_V3 |
| [REQUIRED_COLUMNS]: registry_id, project_name, crediting_start_date, crediting_end_date |
| [OUTPUT]:SELECT registry_id, project_name, TIMESTAMPDIFF(YEAR, crediting_start_date, crediting_end_date) AS crediting_period_years FROM Search_Tool_Data_V3 WHERE TIMESTAMPDIFF(YEAR, crediting_start_date, crediting_end_date) > 90 ORDER BY crediting_period_years DESC LIMIT 5; |
| [EX4] |
| [INPUT]: |
| [QUESTION]:What are the 3 smallest rejected projects? |
| [TABLE_NAME]: Search_Tool_Data_V3 |
| [REQUIRED_COLUMNS]: registry_id, project_name, project_status_name, generated_credits |
| [OUTPUT]: SELECT registry_id, project_name, generated_credits FROM Search_Tool_Data_V3 WHERE project_status_name = 'Rejected' ORDER BY generated_credits ASC LIMIT 3; |
| [EX5] |
| [INPUT]: |
| [QUESTION]:What is the status of the project with id 4918? |
| [TABLE_NAME]: Search_Tool_Data_V3 |
| [REQUIRED_COLUMNS]: registry_id, project_name, project_status_name |
| [OUTPUT]:SELECT registry_id, project_name, project_status_name FROM Search_Tool_Data_V3 WHERE registry_id = 4918; |
| |
| Now give the answer. Give only the SQL query. Remove ```sql prefix and ``` suffix |
| """ |
| query = (llm.invoke(PROMPT_5)).content |
| print('----query----') |
| print(query) |
| try: |
| db_cursor.execute(query) |
| answer = db_cursor.fetchall() |
| except Exception as e: |
| print("An Exception has occurred during SQL generation. The system will retry") |
| PROMPT_6 = f""" |
| You are a SQL expert. This SQL statement {query} throws this exception {e}. Please fix this. Only give the SQL statement. |
| Remove ```sql prefix and ``` suffix |
| """ |
| refine_query = (llm.invoke(PROMPT_6)).content |
| print('----refined_query----') |
| print(refine_query) |
| try: |
| db_cursor.close() |
| db = mysql.connector.connect( |
| host="3.130.217.226", |
| user="csp_db_ext", |
| password= os.getenv('DB'), |
| database="scrapped_csp_db" |
| ) |
| db_cursor = db.cursor() |
| db_cursor.execute(refine_query) |
| answer = db_cursor.fetchall() |
| except Exception as e: |
| print(e) |
| answer = "Apologies, the system is unable to provide an answer due to an internal failure." |
| PROMPT_7 = f""" |
| This is the question you have {question}. This is the answer {answer} for the question and the answer was obtained from this query {query}. |
| Give the final answer as a single sentence. If the user's question specifies a desired number of results, return the specified number of results in the final answer. |
| However, if no number is specified, use a default limit of 5 results. Here are some examples for you: |
| [EX1] |
| [QUESTION]: What are the 3 largest projects in India? |
| [SQL QUERY]: SELECT registry_id, project_name, generated_credits FROM Search_Tool_Data_V3 where country_name = 'India' ORDER BY generated_credits DESC LIMIT 3; |
| [SQL ANSWER]: [('1', 'Project for GHG emission reduction by thermal oxidation of HFC 23 in Gujarat, India.', 56704038), ('115', 'GHG emission reduction by thermal oxidation of HFC 23 at refrigerant (HCFC-22) manufacturing facility of SRF Ltd', 29688497), ('1742', 'Hydroelectric Project in Kinnaur District in Himachal Pradesh', 29486267)] |
| [FINAL ANSWER]: The 3 largest projects in India based on total issued credits are: |
| 1. 'Project for GHG emission reduction by thermal oxidation of HFC 23 in Gujarat, India.', registry_id = 1, total credits issued = 56,704,038 |
| 2. 'GHG emission reduction by thermal oxidation of HFC 23 at refrigerant (HCFC-22) manufacturing facility of SRF Ltd', registry_id = 115, total credits issued = 29,688,497 |
| 3. 'Hydroelectric Project in Kinnaur District in Himachal Pradesh', registry_id = 1742, total credits issued = 29,486,267 |
| |
| [EX2] |
| [QUESTION]: What are the largest projects in Asia? |
| [SQL QUERY]: SELECT registry_id, project_name, generated_credits FROM Search_Tool_Data_V3 where region_name = 'Asia' ORDER BY generated_credits DESC LIMIT 5; |
| [SQL ANSWER]: [('99', 'N2O Emission Reduction in Onsan, Republic of Korea', 118024575), ('306', 'Project for HFC23 Decomposition at Changshu 3F Zhonghao New Chemical Materials Co. Ltd, Changshu, Jiangsu Province, China', 63011172), ('1238', 'N2O decomposition project of PetroChina Company Limited Liaoyang Petrochemical Company', 60359870), ('232', 'Shandong Dongyue HFC23 Decomposition Project', 59508846'), ('1', 'Project for GHG emission reduction by thermal oxidation of HFC 23 in Gujarat, India.', 56704038)] |
| [FINAL ANSWER]: The largest projects in India based on total issued credits are: |
| 1. 'N2O Emission Reduction in Onsan, Republic of Korea', registry_id = 99, total credits issued = 118,024,575 |
| 2. 'Project for HFC23 Decomposition at Changshu 3F Zhonghao New Chemical Materials Co. Ltd, Changshu, Jiangsu Province, China', registry_id = 306, total credits issued = 63,011,172 |
| 3. 'N2O decomposition project of PetroChina Company Limited Liaoyang Petrochemical Company', registry_id = 1238, total credits issued = 60,359,870 |
| 4. 'Shandong Dongyue HFC23 Decomposition Project', registry_id = 232, total credits issued = 59508846' |
| 5. 'Project for GHG emission reduction by thermal oxidation of HFC 23 in Gujarat, India.', registry_id = 1, total credits issued = 56,704,038 |
| |
| Important: If the query includes a COUNT clause, do not limit the number of results, regardless of whether the user specifies a number or not. |
| """ |
| final_answer = (llm.invoke(PROMPT_7)).content |
| elif routing_direction == "NO": |
| PROMPT_8 = f""" |
| This question is not related to this tool. The question is {question}. Please ask user to raise a relevant question. |
| """ |
| final_answer = (llm.invoke(PROMPT_8)).content |
| print('----Answer----', final_answer) |
| print('\n') |
| return final_answer |
|
|
| if __name__=="__main__": |
| print("--------App Loading------") |
| app = gr.Interface(fn=carbon_space, |
| inputs="text", |
| outputs="text", |
| theme=gr.themes.Glass(), |
| examples=[ |
| "How many credits have been issued in the region South America?", |
| "What is the largest project in terms of credits issued?", |
| "What is the project with the latest crediting start date?", |
| "What registry has the highest number of retired credits?", |
| "What project has issued the largest buffer credits?", |
| "What is the status of project with registry id 4435?", |
| "How many nature projects are in Africa?"], |
| title=title, |
| article=long_desc, |
| delete_cache=(10, 10) |
| ) |
| try: |
| app.launch() |
| print("--------App Loaded------") |
| finally: |
| app.deconstruct() |
| |
| |
| |