AmaRanawaka's picture
Update app.py
cab6424 verified
import gradio as gr
# import pandas as pd
# import statistics
from langchain_openai import ChatOpenAI
# from langchain_community.utilities import SQLDatabase
# from langchain.agents import create_sql_agent
# import urllib.parse
import mysql.connector
# import re
import os
# Meta data
title = "CarbonQuery"
long_desc = """**What is ‘CarbonQuery’?**
***It is an AI-powered search tool, designed to provide a platform for users to ask questions in English about projects listed on CarbonSpace. It is aimed at facilitating convenient access to information on carbon projects and directing users towards further exploration in CarbonSpace.***
**Guidelines for ‘CarbonQuery’: The AI-powered Search Tool for CarbonSpace:**
1. Users may ask questions relevant to CarbonSpace that are similar in nature and complexity to the sample questions provided.
2. ‘CarbonQuery’ can respond to questions limited to the following attributes:
- Project ID (i.e. Registry ID)
- Project Name
- Verifier Validator Name
- Crediting Start Date
- Crediting End Date
- Estimated Emission Reduction
- Generated Credits
- Retired Credits
- Buffer Credit Amount
- Region (Africa, Asia, North America, South America, Europe, Middle East, Oceania)
- Country
- State
- Registry Name (Verra, Gold Standard, Climate Action Reserve, Clean Development Mechanism, American Carbon Registry)
- Project Status (E.g. Under Development, Under Validation, Rejected, Registered, Registration Requested, Verification Approved, On Hold)
- Project Sectoral Scope (Nature Based, Energy, Household, Industrial, Waste Management, Transport, Other)
"""
def carbon_space(question):
llm = ChatOpenAI(
model="gpt-4o-mini",
temperature=0.0001,
openai_api_key = os.getenv('OPEN_AI')
)
db = mysql.connector.connect(
host="3.130.217.226",
user="csp_db_ext",
password= os.getenv('DB'),
database="scrapped_csp_db"
)
db_cursor = db.cursor()
PROMPT_1 = f"""
You are an intelligent agent who has knowledge on SQL and databases. Our database has a table called
"Search_Tool_Data_V3". Users raise questions about this table.
You need to elaborate on the questions raised by users. The "Search_Tool_Data_V3" table has the
following columns.
project_id - primary key of the table,
registry_id - main id of the project,
project_name - name of the project,
verifier_validator_name - name of the verifier/validator,
crediting_start_date - start date of crediting,
crediting_end_date - end date of crediting,
estimated_emission_reduction - estimated credits,
generated_credits - issued credits,
retired_credits - retired credits,
buffer_credit_amount - buffer credits,
region_name - region is the part of the world that project exists.
Available regions in this table are:
Africa
Asia
North America
South America
Europe
Middle East
Oceania
Also, users may consider synonyms as well for regions. Ex: Latin America
country_name - name of the country where the project exists. Some of the available countries are:
Turkey
India
Lao
Kenya
Zambia
South Africa
Congo
Madagascar
Ethiopia
China
Eritrea
Bangladesh
Gambia
Myanmar
Honduras
Rwanda
Uganda
Indonesia
Sierra Leone
Malawi
Mozambique
Burkina Faso
Vietnam
Thailand
Nigeria
Nepal
Nicaragua
Guatemala
Togo
Philippines
Panama
Guinea
Colombia
Canada
Papua New Guinea
Argentina
Burundi
United republic of Tanzania
Mexico
Switzerland
Bolivia
Aruba
Somalia
Peru
Sri Lanka
Cameroon
Benin
Mongolia
Timor-Leste
Zimbabwe
Brazil
Pakistan
Mali
Egypt
Senegal
Georgia
New Zealand
Bulgaria
Chile
Lesotho
Congo, The Democratic Republic of The
Germany
Sudan
Ghana
United States
Cambodia
Taiwan
Dominican Republic
Iraq
Costa Rica
Morocco
Haiti
Romania
Russian Federation
United Kingdom
Paraguay
Tanzania
Netherlands
Uruguay
El Salvador
Tajikistan
Namibia
Australia
Malaysia
South Korea
Turkmenistan
Oman
Liberia
Singapore
Belize
Comoros
Central African Republic
Kazakhstan
Mauritania
Niger
United Arab Emirates
Iceland
Mauritius
Israel
Ecuador
Angola
Guinea-Bissau
France
Qatar
Republic of Korea
Cuba
Kuwait
Fiji
Cape Verde
Saudi Arabia
Jordan
Armenia
Iran
Tunisia
Cyprus
Uzbekistan
Azerbaijan
Albania
Democratic People's Republic of Korea
Republic of Moldova
Bhutan
Jamaica
Democratic Republic of the Congo
state_province_name - state or province where the project exists,
registry_name - name of the registry. Existing registries are:
Verra
Gold Standard
Climate Action Reserve
Clean Development Mechanism
American Carbon Registry
project_status_name - status of the project. Some of the available statues are:
Under Development
Under Validation
Minor Corrections
Rejected
Registered
Verification Approved
Registration Requested
On Hold
sectoral_scopes - comma separated names of project sectoral scopes. Existing sectoral scopes are:
Nature Based
Energy
Household
Industrial
Waste Management
Transport
Other
This is the question raised by the user "{question}". Give a binary answer "YES" or "NO" whether this
question relates to this table or not. No full stops at the end.
An example for irrelevant question is "how are you".
"""
routing_direction = (llm.invoke(PROMPT_1)).content
if routing_direction == "YES":
PROMPT_2 = f"""
You are an intelligent agent who has knowledge on SQL and databases. Our database has a table called
"Search_Tool_Data_V3". Users raise questions about this table.
You need to elaborate on the questions raised by users.
The "Search_Tool_Data_V3" table has the following columns:
project_id - primary key of the table,
registry_id - main id of the project,
project_name - name of the project,
verifier_validator_name - name of the verifier/validator,
crediting_start_date - start date of crediting,
crediting_end_date - end date of crediting,
estimated_emission_reduction - estimated credits,
generated_credits - issued credits,
retired_credits - retired credits,
buffer_credit_amount - buffer credits,
region_name - region is the part of the world that project exists.
Available regions in this table are:
Africa
Asia
North America
South America
Europe
Middle East
Oceania
country_name - name of the country where the project exists.
Some of the available countries are:
Turkey,
India,
Kenya,
South Africa, etc
state_province_name - state or province where the project exists,
registry_name - name of the registry. Existing registries are:
Verra
Gold Standard
Climate Action Reserve
Clean Development Mechanism
American Carbon Registry
project_status_name - status of the project. Some of the available statues are:
Under Development
Under Validation
Minor Corrections
Rejected
Registered
Verification Approved
Registration Requested
On Hold
sectoral_scopes - comma separated names of project sectoral scopes. Existing sectoral scopes are:
Nature Based
Energy
Household
Industrial
Waste Management
Transport
Other
This is the question raised by the user "{question}".
Provide a more descriptive question based on the given information.
"""
background_knowledge = (llm.invoke(PROMPT_2)).content
print('----background_knowledge----')
print(background_knowledge)
PROMPT_3 = f"""
You are an intelligent agent who has knowledge on SQL and databases.
Our database has a table called "Search_Tool_Data_V3".
Users raise questions about this table.
You need to elaborate on the questions raised by users.
The "Search_Tool_Data_V3" table has the following columns:
project_id - primary key of the table,
registry_id - main id of the project,
project_name - name of the project,
verifier_validator_name - name of the verifier/validator,
crediting_start_date - start date of crediting,
crediting_end_date - end date of crediting,
estimated_emission_reduction - estimated credits,
generated_credits - issued credits,
retired_credits - retired credits,
buffer_credit_amount - buffer credits,
region_name - region is the part of the world where the project exists.
Available regions in this table are:
Africa
Asia
North America
South America
Europe
Middle East
Oceania
country_name - name of the country where the project exists. Some of the available countries are:
Turkey,
India,
Kenya,
South Africa, etc
state_province_name - state or province where the project exists,
registry_name - name of the registry. Existing registries are:
Verra
Gold Standard
Climate Action Reserve
Clean Development Mechanism
American Carbon Registry
project_status_name - status of the project. Some of the available statues are:
Under Development
Under Validation
Minor Corrections
Rejected
Registered
Verification Approved
Registration Requested
On Hold
sectoral_scopes - comma separated names of project sectoral scopes. Existing sectoral scope names are:
Nature Based
Energy
Household
Industrial
Waste Management
Transport
Other
This is the question raised by the user "{question}". Provide a more descriptive question based on the given information.
If you think this question doesn't relate to this table please tell us. Give only the modified question.
The modified question is
"""
new_question = (llm.invoke(PROMPT_3)).content
print('----new_question----')
print(new_question)
PROMPT_4 = f"""
[QUESTION]: {background_knowledge}
[TABLE_NAME]: Search_Tool_Data_V3
[TABLE_COLUMNS]: project_id, registry_id, project_name, verifier_validator_name, crediting_start_date, crediting_end_date, estimated_emission_reduction,
generated_credits, retired_credits, buffer_credit_amount, region_name, country_name, state_province_name, registry_name,
project_status_name, sectoral_scopes
[COLUMN_DEFINITIONS]: project_id - primary key of the table, registry_id - main id of the project, project_name - name of the project, verifier_validator_name - name of the verifier/validator,
crediting_start_date - start date of crediting, crediting_end_date - end date of crediting, estimated_emission_reduction - estimated credits,
generated_credits - issued credits, retired_credits - retired credits, buffer_credit_amount - buffer credits,
region_name - region is the part of the world where the project exists. Available regions are in this table = Africa, Asia,
North America, South America, Europe, Middle East, and Oceania,
country_name - name of the country where the project exists, state_province_name - state or province where the project exists,
registry_name - name of the registry. existing registries are Verra, Gold Standard, Climate Action Reserve, Clean Development
Mechanism, American Carbon Registry, project_status_name - status of the project,
sectoral_scopes - comma separated names of project sectoral scopes. Available sectoral scope names are in this table = Nature Based, Energy, Household, Industrial, Waste Management, Transport, Other
[TASK]: You are a SQL expert. Give me a list of TABLE_COLUMNS that are referred in the QUESTION
Here are some examples for you:
[EX1]
[INPUT]: How many credits have been issued in Africa?
[OUTPUT]: [TABLE_COLUMNS]: registry_id, generated_credits, region_name
[EX2]
[INPUT]: What is the highest amount of buffer credits in Gold Standard projects ?
[OUTPUT]: [TABLE_COLUMNS]: buffer_credit_amount, registry_name
[EX3]
[INPUT]: How many projects have started crediting before 2022?
[OUTPUT]: [TABLE_COLUMNS]: registry_id, generated_credits, crediting_start_date
[EX4]
[INPUT]: What is the largest energy project?
[OUTPUT]: [TABLE_COLUMNS]: registry_id, project_name, generated_credits, sectoral_scopes
[EX5]
[INPUT]: How many projects are under validation?
[OUTPUT]: [TABLE_COLUMNS]: registry_id, project_status_name
Now give the answer.
[OUTPUT]: [TABLE_COLUMNS]:
"""
columns = (llm.invoke(PROMPT_4)).content
print('----selected columns----')
print(columns)
PROMPT_5 = f"""
[QUESTION]: {new_question}
[TABLE_NAME]: Search_Tool_Data_V3
[REQUIRED_COLUMNS]: {columns}
[TASK]: You are a SQL expert. Give me the SQL query for the above table based on QUESTION.
[EX1]
[INPUT]:
[QUESTION]:How many credits have been issued in Oceania?
[TABLE_NAME]: Search_Tool_Data_V3
[REQUIRED_COLUMNS]: generated_credits, region_name
[OUTPUT]: SELECT SUM(generated_credits) AS total_credits_issued FROM Search_Tool_Data_V3 WHERE region_name = 'Oceania';
[EX2]
[INPUT]:
[QUESTION]:How many Transport projects are in Verra?
[TABLE_NAME]: Search_Tool_Data_V3
[REQUIRED_COLUMNS]: registry_id, sectoral_scopes, registry_name
[OUTPUT]: SELECT COUNT(registry_id) AS count FROM Search_Tool_Data_V3 WHERE sectoral_scopes LIKE '%Transport%' AND registry_name = 'Verra';
[EX3]
[INPUT]:
[QUESTION]:Which projects have crediting periods more than 90 years?
[TABLE_NAME]: Search_Tool_Data_V3
[REQUIRED_COLUMNS]: registry_id, project_name, crediting_start_date, crediting_end_date
[OUTPUT]:SELECT registry_id, project_name, TIMESTAMPDIFF(YEAR, crediting_start_date, crediting_end_date) AS crediting_period_years FROM Search_Tool_Data_V3 WHERE TIMESTAMPDIFF(YEAR, crediting_start_date, crediting_end_date) > 90 ORDER BY crediting_period_years DESC LIMIT 5;
[EX4]
[INPUT]:
[QUESTION]:What are the 3 smallest rejected projects?
[TABLE_NAME]: Search_Tool_Data_V3
[REQUIRED_COLUMNS]: registry_id, project_name, project_status_name, generated_credits
[OUTPUT]: SELECT registry_id, project_name, generated_credits FROM Search_Tool_Data_V3 WHERE project_status_name = 'Rejected' ORDER BY generated_credits ASC LIMIT 3;
[EX5]
[INPUT]:
[QUESTION]:What is the status of the project with id 4918?
[TABLE_NAME]: Search_Tool_Data_V3
[REQUIRED_COLUMNS]: registry_id, project_name, project_status_name
[OUTPUT]:SELECT registry_id, project_name, project_status_name FROM Search_Tool_Data_V3 WHERE registry_id = 4918;
Now give the answer. Give only the SQL query. Remove ```sql prefix and ``` suffix
"""
query = (llm.invoke(PROMPT_5)).content
print('----query----')
print(query)
try:
db_cursor.execute(query)
answer = db_cursor.fetchall()
except Exception as e:
print("An Exception has occurred during SQL generation. The system will retry")
PROMPT_6 = f"""
You are a SQL expert. This SQL statement {query} throws this exception {e}. Please fix this. Only give the SQL statement.
Remove ```sql prefix and ``` suffix
"""
refine_query = (llm.invoke(PROMPT_6)).content
print('----refined_query----')
print(refine_query)
try:
db_cursor.close()
db = mysql.connector.connect(
host="3.130.217.226",
user="csp_db_ext",
password= os.getenv('DB'),
database="scrapped_csp_db"
)
db_cursor = db.cursor()
db_cursor.execute(refine_query)
answer = db_cursor.fetchall()
except Exception as e:
print(e)
answer = "Apologies, the system is unable to provide an answer due to an internal failure."
PROMPT_7 = f"""
This is the question you have {question}. This is the answer {answer} for the question and the answer was obtained from this query {query}.
Give the final answer as a single sentence. If the user's question specifies a desired number of results, return the specified number of results in the final answer.
However, if no number is specified, use a default limit of 5 results. Here are some examples for you:
[EX1]
[QUESTION]: What are the 3 largest projects in India?
[SQL QUERY]: SELECT registry_id, project_name, generated_credits FROM Search_Tool_Data_V3 where country_name = 'India' ORDER BY generated_credits DESC LIMIT 3;
[SQL ANSWER]: [('1', 'Project for GHG emission reduction by thermal oxidation of HFC 23 in Gujarat, India.', 56704038), ('115', 'GHG emission reduction by thermal oxidation of HFC 23 at refrigerant (HCFC-22) manufacturing facility of SRF Ltd', 29688497), ('1742', 'Hydroelectric Project in Kinnaur District in Himachal Pradesh', 29486267)]
[FINAL ANSWER]: The 3 largest projects in India based on total issued credits are:
1. 'Project for GHG emission reduction by thermal oxidation of HFC 23 in Gujarat, India.', registry_id = 1, total credits issued = 56,704,038
2. 'GHG emission reduction by thermal oxidation of HFC 23 at refrigerant (HCFC-22) manufacturing facility of SRF Ltd', registry_id = 115, total credits issued = 29,688,497
3. 'Hydroelectric Project in Kinnaur District in Himachal Pradesh', registry_id = 1742, total credits issued = 29,486,267
[EX2]
[QUESTION]: What are the largest projects in Asia?
[SQL QUERY]: SELECT registry_id, project_name, generated_credits FROM Search_Tool_Data_V3 where region_name = 'Asia' ORDER BY generated_credits DESC LIMIT 5;
[SQL ANSWER]: [('99', 'N2O Emission Reduction in Onsan, Republic of Korea', 118024575), ('306', 'Project for HFC23 Decomposition at Changshu 3F Zhonghao New Chemical Materials Co. Ltd, Changshu, Jiangsu Province, China', 63011172), ('1238', 'N2O decomposition project of PetroChina Company Limited Liaoyang Petrochemical Company', 60359870), ('232', 'Shandong Dongyue HFC23 Decomposition Project', 59508846'), ('1', 'Project for GHG emission reduction by thermal oxidation of HFC 23 in Gujarat, India.', 56704038)]
[FINAL ANSWER]: The largest projects in India based on total issued credits are:
1. 'N2O Emission Reduction in Onsan, Republic of Korea', registry_id = 99, total credits issued = 118,024,575
2. 'Project for HFC23 Decomposition at Changshu 3F Zhonghao New Chemical Materials Co. Ltd, Changshu, Jiangsu Province, China', registry_id = 306, total credits issued = 63,011,172
3. 'N2O decomposition project of PetroChina Company Limited Liaoyang Petrochemical Company', registry_id = 1238, total credits issued = 60,359,870
4. 'Shandong Dongyue HFC23 Decomposition Project', registry_id = 232, total credits issued = 59508846'
5. 'Project for GHG emission reduction by thermal oxidation of HFC 23 in Gujarat, India.', registry_id = 1, total credits issued = 56,704,038
Important: If the query includes a COUNT clause, do not limit the number of results, regardless of whether the user specifies a number or not.
"""
final_answer = (llm.invoke(PROMPT_7)).content
elif routing_direction == "NO":
PROMPT_8 = f"""
This question is not related to this tool. The question is {question}. Please ask user to raise a relevant question.
"""
final_answer = (llm.invoke(PROMPT_8)).content
print('----Answer----', final_answer)
print('\n')
return final_answer
if __name__=="__main__":
print("--------App Loading------")
app = gr.Interface(fn=carbon_space,
inputs="text",
outputs="text",
theme=gr.themes.Glass(),
examples=[
"How many credits have been issued in the region South America?",
"What is the largest project in terms of credits issued?",
"What is the project with the latest crediting start date?",
"What registry has the highest number of retired credits?",
"What project has issued the largest buffer credits?",
"What is the status of project with registry id 4435?",
"How many nature projects are in Africa?"],
title=title,
article=long_desc,
delete_cache=(10, 10)
)
try:
app.launch()
print("--------App Loaded------")
finally:
app.deconstruct()