# GeoLLM / Task2 / kunkun_ndvi.py
# Author: Pengfa Li
# Commit: "Upload folder using huggingface_hub"
# Revision: badcf3c (verified)
# Read two columns from an Excel workbook: NDVI and the number of birds.
import pandas as pd
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
from sklearn.metrics import r2_score
import numpy as np
# Load the 'NDVI' worksheet, which holds the NDVI values and bird counts.
df = pd.read_excel('E:/Downloads/FAC终值.xlsx', sheet_name='NDVI')

# Pull out the first NDVI / bird-count column pair in one step.
NDVI1, num1 = (df[column] for column in ('NDVI1', 'NUM1'))

# Disabled: further column pairs, each truncated to its own row count.
# NDVI2, num2 = df['NDVI2'][0:106], df['数量2'][0:106]
# NDVI3, num3 = df['NDVI3'][0:52], df['数量3'][0:52]
# NDVI4, num4 = df['NDVI4'][0:11], df['数量4'][0:11]

# Sanity check: print the length of both series.
print(len(NDVI1), len(num1))

# Disabled: normalisation step. The original note says counts are scaled to
# [0, 1] and NDVI to [-1, 1]; the disabled code itself applied the same
# min-max rescaling to every series (num1..num4 and NDVI1..NDVI4):
# series = (series - np.min(series)) / (np.max(series) - np.min(series))

# Series pairs to be Gaussian-fitted; entry k of NDVI is paired with entry k
# of num. Only the first column pair is currently enabled.
NDVI, num = [NDVI1], [num1]
# Gaussian model used for the fit.
def gaussian(x, a, b, c, d):
    """Evaluate a Gaussian bell curve with a constant vertical offset.

    Computes ``a * exp(-(x - b)**2 / (2 * c**2)) + d``.

    Args:
        x: Point(s) at which to evaluate the curve. May be a scalar, a NumPy
            array, a pandas Series, or a plain list/tuple of numbers.
        a: Amplitude, i.e. the height of the peak above the offset.
        b: Position of the peak along the x axis.
        c: Width of the bell. It only enters the formula squared, so its
            sign does not affect the result. ``c == 0`` divides by zero.
        d: Constant offset added to the whole curve.

    Returns:
        The curve value(s) at ``x``. Scalars, arrays and Series keep their
        type; a list or tuple input yields a NumPy array.
    """
    if isinstance(x, (list, tuple)):
        # Plain sequences do not support element-wise arithmetic, so
        # ``x - b`` would raise TypeError without this conversion.
        x = np.asarray(x, dtype=float)
    return a * np.exp(-((x - b) ** 2) / (2 * c ** 2)) + d
# Fit the Gaussian model to one pair of series.
def gaussian_fit(x, y, p0=None):
    """Fit ``gaussian`` to paired samples and return the fitted parameters.

    Args:
        x: Independent variable samples (e.g. a pandas Series or array).
        y: Dependent variable samples, same length as ``x``.
        p0: Optional initial guess ``[a, b, c, d]``. When omitted, a guess is
            derived from the data. ``curve_fit`` would otherwise start from
            all ones, which is far from the scale of typical data and makes
            the optimiser prone to failing or to a poor local optimum.

    Returns:
        NumPy array ``[a, b, c, d]`` of fitted parameters. ``c`` is returned
        as a non-negative value: ``gaussian`` only uses ``c ** 2``, so the
        optimiser may land on either sign for the same curve.

    Raises:
        ValueError: If ``x`` and ``y`` differ in length, or if no finite
            data remain (raised by ``curve_fit``).
        RuntimeError: If the least-squares optimisation does not converge
            (raised by ``curve_fit``).
    """
    x_arr = np.asarray(x, dtype=float).ravel()
    y_arr = np.asarray(y, dtype=float).ravel()
    if x_arr.size != y_arr.size:
        raise ValueError(
            f'x and y must have the same length, got {x_arr.size} and {y_arr.size}'
        )

    # Drop pairs containing NaN/inf (e.g. blank spreadsheet cells); curve_fit
    # rejects non-finite input outright.
    keep = np.isfinite(x_arr) & np.isfinite(y_arr)
    x_arr, y_arr = x_arr[keep], y_arr[keep]

    if p0 is None and x_arr.size:
        offset = y_arr.min()
        # Fall back to a unit width when all x values coincide, so the
        # starting point never has c == 0.
        width = x_arr.std() or 1.0
        p0 = [y_arr.max() - offset, x_arr[np.argmax(y_arr)], width, offset]

    params, _ = curve_fit(gaussian, x_arr, y_arr, p0=p0)
    params[2] = abs(params[2])
    return params
# For every (NDVI, bird count) pair: fit the Gaussian model, print the fitted
# parameters and the R² score, and draw the data with its fitted curve.
for i, ndvi_values in enumerate(NDVI):
    bird_counts = num[i]

    # Guard clause: a pair of unequal length is reported and skipped.
    if len(ndvi_values) != len(bird_counts):
        print(f"数据长度不匹配: NDVI长度={len(NDVI[i])}, 数量长度={len(num[i])}")
        continue

    params = gaussian_fit(ndvi_values, bird_counts)

    # Disabled: draw the counts on a logarithmic y axis instead.
    # plt.semilogy(NDVI[i], num[i], label=f'Data {i+1}')

    # Sample the fitted curve at 100 evenly spaced points over the NDVI range.
    curve_x = np.linspace(min(ndvi_values), max(ndvi_values), 100)
    curve_y = gaussian(curve_x, *params)

    # Report the four parameters (amplitude, mean, standard deviation, offset).
    print(f'拟合参数a表示振幅: {params[0]}')
    print(f'拟合参数b表示均值: {params[1]}')
    print(f'拟合参数c表示标准差: {params[2]}')
    print(f'拟合参数d表示偏移量: {params[3]}')

    # Fitted curve first, then the raw observations as a scatter plot.
    plt.plot(curve_x, curve_y, label=f'Fit {i+1}')
    plt.scatter(ndvi_values, bird_counts, label=f'Data {i+1}')

    # Goodness of fit: R² between observed counts and model predictions.
    predicted = gaussian(ndvi_values, *params)
    R2 = r2_score(bird_counts, predicted)
    print(f'R² for Fit {i+1}: {R2:.4f}')

plt.legend()
plt.show()
# # Read E:\bang\KUNKUN\省市区县鸟种、观鸟记录等相关数据（1980-2024年）\1980~2024年观鸟记录、经纬度及其所处的省市区县数据.dta
# import pandas as pd
# # Read the input files (an Excel list of report IDs and the Stata .dta table)
# df_id = pd.read_excel('E:/bang/KUNKUN/省市区县鸟种、观鸟记录等相关数据(1980-2024年)/江西省.xlsx')
# df = pd.read_stata('E:/bang/KUNKUN/省市区县鸟种、观鸟记录等相关数据(1980-2024年)/1980~2024年鸟种观测统计报告（更新后）.dta')
# # Extract the ID column of df_id
# id_list = df_id['reportId'].tolist()
# # Print the column names of df
# print(df.columns)
# # Print the number of rows in df
# print(df.shape[0])
# # Select the records of df whose reportId appears in id_list
# jiangxi_df = df[df['reportId'].isin(id_list)]
# # Save the result (note: written as an Excel workbook, not CSV)
# jiangxi_df.to_excel('E:/bang/KUNKUN/省市区县鸟种、观鸟记录等相关数据(1980-2024年)/江西省1.xlsx')