khushidhar1210 committed on
Commit
4c45263
·
verified ·
1 Parent(s): 9aad01f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +83 -85
app.py CHANGED
@@ -10,99 +10,100 @@ import streamlit as st
10
  from transformers import pipeline
11
 
12
 
13
- # Function to parse KML
14
def parse_kml(kml_file):
    """Parse a KML document into a list of geometries.

    Every Placemark is scanned for Polygon, LineString and Point elements
    (in that order) and each one found is converted with ``shape``.

    Args:
        kml_file: The KML document as bytes (a ``str`` is accepted too).

    Returns:
        A list of geometries, or ``None`` when no usable geometry is found.

    Raises:
        ValueError: If a coordinate tuple has fewer than two components or
            a component is not numeric.
    """
    ns = {'kml': 'http://www.opengis.net/kml/2.2'}
    # Hand the raw document to the parser so it can honour the encoding
    # declared in the XML prolog.
    root = ET.fromstring(kml_file)

    def lon_lat_pairs(node):
        # KML tuples are "lon,lat[,alt]" separated by whitespace. Altitude is
        # optional, so only the first two components are used.
        if node is None or not (node.text or '').strip():
            return []
        return [
            (float(lon), float(lat))
            for lon, lat in (item.split(',')[:2] for item in node.text.split())
        ]

    shapes = []
    for placemark in root.findall('.//kml:Placemark', ns):
        for polygon in placemark.findall('.//kml:Polygon', ns):
            # Standard KML nests the ring under outerBoundaryIs/LinearRing.
            outer = polygon.find('kml:outerBoundaryIs/kml:LinearRing/kml:coordinates', ns)
            if outer is None:
                # Fall back to the flat layout the previous lookup expected.
                outer = polygon.find('kml:coordinates', ns)
            shell = lon_lat_pairs(outer)
            if not shell:
                continue
            holes = [
                lon_lat_pairs(inner)
                for inner in polygon.findall('kml:innerBoundaryIs/kml:LinearRing/kml:coordinates', ns)
            ]
            rings = [shell] + [hole for hole in holes if hole]
            shapes.append(shape({'type': 'Polygon', 'coordinates': rings}))

        for line in placemark.findall('.//kml:LineString', ns):
            path = lon_lat_pairs(line.find('kml:coordinates', ns))
            if path:
                shapes.append(shape({'type': 'LineString', 'coordinates': path}))

        for point in placemark.findall('.//kml:Point', ns):
            location = lon_lat_pairs(point.find('kml:coordinates', ns))
            if location:
                shapes.append(shape({'type': 'Point', 'coordinates': location[0]}))

    return shapes if shapes else None
41
 
42
-
43
def extract_kmz(kmz_file):
    """Pull the KML document out of a KMZ archive and parse it.

    Args:
        kmz_file: The KMZ archive as raw bytes, a filesystem path, or a
            seekable binary file object.

    Returns:
        The result of ``parse_kml`` for the archive's KML member, or
        ``None`` when the archive contains no ``.kml`` member.
    """
    import io

    # ZipFile needs a path or a seekable stream; the upload handler passes
    # the raw bytes, so wrap them.
    if isinstance(kmz_file, (bytes, bytearray)):
        kmz_file = io.BytesIO(kmz_file)

    with zipfile.ZipFile(kmz_file, 'r') as zip_ref:
        members = [name for name in zip_ref.namelist() if name.lower().endswith('.kml')]
        if not members:
            return None
        # Prefer the shallowest member (the main document normally sits at
        # the archive root). Reading it in memory avoids picking up a stale
        # file left in a shared extraction directory by an earlier upload.
        main_doc = min(members, key=lambda name: name.count('/'))
        return parse_kml(zip_ref.read(main_doc))
49
 
50
- # Function to handle KML/KMZ file input
51
def handle_kml_upload(uploaded_file):
    """Read an uploaded KML or KMZ file and return its geometries.

    Args:
        uploaded_file: Upload object exposing ``read()`` (returning bytes)
            and a ``name`` attribute holding the file name.

    Returns:
        Whatever ``extract_kmz`` (for ``.kmz`` names) or ``parse_kml``
        (for anything else) returns for the file's content.
    """
    file_bytes = uploaded_file.read()

    # Compare the extension case-insensitively so "AREA.KMZ" is still
    # unpacked as an archive instead of being fed to the XML parser.
    if uploaded_file.name.lower().endswith('.kmz'):
        return extract_kmz(file_bytes)
    return parse_kml(file_bytes)
63
 
64
 
65
- # Streamlit file uploader widget
66
  st.title("Flood Zone Analysis")
67
- uploaded_file = st.file_uploader("Upload KML/KMZ file", type=['kml', 'kmz'])
68
 
69
- # Step 7: Compare KML with Shapefile
70
def compare_kml_with_shapefile(kml_shapes, gdf):
    """Find the parts of the flood-zone layer that overlap the KML shapes.

    Args:
        kml_shapes: Iterable of geometries (a GeoSeries or a plain list).
            Presumably already in the same CRS as ``gdf`` - verify against
            the caller.
        gdf: GeoDataFrame holding the flood-zone features.

    Returns:
        ``(intersections, merged_geometry)`` where ``intersections`` is a
        list with one GeoDataFrame per KML shape that hit something (rows
        are the matching features, geometry clipped to that KML shape) and
        ``merged_geometry`` is the union of all clipped geometries.
        On failure, ``(message, None)`` with a human-readable message.
    """
    usable_shapes = [geom for geom in kml_shapes if geom is not None and not geom.is_empty]
    if not usable_shapes:
        return "Invalid KML shape or no valid polygon found.", None

    intersections = []
    for kml_shape in usable_shapes:
        hits = gdf[gdf.intersects(kml_shape)].copy()
        if hits.empty:
            continue
        # Keep only the overlap with the KML shape; otherwise every area
        # derived from these rows would be the area of the whole flood-zone
        # polygon rather than of the uploaded boundary.
        hits[hits.geometry.name] = hits.geometry.intersection(kml_shape)
        intersections.append(hits)

    if not intersections:
        return "Boundary doesn't match", None

    merged_geometry = unary_union(
        [geom for frame in intersections for geom in frame.geometry]
    )
    return intersections, merged_geometry
82
-
83
-
84
- # Step 8: Calculate Acreage and Usable Land (based on shapefile)
85
def calculate_acreage_and_usable_land(intersections, merged_geometry):
    """Compute per-zone acreage, non-flood-prone acreage and merged acreage.

    Args:
        intersections: List of GeoDataFrames with a ``FLD_ZONE`` column.
        merged_geometry: Geometry whose ``area`` is reported as the merged
            acreage.

    Returns:
        ``(flood_zone_areas, non_flooded_area, merged_area)``: a dict mapping
        each ``FLD_ZONE`` value to its summed acreage, the summed acreage of
        rows whose zone is not A/AE/AH/AO/VE, and the acreage of
        ``merged_geometry``. An empty ``intersections`` list yields
        ``({}, 0.0, 0.0)``.
    """
    # Divisor used to turn geometry areas into acres; assumes the areas are
    # in square metres - TODO confirm against the CRS chosen by the caller.
    area_units_per_acre = 4046.86
    flood_prone_zones = ['A', 'AE', 'AH', 'AO', 'VE']

    if not intersections:
        # pd.concat raises on an empty list; report "nothing found" instead.
        return {}, 0.0, 0.0

    combined = pd.concat(intersections)
    combined['area'] = combined.geometry.area
    combined['area_acres'] = combined['area'] / area_units_per_acre

    flood_zone_areas = {}
    for zone in combined['FLD_ZONE'].unique():
        in_zone = combined['FLD_ZONE'] == zone
        flood_zone_areas[zone] = combined[in_zone]['area_acres'].sum()

    outside_flood_zones = ~combined['FLD_ZONE'].isin(flood_prone_zones)
    non_flooded_area = combined[outside_flood_zones]['area_acres'].sum()
    merged_area = merged_geometry.area / area_units_per_acre
    return flood_zone_areas, non_flooded_area, merged_area
94
-
95
- # Step 9: Summarize data (the pipeline below loads t5-small, despite the GPT-2 naming)
96
def summarize_flood_data_gpt2(flood_zone_areas, non_flooded_area, total_acreage):
    """Produce a short text summary of the acreage figures.

    Despite the name, the summary comes from the ``t5-small`` summarization
    pipeline.

    Args:
        flood_zone_areas: Mapping of flood-zone code to acreage.
        non_flooded_area: Acreage reported as "Usable Area" in the prompt.
        total_acreage: Acreage reported as "Total Land Area" in the prompt.

    Returns:
        The ``summary_text`` of the first pipeline result.
    """
    t5_summarizer = pipeline("summarization", model="t5-small")

    # One line per flood-prone zone; zones missing from the mapping show 0.
    zone_lines = []
    for zone in ('A', 'AE', 'AH', 'AO', 'VE'):
        zone_lines.append(f" Zone {zone}: {flood_zone_areas.get(zone, 0):.2f} acres")
    flood_zone_summary = "\n".join(zone_lines)

    prompt = f"""
    **Total Land Area**: {total_acreage:.2f} acres
    **Usable Area**: {non_flooded_area:.2f} acres
    **Flood-prone Zones**:
    {flood_zone_summary}
    Summarize the above given data in a 2-3 sentences.
    """

    results = t5_summarizer(prompt, max_length=200, min_length=30, do_sample=False)
    first_result = results[0]
    return first_result['summary_text']
115
 
116
 
117
- if uploaded_file is not None:
118
- # Step 1: Read in the shapefiles
119
- shapefile_1 = gpd.read_file("K_FLD_HAZ_AR.shp")
120
- shapefile_2 = gpd.read_file("N_FLD_HAZ_AR.shp")
121
- shapefile_3 = gpd.read_file("S_FLD_HAZ_AR.shp")
 
 
 
122
 
123
- # Step 2: Concatenate them into a single GeoDataFrame
124
- merged_gdf = gpd.GeoDataFrame(pd.concat([shapefile_1, shapefile_2, shapefile_3], ignore_index=True))
125
 
126
- # Step 3: Ensure CRS consistency
127
- merged_gdf = merged_gdf.set_crs(shapefile_1.crs, allow_override=True)
128
 
129
- # Step 4: Reproject GeoDataFrame to a common projected CRS (EPSG:3857)
130
- merged_gdf = merged_gdf.to_crs(epsg=3857)
131
 
132
- # Step 5: Validate and fix invalid geometries
133
- merged_gdf['geometry'] = merged_gdf['geometry'].apply(lambda x: x.buffer(0) if not x.is_valid else x)
134
 
135
- # Step 6: Process uploaded KML/KMZ file
136
- kml_shapes = handle_kml_upload(uploaded_file)
 
137
 
138
- if kml_shapes:
139
- # Ensure that the KML shapes are valid geometries
140
- kml_gdf = gpd.GeoDataFrame(geometry=kml_shapes, crs="EPSG:4326")
141
 
142
- # Convert to projected CRS (EPSG:3857) for consistency with shapefile
143
  kml_gdf = kml_gdf.to_crs(epsg=3857)
144
 
145
  # Compare KML and Shapefile
146
- intersection, merged_geometry = compare_kml_with_shapefile(kml_gdf.geometry, merged_gdf)
147
 
148
  if isinstance(intersection, str):
149
  st.write(intersection)
150
  else:
151
- flood_zone_areas, non_flooded_area, merged_area = calculate_acreage_and_usable_land(intersection, merged_geometry)
152
  st.write(f"Flood Zone Areas:")
153
  for zone, area in flood_zone_areas.items():
154
  st.write(f" Zone {zone}: {area:.2f} acres")
155
- st.write(f"\nNon-Flooded Land Area: {non_flooded_area:.2f} acres")
156
  st.write(f"\nMerged Area of Intersected Boundary: {merged_area:.2f} acres")
157
- summary = summarize_flood_data_gpt2(flood_zone_areas, non_flooded_area, merged_area)
158
  st.write(f"GPT-2 Summary: {summary}")
159
 
160
- # Step 10: Visualize the intersection of KML shapes and merged shapefile
161
  fig, ax = plt.subplots(figsize=(10, 10))
162
 
163
- # Plot the merged shapefile (blue color)
164
- merged_gdf.plot(ax=ax, color='blue', alpha=0.5)
165
 
166
- # Highlight the intersection area (overlap with KML)
167
  if intersection:
168
- # Combine the intersection geometries into a single geometry for display
169
  intersection_geom = unary_union([geom for intersect in intersection for geom in intersect.geometry])
170
- gpd.GeoDataFrame(geometry=[intersection_geom], crs=merged_gdf.crs).plot(ax=ax, color='red', alpha=0.7)
171
 
172
  # Plot the KML boundary (green color)
173
  kml_gdf.plot(ax=ax, color='green', alpha=0.3)
 
10
  from transformers import pipeline
11
 
12
 
13
+ # For KML access
14
def p(kml_file):
    """Parse a KML document into a list of geometries.

    Every Placemark is scanned for Polygon, LineString and Point elements
    (in that order) and each one found is converted with ``shape``.

    Args:
        kml_file: The KML document as bytes (a ``str`` is accepted too).

    Returns:
        A list of geometries, or ``None`` when no usable geometry is found.

    Raises:
        ValueError: If a coordinate tuple has fewer than two components or
            a component is not numeric.
    """
    ns = {'kml': 'http://www.opengis.net/kml/2.2'}
    # Hand the raw document to the parser so it can honour the encoding
    # declared in the XML prolog.
    root = ET.fromstring(kml_file)

    def lon_lat_pairs(node):
        # KML tuples are "lon,lat[,alt]" separated by whitespace. Altitude is
        # optional, so only the first two components are used.
        if node is None or not (node.text or '').strip():
            return []
        return [
            (float(lon), float(lat))
            for lon, lat in (item.split(',')[:2] for item in node.text.split())
        ]

    shapes = []
    for mark in root.findall('.//kml:Placemark', ns):
        for polygon in mark.findall('.//kml:Polygon', ns):
            # Standard KML nests the ring under outerBoundaryIs/LinearRing.
            outer = polygon.find('kml:outerBoundaryIs/kml:LinearRing/kml:coordinates', ns)
            if outer is None:
                # Fall back to the flat layout the previous lookup expected.
                outer = polygon.find('kml:coordinates', ns)
            shell = lon_lat_pairs(outer)
            if not shell:
                continue
            holes = [
                lon_lat_pairs(inner)
                for inner in polygon.findall('kml:innerBoundaryIs/kml:LinearRing/kml:coordinates', ns)
            ]
            rings = [shell] + [hole for hole in holes if hole]
            shapes.append(shape({'type': 'Polygon', 'coordinates': rings}))

        for line in mark.findall('.//kml:LineString', ns):
            path = lon_lat_pairs(line.find('kml:coordinates', ns))
            if path:
                shapes.append(shape({'type': 'LineString', 'coordinates': path}))

        for point in mark.findall('.//kml:Point', ns):
            location = lon_lat_pairs(point.find('kml:coordinates', ns))
            if location:
                shapes.append(shape({'type': 'Point', 'coordinates': location[0]}))

    return shapes if shapes else None
41
 
42
+ #For file extraction if it is in KMZ form
43
def ext(kmz_file):
    """Pull the KML document out of a KMZ archive and parse it.

    Args:
        kmz_file: The KMZ archive as raw bytes, a filesystem path, or a
            seekable binary file object.

    Returns:
        The result of ``p`` for the archive's KML member, or ``None`` when
        the archive contains no ``.kml`` member.
    """
    import io

    # ZipFile needs a path or a seekable stream; ``choose`` passes the raw
    # uploaded bytes, so wrap them.
    if isinstance(kmz_file, (bytes, bytearray)):
        kmz_file = io.BytesIO(kmz_file)

    with zipfile.ZipFile(kmz_file, 'r') as zip_ref:
        members = [name for name in zip_ref.namelist() if name.lower().endswith('.kml')]
        if not members:
            return None
        # Prefer the shallowest member (the main document normally sits at
        # the archive root). Reading it in memory avoids picking up a stale
        # file left in a shared extraction directory by an earlier upload.
        main_doc = min(members, key=lambda name: name.count('/'))
        return p(zip_ref.read(main_doc))
49
 
50
+ # See if it is a kml or kmz file
51
def choose(upf):
    """Read an uploaded KML or KMZ file and return its geometries.

    Args:
        upf: Upload object exposing ``read()`` (returning bytes) and a
            ``name`` attribute holding the file name.

    Returns:
        Whatever ``ext`` (for ``.kmz`` names) or ``p`` (for anything else)
        returns for the file's content.
    """
    file_bytes = upf.read()

    # Compare the extension case-insensitively so "AREA.KMZ" is still
    # unpacked as an archive instead of being fed to the XML parser.
    if upf.name.lower().endswith('.kmz'):
        return ext(file_bytes)
    return p(file_bytes)
62
 
63
 
64
+ # For file uploading
65
  st.title("Flood Zone Analysis")
66
+ upf = st.file_uploader("Upload KML/KMZ file", type=['kml', 'kmz'])
67
 
68
+ # For comparing the boundary between KML and shapefile
69
def bound(f, gdf):
    """Find the parts of the flood-zone layer that overlap the KML shapes.

    Args:
        f: Iterable of geometries (a GeoSeries or a plain list). Presumably
            already in the same CRS as ``gdf`` - verify against the caller.
        gdf: GeoDataFrame holding the flood-zone features.

    Returns:
        ``(overlaps, every_int)`` where ``overlaps`` is a list with one
        GeoDataFrame per KML shape that hit something (rows are the matching
        features, geometry clipped to that KML shape) and ``every_int`` is
        the union of all clipped geometries. On failure, ``(message, None)``
        with a human-readable message.
    """
    kml_shapes = [geom for geom in f if geom is not None and not geom.is_empty]
    if not kml_shapes:
        return "Invalid KML shape or no valid polygon found.", None

    overlaps = []  # one frame of matching, clipped features per KML shape
    for kml_shape in kml_shapes:
        hits = gdf[gdf.intersects(kml_shape)].copy()
        if hits.empty:
            continue
        # Keep only the overlap with the KML shape; otherwise every area
        # derived from these rows would be the area of the whole flood-zone
        # polygon rather than of the uploaded boundary.
        hits[hits.geometry.name] = hits.geometry.intersection(kml_shape)
        overlaps.append(hits)

    if not overlaps:
        return "Boundary doesn't match", None

    every_int = unary_union([geom for frame in overlaps for geom in frame.geometry])
    return overlaps, every_int
81
+
82
+
83
+ # Find common bound's Acreage and Usable Land
84
def land(overlaps, every_int):
    """Compute per-zone acreage, non-flood-prone acreage and merged acreage.

    Args:
        overlaps: List of GeoDataFrames with a ``FLD_ZONE`` column.
        every_int: Geometry whose ``area`` is reported as the merged acreage.

    Returns:
        ``(fza, non, merged_area)``: a dict mapping each ``FLD_ZONE`` value
        to its summed acreage, the summed acreage of rows whose zone is not
        A/AE/AH/AO/VE, and the acreage of ``every_int``. An empty
        ``overlaps`` list yields ``({}, 0.0, 0.0)``.
    """
    # Divisor used to turn geometry areas into acres; assumes the areas are
    # in square metres - TODO confirm against the CRS chosen by the caller.
    area_units_per_acre = 4046.86
    areas = ['A', 'AE', 'AH', 'AO', 'VE']  # zones treated as flood-prone

    if not overlaps:
        # pd.concat raises on an empty list; report "nothing found" instead.
        return {}, 0.0, 0.0

    # Named ``combined`` rather than ``all`` so the builtin is not shadowed.
    combined = pd.concat(overlaps)
    combined['area'] = combined.geometry.area
    combined['area_acres'] = combined['area'] / area_units_per_acre

    # Acreage per distinct flood zone present in the data.
    fza = {
        zone: combined[combined['FLD_ZONE'] == zone]['area_acres'].sum()
        for zone in combined['FLD_ZONE'].unique()
    }
    # Acreage of everything outside the flood-prone zones.
    non = combined[~combined['FLD_ZONE'].isin(areas)]['area_acres'].sum()
    merged_area = every_int.area / area_units_per_acre
    return fza, non, merged_area
96
+
97
+ # Initial summary was with llama which performed well but due to api issues, had to use GPT-2
98
def summ(fza, non, total_acreage):
    """Ask GPT-2 for a short text summary of the acreage figures.

    Args:
        fza: Mapping of flood-zone code to acreage.
        non: Acreage reported as "Usable Area" in the prompt.
        total_acreage: Acreage reported as "Total Land Area" in the prompt.

    Returns:
        The text GPT-2 generates after the prompt, stripped of surrounding
        whitespace.
    """
    # GPT-2 is a decoder-only language model, so it is driven through the
    # text-generation task. The "summarization" task targets encoder-decoder
    # models and, with GPT-2, would presumably echo the prompt back as part
    # of the "summary" - confirm against the transformers pipeline docs.
    generator = pipeline("text-generation", model="gpt2")

    areas = ['A', 'AE', 'AH', 'AO', 'VE']
    flood_zone_summary = "\n".join(
        f" Zone {zone}: {fza.get(zone, 0):.2f} acres" for zone in areas
    )

    prompt = f"""
    **Total Land Area**: {total_acreage:.2f} acres
    **Usable Area**: {non:.2f} acres
    **Flood-prone Zones**:
    {flood_zone_summary}
    Summarize the above given data in a 2-3 sentences.
    """

    # return_full_text=False drops the prompt so only the continuation is
    # shown to the user; greedy decoding keeps the output deterministic.
    response = generator(
        prompt,
        max_new_tokens=120,
        do_sample=False,
        return_full_text=False,
    )
    return response[0]['generated_text'].strip()
115
 
116
 
117
+ if upf is not None:
118
+ # Read shapefiles
119
+ kent = gpd.read_file("K_FLD_HAZ_AR.shp")
120
+ nc = gpd.read_file("N_FLD_HAZ_AR.shp")
121
+ sussex = gpd.read_file("S_FLD_HAZ_AR.shp")
122
+
123
+ # Put them together
124
+ dela = gpd.GeoDataFrame(pd.concat([kent, nc, sussex], ignore_index=True))
125
 
126
+ # Add Coordinate Reference System
127
+ dela = dela.set_crs(kent.crs, allow_override=True)
128
 
129
+ dela = dela.to_crs(epsg=3857)
 
130
 
131
+ # fix invalid geometries
132
+ dela['geometry'] = dela['geometry'].apply(lambda x: x.buffer(0) if not x.is_valid else x)
133
 
134
+ # Read the uploaded KML/KMZ file
135
+ f = choose(upf)
136
 
137
+ if f:
138
+ # check if KML has valid geometries
139
+ kml_gdf = gpd.GeoDataFrame(geometry=f, crs="EPSG:4326")
140
 
 
 
 
141
 
 
142
  kml_gdf = kml_gdf.to_crs(epsg=3857)
143
 
144
  # Compare KML and Shapefile
145
+ intersection, every_int = bound(kml_gdf.geometry, dela)
146
 
147
  if isinstance(intersection, str):
148
  st.write(intersection)
149
  else:
150
+ flood_zone_areas, non, merged_area = land(intersection, every_int)
151
  st.write(f"Flood Zone Areas:")
152
  for zone, area in flood_zone_areas.items():
153
  st.write(f" Zone {zone}: {area:.2f} acres")
154
+ st.write(f"\nNon-Flooded Land Area: {non:.2f} acres")
155
  st.write(f"\nMerged Area of Intersected Boundary: {merged_area:.2f} acres")
156
+ summary = summ(flood_zone_areas, non, merged_area)
157
  st.write(f"GPT-2 Summary: {summary}")
158
 
159
+ # Show map
160
  fig, ax = plt.subplots(figsize=(10, 10))
161
 
162
+ # shapefile
163
+ dela.plot(ax=ax, color='blue', alpha=0.5)
164
 
165
+ # Show overlap with KML
166
  if intersection:
 
167
  intersection_geom = unary_union([geom for intersect in intersection for geom in intersect.geometry])
168
+ gpd.GeoDataFrame(geometry=[intersection_geom], crs=dela.crs).plot(ax=ax, color='red', alpha=0.7)
169
 
170
  # Plot the KML boundary (green color)
171
  kml_gdf.plot(ax=ax, color='green', alpha=0.3)