import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import Normalize
import cartopy.crs as ccrs
import cartopy.feature as cfeature
PERCENTILE = 50
WIND_THRESH = 64
# Column names for STORMS data
colnames = ['sim', 'month', 't', 'i', 'basin', 'lat', 'lon', 'min_press', 'wind', 'radius', 'CAT', 'landfall', 'dist_land']
# Read and concatenate STORMS data files (0-9) to create 'all' file
print("Reading STORMS data files...")
storms_data = []
for file_num in range(10): # Files 0 through 9
filename = f'STORM/STORM_DATA_IBTRACS_NA_1000_YEARS_{file_num}.txt'
print(f"Reading {filename}...")
# Read the data file
df = pd.read_csv(filename, header=None, names=colnames, skipinitialspace=True)
# Add file offset to simulation number to make them unique across files
df['sim'] = df['sim'] + (file_num * 1000) # Offset by 1000*file_num to avoid sim number conflicts
storms_data.append(df)
# Concatenate all data
hits = pd.concat(storms_data, ignore_index=True)
print(f"Total records loaded: {len(hits)}")
# Convert wind speed from m/s to knots (1 m/s = 1.94384 knots)
hits['wind'] = hits['wind'] * 1.94384
# Convert longitude from 0-360 to -180-180 format for Atlantic basin
hits['lon'] = np.where(hits['lon'] > 180, hits['lon'] - 360, hits['lon'])
# Filter for North Atlantic basin (basin = 1)
hits = hits[hits['basin'] == 1].copy()
# Create time column and extract year (using sim as year proxy since each sim represents a season)
# For STORMS data, we'll use 'sim' as the year identifier since each simulation represents one season
hits['year'] = hits['sim']
print(f"Records after filtering for NA basin: {len(hits)}")
print(f"Number of unique simulations (years): {hits['year'].nunique()}")
# Observed Data (keep original observed data processing unchanged)
main1 = pd.read_csv('main1_NA.csv') # ...header/dtype as before
main1['time2'] = pd.to_datetime(main1['time'])
main1['year'] = main1['time2'].dt.year
main1 = main1[(main1['year'] >= 1944) & (main1['year'] <= 2024)]
num_unique_years = main1['year'].nunique()
lon_bins = np.arange(-100, 12, 2)
lat_bins = np.arange(4, 52, 2)
obs_hit = np.zeros((len(lat_bins)-1, len(lon_bins)-1))
for year in main1['year'].unique():
year_data = main1[main1['year'] == year]
# For each grid box, what is the max wind this year?
H_max, _, _ = np.histogram2d(year_data['lat'], year_data['lon'],
bins=[lat_bins, lon_bins],
weights=year_data['wind'])
# Where was max wind >= WIND_THRESH?
hit_mask = (H_max >= WIND_THRESH)
obs_hit += hit_mask.astype(int)
# Calculate probability (fraction of years)
obs_prob = obs_hit / num_unique_years
# Simulated Data (adapted for STORMS data format)
sim_values = hits['year'].unique() # Use 'year' (which is sim number) as identifier
num_sims_to_sample = num_unique_years
num_draws = 100
sim_prob_samples = []
print(f"Processing {num_draws} random samples of {num_sims_to_sample} simulations each...")
for draw in range(num_draws):
if (draw + 1) % 20 == 0:
print(f"Completed {draw + 1}/{num_draws} draws")
selected_sims = np.random.choice(sim_values, size=num_sims_to_sample, replace=False)
sim_hit = np.zeros((len(lat_bins)-1, len(lon_bins)-1))
for sim in selected_sims:
sim_data = hits[hits['year'] == sim]
H_max, _, _ = np.histogram2d(sim_data['lat'], sim_data['lon'],
bins=[lat_bins, lon_bins],
weights=sim_data['wind'])
hit_mask = (H_max >= WIND_THRESH)
sim_hit += hit_mask.astype(int)
sim_prob = sim_hit / num_sims_to_sample
sim_prob_samples.append(sim_prob)
sim_prob_samples = np.stack(sim_prob_samples)
sim_prob_percentile = np.percentile(sim_prob_samples, PERCENTILE, axis=0)
print("Creating plots...")
# Plot: vmax=0.5 for color scale
fig = plt.figure(figsize=(16, 7), constrained_layout=True)
ax1 = fig.add_subplot(1, 2, 1, projection=ccrs.PlateCarree())
im1 = ax1.imshow(
obs_prob, origin='lower', aspect='auto', cmap='jet',
extent=[lon_bins[0], lon_bins[-1], lat_bins[0], lat_bins[-1]],
norm=Normalize(vmin=0, vmax=0.5),
transform=ccrs.PlateCarree()
)
ax1.set_title('Observed: Annual Max Wind ≥64 kt Probability')
ax1.set_xlabel('Longitude')
ax1.set_ylabel('Latitude')
ax1.set_extent([lon_bins[0], lon_bins[-1], lat_bins[0], lat_bins[-1]], crs=ccrs.PlateCarree())
ax1.add_feature(cfeature.COASTLINE.with_scale('50m'), edgecolor='white', linewidth=1.2)
ax1.add_feature(cfeature.LAND, facecolor='none', edgecolor='white', linewidth=1.2)
ax2 = fig.add_subplot(1, 2, 2, projection=ccrs.PlateCarree())
im2 = ax2.imshow(
sim_prob_percentile, origin='lower', aspect='auto', cmap='jet',
extent=[lon_bins[0], lon_bins[-1], lat_bins[0], lat_bins[-1]],
norm=Normalize(vmin=0, vmax=0.5),
transform=ccrs.PlateCarree()
)
ax2.set_title(f'Simulated: Annual Max Wind ≥64 kt Probability (Percentile={PERCENTILE})')
ax2.set_xlabel('Longitude')
ax2.set_extent([lon_bins[0], lon_bins[-1], lat_bins[0], lat_bins[-1]], crs=ccrs.PlateCarree())
ax2.add_feature(cfeature.COASTLINE.with_scale('50m'), edgecolor='white', linewidth=1.2)
ax2.add_feature(cfeature.LAND, facecolor='none', edgecolor='white', linewidth=1.2)
cbar = fig.colorbar(im1, ax=[ax1, ax2], orientation='vertical', fraction=0.025, pad=0.04, label='Probability per Year (Annual Max Wind ≥64 kt)')
# Save both TIFF and JPG formats
plt.savefig('NAwind_prob_comparisonSTORM_64.tiff', format='tiff', dpi=300)
plt.savefig('NAwind_prob_comparisonSTORM_64.jpg', format='jpeg', dpi=300)
print("Plots saved as NAwind_prob_comparisonSTORM_64.tiff and NAwind_prob_comparisonSTORM_64.jpg")
plt.show()
print("Analysis complete!")