import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import Normalize
import cartopy.crs as ccrs
import cartopy.feature as cfeature
PERCENTILE = 50
# Column names for STORMS data
colnames = ['sim', 'month', 't', 'i', 'basin', 'lat', 'lon', 'min_press', 'wind', 'radius', 'CAT', 'landfall', 'dist_land']
# Read and Prepare Observed Data (main1_NA.csv)
dtype_dict = {
'seqnum': float,
'i': float,
'lat': float,
'lon': float,
'f-i': float,
'forward_angle': float,
'wind': float,
'wind_u': float,
'wind_v': float,
'landfall': float,
'time': str,
'tracknum': str
}
cols = [
'seqnum', 'i', 'lat', 'lon', 'f-i', 'forward_angle',
'wind', 'wind_u', 'wind_v', 'landfall', 'time', 'tracknum'
]
main1 = pd.read_csv('main1_NA.csv', names=cols, header=0, dtype=dtype_dict)
main1['time2'] = pd.to_datetime(main1['time'])
main1['year'] = main1['time2'].dt.year
main1 = main1[(main1['year'] >= 1944) & (main1['year'] <= 2024)]
num_unique_years = main1['year'].nunique()
print(num_unique_years)
# Compute Observed Track Density
lon_bins = np.arange(-100, 12, 2)
lat_bins = np.arange(4, 52, 2)
H_obs, _, _ = np.histogram2d(main1['lat'], main1['lon'], bins=[lat_bins, lon_bins])
# Read and Prepare Simulated Data (STORMS files)
# Read and concatenate STORMS data files (0-9) to create 'all' file
print("Reading STORMS data files...")
storms_data = []
for file_num in range(10): # Files 0 through 9
filename = f'STORM/STORM_DATA_IBTRACS_NA_1000_YEARS_{file_num}.txt'
print(f"Reading {filename}...")
# Read the data file
df = pd.read_csv(filename, header=None, names=colnames, skipinitialspace=True)
# Add file offset to simulation number to make them unique across files
df['sim'] = df['sim'] + (file_num * 1000) # Offset by 1000*file_num to avoid sim number conflicts
storms_data.append(df)
# Concatenate all data
hits = pd.concat(storms_data, ignore_index=True)
print(f"Total records loaded: {len(hits)}")
# Convert longitude from 0-360 to -180-180 format for Atlantic basin
hits['lon'] = np.where(hits['lon'] > 180, hits['lon'] - 360, hits['lon'])
# Filter for North Atlantic basin (basin = 1)
hits = hits[hits['basin'] == 1].copy()
# Create time column and extract year (using sim as year proxy since each sim represents a season)
# For STORMS data, we'll use 'sim' as the year identifier since each simulation represents one season
hits['year'] = hits['sim']
print(f"Records after filtering for NA basin: {len(hits)}")
print(f"Number of unique simulations (years): {hits['year'].nunique()}")
# Compute Simulated Track Density (User-Selected Percentile)
sim_density_samples = []
sim_values = hits['sim'].unique()
num_sims_to_sample = num_unique_years
num_draws = 100
print(f"Processing {num_draws} random samples of {num_sims_to_sample} simulations each...")
for draw in range(num_draws):
if (draw + 1) % 20 == 0:
print(f"Completed {draw + 1}/{num_draws} draws")
selected_sims = np.random.choice(sim_values, size=num_sims_to_sample, replace=False)
sample = hits[hits['sim'].isin(selected_sims)]
H_sim, _, _ = np.histogram2d(sample['lat'], sample['lon'], bins=[lat_bins, lon_bins])
sim_density_samples.append(H_sim)
sim_density_samples = np.stack(sim_density_samples)
H_sim_percentile = np.percentile(sim_density_samples, PERCENTILE, axis=0)
print("Creating plots...")
# Plot Side-by-Side Density Maps with Land Outlines and Save as TIFF and JPG
fig = plt.figure(figsize=(16, 7), constrained_layout=True)
# Observed (left)
ax1 = fig.add_subplot(1, 2, 1, projection=ccrs.PlateCarree())
im1 = ax1.imshow(
H_obs, origin='lower', aspect='auto', cmap='jet',
extent=[lon_bins[0], lon_bins[-1], lat_bins[0], lat_bins[-1]],
norm=Normalize(vmin=0, vmax=max(H_obs.max(), H_sim_percentile.max())),
transform=ccrs.PlateCarree()
)
ax1.set_title('Observed Track Density (1944-2024)')
ax1.set_xlabel('Longitude')
ax1.set_ylabel('Latitude')
ax1.set_extent([lon_bins[0], lon_bins[-1], lat_bins[0], lat_bins[-1]], crs=ccrs.PlateCarree())
ax1.add_feature(cfeature.COASTLINE.with_scale('50m'), edgecolor='white', linewidth=1.2)
ax1.add_feature(cfeature.LAND, facecolor='none', edgecolor='white', linewidth=1.2)
# Simulated (right, user-selected percentile)
ax2 = fig.add_subplot(1, 2, 2, projection=ccrs.PlateCarree())
im2 = ax2.imshow(
H_sim_percentile, origin='lower', aspect='auto', cmap='jet',
extent=[lon_bins[0], lon_bins[-1], lat_bins[0], lat_bins[-1]],
norm=Normalize(vmin=0, vmax=max(H_obs.max(), H_sim_percentile.max())),
transform=ccrs.PlateCarree()
)
ax2.set_title(f'Simulated Track Density ({PERCENTILE}th percentile)')
ax2.set_xlabel('Longitude')
ax2.set_extent([lon_bins[0], lon_bins[-1], lat_bins[0], lat_bins[-1]], crs=ccrs.PlateCarree())
ax2.add_feature(cfeature.COASTLINE.with_scale('50m'), edgecolor='white', linewidth=1.2)
ax2.add_feature(cfeature.LAND, facecolor='none', edgecolor='white', linewidth=1.2)
# Shared colorbar
cbar = fig.colorbar(im1, ax=[ax1, ax2], orientation='vertical', fraction=0.025, pad=0.04, label='Track Count')
# Save the figure as both TIFF and JPG files
plt.savefig('NAtrack_density_comparisonSTORM_50.tiff', format='tiff', dpi=300)
plt.savefig('NAtrack_density_comparisonSTORM_50.jpg', format='jpeg', dpi=300)
print("Plots saved as NAtrack_density_comparisonSTORM_50.tiff and NAtrack_density_comparisonSTORM_50.jpg")
plt.show()
print("Analysis complete!")