import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import Normalize
import cartopy.crs as ccrs
import cartopy.feature as cfeature

PERCENTILE = 50 

# Column names for STORMS data
colnames = ['sim', 'month', 't', 'i', 'basin', 'lat', 'lon', 'min_press', 'wind', 'radius', 'CAT', 'landfall', 'dist_land']

# Read and Prepare Observed Data (main1_NA.csv) 

dtype_dict = {
    'seqnum': float,
    'i': float,
    'lat': float,
    'lon': float,
    'f-i': float,
    'forward_angle': float,
    'wind': float,
    'wind_u': float,
    'wind_v': float,
    'landfall': float,
    'time': str,
    'tracknum': str
}
cols = [
    'seqnum', 'i', 'lat', 'lon', 'f-i', 'forward_angle',
    'wind', 'wind_u', 'wind_v', 'landfall', 'time', 'tracknum'
]
main1 = pd.read_csv('main1_NA.csv', names=cols, header=0, dtype=dtype_dict)
main1['time2'] = pd.to_datetime(main1['time'])
main1['year'] = main1['time2'].dt.year
main1 = main1[(main1['year'] >= 1944) & (main1['year'] <= 2024)]
num_unique_years = main1['year'].nunique()
print(num_unique_years)

# Compute Observed Track Density 

lon_bins = np.arange(-100, 12, 2)
lat_bins = np.arange(4, 52, 2)
H_obs, _, _ = np.histogram2d(main1['lat'], main1['lon'], bins=[lat_bins, lon_bins])

# Read and Prepare Simulated Data (STORMS files) 

# Read and concatenate STORMS data files (0-9) to create 'all' file
print("Reading STORMS data files...")
storms_data = []

for file_num in range(10):  # Files 0 through 9
    filename = f'STORM/STORM_DATA_IBTRACS_NA_1000_YEARS_{file_num}.txt'
    print(f"Reading {filename}...")
    
    # Read the data file
    df = pd.read_csv(filename, header=None, names=colnames, skipinitialspace=True)
    
    # Add file offset to simulation number to make them unique across files
    df['sim'] = df['sim'] + (file_num * 1000)  # Offset by 1000*file_num to avoid sim number conflicts
    
    storms_data.append(df)

# Concatenate all data
hits = pd.concat(storms_data, ignore_index=True)
print(f"Total records loaded: {len(hits)}")

# Convert longitude from 0-360 to -180-180 format for Atlantic basin
hits['lon'] = np.where(hits['lon'] > 180, hits['lon'] - 360, hits['lon'])

# Filter for North Atlantic basin (basin = 1) 
hits = hits[hits['basin'] == 1].copy()

# Create time column and extract year (using sim as year proxy since each sim represents a season)
# For STORMS data, we'll use 'sim' as the year identifier since each simulation represents one season
hits['year'] = hits['sim']

print(f"Records after filtering for NA basin: {len(hits)}")
print(f"Number of unique simulations (years): {hits['year'].nunique()}")

# Compute Simulated Track Density (User-Selected Percentile) 
sim_density_samples = []
sim_values = hits['sim'].unique()
num_sims_to_sample = num_unique_years
num_draws = 100

print(f"Processing {num_draws} random samples of {num_sims_to_sample} simulations each...")

for draw in range(num_draws):
    if (draw + 1) % 20 == 0:
        print(f"Completed {draw + 1}/{num_draws} draws")
    
    selected_sims = np.random.choice(sim_values, size=num_sims_to_sample, replace=False)
    sample = hits[hits['sim'].isin(selected_sims)]
    H_sim, _, _ = np.histogram2d(sample['lat'], sample['lon'], bins=[lat_bins, lon_bins])
    sim_density_samples.append(H_sim)

sim_density_samples = np.stack(sim_density_samples)
H_sim_percentile = np.percentile(sim_density_samples, PERCENTILE, axis=0)

print("Creating plots...")

# Plot Side-by-Side Density Maps with Land Outlines and Save as TIFF and JPG 
fig = plt.figure(figsize=(16, 7), constrained_layout=True)

# Observed (left)
ax1 = fig.add_subplot(1, 2, 1, projection=ccrs.PlateCarree())
im1 = ax1.imshow(
    H_obs, origin='lower', aspect='auto', cmap='jet',
    extent=[lon_bins[0], lon_bins[-1], lat_bins[0], lat_bins[-1]],
    norm=Normalize(vmin=0, vmax=max(H_obs.max(), H_sim_percentile.max())),
    transform=ccrs.PlateCarree()
)
ax1.set_title('Observed Track Density (1944-2024)')
ax1.set_xlabel('Longitude')
ax1.set_ylabel('Latitude')
ax1.set_extent([lon_bins[0], lon_bins[-1], lat_bins[0], lat_bins[-1]], crs=ccrs.PlateCarree())
ax1.add_feature(cfeature.COASTLINE.with_scale('50m'), edgecolor='white', linewidth=1.2)
ax1.add_feature(cfeature.LAND, facecolor='none', edgecolor='white', linewidth=1.2)

# Simulated (right, user-selected percentile)
ax2 = fig.add_subplot(1, 2, 2, projection=ccrs.PlateCarree())
im2 = ax2.imshow(
    H_sim_percentile, origin='lower', aspect='auto', cmap='jet',
    extent=[lon_bins[0], lon_bins[-1], lat_bins[0], lat_bins[-1]],
    norm=Normalize(vmin=0, vmax=max(H_obs.max(), H_sim_percentile.max())),
    transform=ccrs.PlateCarree()
)
ax2.set_title(f'Simulated Track Density ({PERCENTILE}th percentile)')
ax2.set_xlabel('Longitude')
ax2.set_extent([lon_bins[0], lon_bins[-1], lat_bins[0], lat_bins[-1]], crs=ccrs.PlateCarree())
ax2.add_feature(cfeature.COASTLINE.with_scale('50m'), edgecolor='white', linewidth=1.2)
ax2.add_feature(cfeature.LAND, facecolor='none', edgecolor='white', linewidth=1.2)

# Shared colorbar
cbar = fig.colorbar(im1, ax=[ax1, ax2], orientation='vertical', fraction=0.025, pad=0.04, label='Track Count')

# Save the figure as both TIFF and JPG files
plt.savefig('NAtrack_density_comparisonSTORM_50.tiff', format='tiff', dpi=300)
plt.savefig('NAtrack_density_comparisonSTORM_50.jpg', format='jpeg', dpi=300)
print("Plots saved as NAtrack_density_comparisonSTORM_50.tiff and NAtrack_density_comparisonSTORM_50.jpg")

plt.show()

print("Analysis complete!")
81
Reading STORMS data files...
Reading STORM/STORM_DATA_IBTRACS_NA_1000_YEARS_0.txt...
Reading STORM/STORM_DATA_IBTRACS_NA_1000_YEARS_1.txt...
Reading STORM/STORM_DATA_IBTRACS_NA_1000_YEARS_2.txt...
Reading STORM/STORM_DATA_IBTRACS_NA_1000_YEARS_3.txt...
Reading STORM/STORM_DATA_IBTRACS_NA_1000_YEARS_4.txt...
Reading STORM/STORM_DATA_IBTRACS_NA_1000_YEARS_5.txt...
Reading STORM/STORM_DATA_IBTRACS_NA_1000_YEARS_6.txt...
Reading STORM/STORM_DATA_IBTRACS_NA_1000_YEARS_7.txt...
Reading STORM/STORM_DATA_IBTRACS_NA_1000_YEARS_8.txt...
Reading STORM/STORM_DATA_IBTRACS_NA_1000_YEARS_9.txt...
Total records loaded: 3507558
Records after filtering for NA basin: 3507558
Number of unique simulations (years): 10000
Processing 100 random samples of 81 simulations each...
Completed 20/100 draws
Completed 40/100 draws
Completed 60/100 draws
Completed 80/100 draws
Completed 100/100 draws
Creating plots...
Plots saved as NAtrack_density_comparisonSTORM_50.tiff and NAtrack_density_comparisonSTORM_50.jpg

Analysis complete!