#!/usr/bin/perl
use strict;
use warnings;

# Script to make forecasts with CPT from SINGLE GCM start date: 
#   here from Jan 2008.
# Andrew Robertson, 12/08/2008
# 12/16/08: version to output deterministic forecast values 
# and prediction error variances (only)

# working directory
# chdir "/Users/andy/data/Americas/Nordeste/CPT";
`cp /Users/andy/pgm/stats/CPT/9.04a/CPT.x .`;
`cp /Users/andy/pgm/stats/CPT/9.04a/CPT.ini .`;

my $maxlead=1;	# maximum lead time
my $firststart=1;	# first GCM start time to use (months from Jan) 
my $laststart=1;	# last GCM start time to use (months from Jan) 
my $yrB=1973; 	# beginning year of TRAINING period  	
my $yrE=2001; 	# ending year of TRAINING period 
# NB: obs data must be available for one additional year
my $ltrain=29;	# length of training period

# x data limits (ie GCM)
my $latNx=10;	# northern bounding lat
my $latSx=-20;	# southern bounding lat
my $lonWx=-70;	# western bounding lon
my $lonEx=-10;	# eastern bounding lon
		
# y data limits (ie local rainfall data)		
my $latNy=-3;
my $latSy=-10;
my $lonWy=-42;
my $lonEy=-36;

# Forecast start date
my $fmon='Jan';
my $fyear=2008;

# TRAINING DATA:	 
# get GCM fcsts for 1973-2001 (rainfall data goes 2002)
# note: CPT doesn't like "0000 1 Jan" format
# note: can't convert ECHAM-CA precip units m/s --> mm/day in DL
# NB: URLs are hardwrired for lat, lon, year ranges; should be subst!

# ECHAM-CA
#my $xsource='http://iridl.ldeo.columbia.edu/expert/SOURCES/.IRI/.FD/.ECHAM4p5/.Forecast/.ca_sst/.ensemble24/.MONTHLY/.prec/X/%2870W%29%2810W%29RANGEEDGES/Y/%2810N%29%2820S%29RANGEEDGES/L/%281.5%29%284.5%29RANGEEDGES/S/%281%20Jan%201973-2001%29VALUES%5BM/L%5Daverage/-999/setmissing_value/datatable.tsv';
my
$x_str1='http://iridl.ldeo.columbia.edu/expert/SOURCES/.IRI/.FD/.ECHAM4p5/.Forecast/.ca_sst/.ensemble24/.MONTHLY/.prec/X/%2870W%29%2810W%29RANGEEDGES/Y/%2810N%29%2820S%29RANGEEDGES/L/%28';
my $x_str2='%201973-2001%29VALUES%5BM/L%5Daverage/-999/setmissing_value/%5BX/Y%5D%28%25d%20%25b%20%25Y%5BS%5D%20%29datatable.tsv';

# Brazil CRU gridded data
#my $ysource='http://iridl.ldeo.columbia.edu/expert/SOURCES/.UEA/.CRU/.TS2p1/.monthly/.prcp/Y/%2810S%29%283S%29RANGEEDGES/X/%2842W%29%2836W%29RANGEEDGES/T/%28Jan%201973%29%28Dec%202002%29RANGE/T/12/splitstreamgrid/T/%28Feb%29%28Mar%29%28Apr%29VALUES%5BT%5Daverage/-999/setmissing_value/Y/high/low/RANGE/%5BX/Y%5D%28%25d%20%25b%20%25Y%5BT2%5D%20%29datatable.tsv';
my $y_str1='http://iridl.ldeo.columbia.edu/expert/SOURCES/.UEA/.CRU/.TS2p1/.monthly/.prcp/Y/%2810S%29%283S%29RANGEEDGES/X/%2842W%29%2836W%29RANGEEDGES/T/%28Jan%201973%29%28Dec%202002%29RANGE/T/12/splitstreamgrid/T/%28';
my $y_str2='%29VALUES%5BT%5Daverage/-999/setmissing_value/Y/high/low/RANGE/%5BX/Y%5D%28%25d%20%25b%20%25Y%5BT2%5D%20%29datatable.tsv';

# FORECAST DATA:	
#http://iridl.ldeo.columbia.edu/expert/SOURCES/.IRI/.FD/.ECHAM4p5/.Forecast/.ca_sst/.ensemble24/.MONTHLY/.prec/X/%2870W%29%2810W%29RANGEEDGES/Y/%2810N%29%2820S%29RANGEEDGES/L/%281.5%29%284.5%29RANGEEDGES/S/%281%20Jan%202008%29VALUES%5BM/L%5Daverage/
my $xfor_str2='%29VALUES%5BM/L%5Daverage/-999/setmissing_value/%5BX/Y%5D%28%25d%20%25b%20%25Y%5BS%5D%20%29datatable.tsv';

my @start=('Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec','Jan','Feb','Mar','Apr','May','Jun');
my $s=0; my $lead=0;

for( $s=$firststart; $s<=$laststart; $s=$s+1 ) {
  # Looping over start month      
  print "GCM Start month: $start[$s-1] \n";
  for( $lead=1; $lead<=$maxlead; $lead=$lead+1 ) { 
      my $lead1=${lead}+0.5; my $lead2=${lead}+3.5;
      print "GCM Forecast lead: ${lead1}-${lead2}\n\n";
  
      # get the data files
      # `wget ${x_str1}$start[$s-1]${x_str2}`;   # fixed lead
      # multiple lead:
      `wget ${x_str1}${lead1}%29%28${lead2}%29RANGEEDGES/S/%281%20$start[$s-1]${x_str2}`;
      
      `mv -f "Y](%d %b %Y[S] )datatable.tsv" xdata.tsv`;
      print "Precip Target season: $start[$s]-$start[$s+2] \n";
      # `wget ${y_str1}$start[$s]-$start[$s+2]${y_str2}`;	# station data
      `wget ${y_str1}$start[$s]'%29%28'$start[$s+1]'%29%28'$start[$s+2]${y_str2}`;	# gridded data
      `mv -f "Y](%d %b %Y[T2] )datatable.tsv" ydata.tsv`;
      
      # fcst data file
      print "GCM Forecast data file:\n";
      `wget ${x_str1}${lead1}%29%28${lead2}%29RANGEEDGES/S/%281%20${fmon}%20${fyear}${xfor_str2}`;
      `mv -f "Y](%d %b %Y[S] )datatable.tsv" xfcst_data.tsv`;
      
      # Creating parameter file
      `rm -f params`;
      `touch params`;
      # option : 1 for CCA; 2 for PCR 
	`echo '1' >> params`;
	# X input data 
	`echo 'xdata.tsv' >> params`; 
	# lats N bound 
	`echo "$latNx" >> params`;
	#lats S bound  
	`echo "$latSx" >> params`;
	#lons W bound 
	`echo "$lonWx" >> params`;
	#lons E bound
	`echo "$lonEx" >> params`;
	# 1st year of X training 
	`echo '$yrB' >> params`;
	
	# Y input data
	`echo 'ydata.tsv' >> params`; 
	# lats N bound
	`echo "$latNy" >> params`;
	#lats S bound
	`echo "$latSy" >> params`;
	#lons W bound
	`echo "$lonWy" >> params`;
	#lons E bound
	`echo "$lonEy" >> params`;
	# 1st year of Y training
	`echo '$yrB' >> params`;
 
	# Length of training period 
	`echo "$ltrain" >> params`;
	# Legth of cross-validation window 
	`echo '3' >> params`;
	# min # of X EOF modes 
	`echo '1' >> params`;
	# max # of X EOF modes
	`echo '5' >> params`;
	# min # of Y EOF modes
	`echo '1' >> params`;
	# max # of Y EOF modes
	`echo '5' >> params`;
	# min # of CCA modes
	`echo '1' >> params`;
	# max # of CCA modes
	`echo '3' >> params`;
	
	# missing values	
	`echo '11' >> params`; 
	# X
	`echo '-999' >> params`;
	# Maximum % of missing values:
	`echo '10' >> params`;
	# replace w/long-term mean
	`echo '1' >> params`;	
	# 
	`echo '-999' >> params`; 
	# Maximum % of missing values:
	`echo '10' >> params`;
	# replace w/long-term mean
	`echo '1' >> params`;	
	
	# option for analysis (eg, 15 for cross-validated analysis)	
	`echo '15' >> params`; 
	# Data output
	`echo '1' >> params`;  
	# 1. Cross-Validated Predictions
	`echo '1' >> params`;
	# outfile name
	`echo 'CrossValPred_Start${s}_Lead${lead}' >> params`; 
	# Y Input Data
	`echo '20' >> params`;
	# Y Input Data Output file:
	`echo 'YinputData_Start${s}_Lead${lead}' >> params`; 
	# exit to previous menu
	`echo '0' >> params`;
	# skill maps
	`echo '9' >> params`;
	# Pearson skill score
	`echo '1' >> params`;
	# outfile name
	`echo 'PearsonR_Start${s}_Lead${lead}' >> params`; 
	
	# Forecast settings
	`echo '17' >> params`;
	# Prediction interval confidence level (%):
	`echo '95' >> params`;
	# Error variance:
	# 1. Cross-validated error variance
	# 3. Fitted error variance
	`echo '1' >> params`;
	# Open forecast file
	`echo '16' >> params`;
	`echo 'xfcst_data.tsv' >> params`; 
	# First year from which to forecast:
	`echo '$fyear' >> params`;
	# Number of years to forecast:
	`echo '1' >> params`;
	
	# Write out the fcst values & error variances
	`echo '20' >> params`;
	# Save results (Y/N)?
	`echo 'Y' >> params`;
	`echo '2' >> params`;
	# Forecasts Output file:
	`echo 'Forecasts_Start${s}_Lead${lead}' >> params`; 
	`echo '3' >> params`;
	# Prediction error variances Output file:
	`echo 'PredErrorVars_Start${s}_Lead${lead}' >> params`; 
	
	# exit cpt 
	`echo '0' >> params`;
	`echo '0' >> params`;
	`echo '0' >> params`;
	`echo '0' >> params`;
	
	`rm -f YinputData_Start${s}_Lead${lead}.txt`;
	`rm -f PearsonR_Start${s}_Lead${lead}.txt`;
	`rm -f CrossValPred_Start${s}_Lead${lead}.txt`;
	`rm -f Forecasts_Start${s}_Lead${lead}.txt`;
	`rm -f PredErrorVars_Start${s}_Lead${lead}.txt`;
	`rm -f Fcst_stout_Start${s}_Lead${lead}.txt`;
	
	`CPT.x < params > Fcst_stout_Start${s}_Lead${lead}.txt`;
#	`rm params`
}   
}