#!/usr/bin/perl
use strict;
use warnings;

# Script to run CPT for 12 3-month running seasons,
#  using "wget" to get X & Y data from the Data Library
# Andrew Robertson, 12/03/2008
# CPT recompiled on Mac 9.04 version, using gfortran
# 12/15/08: modified to read CPT v9.10
# 03/11/08: modified to run on manta (linux)
# 03/19/09: various bugs fixed in IRIDL paths; saved as "cptLEARN3.pl"
# 11/17/09 & 1/1/10: v4: revised for CPTv10
#
# Outputs:	PearsonR maps
#		CrossValPred (cross validated predictions)
# 

# working directory
chdir "/Users/andy/data/CPTv10/perlTest";
`cp /Users/andy/pgm/stats/CPT/10.04/CPT.x .`;
`cp /Users/andy/pgm/stats/CPT/10.04/CPT.ini .`;

my $maxlead=4;	# maximum lead time
my $firststart=9;	# first GCM start time to use (months from Jan) 
my $laststart=9;	# last GCM start time to use (months from Jan) 
#my $yrB=1973; 	# beginning year of period  	
#my $yrE=1973; 	# ending year of period 
			# NB: obs data must be available for one more year
#my $ltrain=29;	# length of training period
# CPTv10: uses full period in data files & automatically matches start year

# x data limits (ie GCM)
my $latNx=10;	# northern bounding lat
my $latSx=-20;	# southern bounding lat
my $lonWx=-70;	# western bounding lon
my $lonEx=-10;	# eastern bounding lon
		
# y data limits (ie local rainfall data)		
my $latNy=-3;
my $latSy=-10;
my $lonWy=-42;
my $lonEy=-36;
	 
# get GCM fcsts for 1973-2001 (rainfall data goes 2002)
# NB: URLs are hardwrired for lat, lon, year ranges; SHOULD BE SUBST!

my $xsource='http://iridl.ldeo.columbia.edu/expert/SOURCES/.IRI/.FD/.ECHAM4p5/.Forecast/.ca_sst/.ensemble24/.MONTHLY/.prec/X/%2870W%29%2810W%29RANGEEDGES/Y/%2810N%29%2820S%29RANGEEDGES/L/%281.5%29%284.5%29RANGEEDGES/S/%281%20Jan%201973-2001%29VALUES%5BM%5Daverage%5BL%5D//keepgrids/average/%28mm/day%29unitconvert/-999/setmissing_value%5BX/Y%5D%5BS/L/add//name//T/def%5Dcptv10.tsv';

my
$x_str1='http://iridl.ldeo.columbia.edu/expert/SOURCES/.IRI/.FD/.ECHAM4p5/.Forecast/.ca_sst/.ensemble24/.MONTHLY/.prec/X/%2870W%29%2810W%29RANGEEDGES/Y/%2810N%29%2820S%29RANGEEDGES/L/%28';
my $x_str2='%201973-2001%29VALUES%5BM%5Daverage%5BL%5D//keepgrids/average/%28mm/day%29unitconvert/-999/setmissing_value%5BX/Y%5D%5BS/L/add//name//T/def%5Dcptv10.tsv';

# Brazil CRU gridded data
my $ysource='http://iridl.ldeo.columbia.edu/expert/SOURCES/.UEA/.CRU/.TS2p1/.monthly/.prcp/Y/%2810S%29%283S%29RANGEEDGES/X/%2842W%29%2836W%29RANGEEDGES/T/%28Jan%201973%29%28Dec%202002%29RANGEEDGES/T/%28Feb-Apr%29seasonalAverage/-999/setmissing_value%5BX/Y%5D%5BT%5Dcptv10.tsv';
my $y_str1='http://iridl.ldeo.columbia.edu/expert/SOURCES/.UEA/.CRU/.TS2p1/.monthly/.prcp/Y/%2810S%29%283S%29RANGEEDGES/X/%2842W%29%2836W%29RANGEEDGES/T/%28Jan%201973%29%28Dec%202002%29RANGEEDGES/T/%28';
my $y_str2='%29seasonalAverage/-999/setmissing_value%5BX/Y%5D%5BT%5Dcptv10.tsv';

my @start=('Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec','Jan','Feb','Mar','Apr','May','Jun');
my $s=0; my $lead=0; my $endyr=0; my $startyr=0;

for( $s=$firststart; $s<=$laststart; $s=$s+1 ) {
  # Looping over start month      
  print "GCM Start month: $start[$s-1] \n";
  for( $lead=1; $lead<=$maxlead; $lead=$lead+1 ) { 
      my $lead1=${lead}+0.5; my $lead2=${lead}+2.5;
      print "Forecast lead: ${lead1}-${lead2}\n\n";
#       if($s+$lead < 11){$startyr=$yrB;} else {$startyr=$yrB+1;}
#       # GCM "startyr" used in param file: incremented by 1 for targets 
#       #   NDJ, DJF, so we need to start in 1974
## .. no longer needed w/v10 because full date info is encoded

#       if($s+$lead < 11){$startyr=$yrB;} else {$startyr=$yrB+1;}
#       if($s+$lead < 10){$endyr=$yrE;} else {$endyr=$yrE+1;}
#       # Obs rainfall "endyr" used in "wget": incremented by 1 for Oct+ starts
#       #   (ie NDJ targets, because they extend into the following year)
## .. no longer needed because v10 automatically matches start years by default

#       my $shift=${lead}+1;
#       # "shift" is used in "wget" of GCM to match years of x & y, since
#       # dates of x are fcst start times; they need to refer to the target
#       # season. Since date of 3-month seasons is centered, increment by 1
## .. kluge no longer needed w/v10
            
#      print "Start & end year: ${startyr} ${endyr}\n";
      print "Target season: $start[$s+$lead-1]-$start[$s+$lead+1] \n";
        
      # get the data files
      # x (GCM):
      `wget ${x_str1}${lead1}%29%28${lead2}%29RANGEEDGES/S/%281%20$start[$s-1]${x_str2}`;
      `mv -f "def]cptv10.tsv" xdata.tsv`;
            
      # y (rainfall obs):      
      `wget ${y_str1}$start[$s+$lead-1]-$start[$s+$lead+1]${y_str2}`; 
      `mv -f "Y][T]cptv10.tsv" ydata.tsv`; 
      
      # Creating parameter file
      `rm -f params`;
      `touch params`;
      
      # option : 1 for CCA; 2 for PCR 
	`echo '1' >> params`;
	# 1.  Open X input file
	`echo '1' >> params`;
	# X input data 
	`echo 'xdata.tsv' >> params`; 
	# lats N bound 
	`echo "$latNx" >> params`;
	#lats S bound  
	`echo "$latSx" >> params`;
	#lons W bound 
	`echo "$lonWx" >> params`;
	#lons E bound
	`echo "$lonEx" >> params`;
	# X EOF options
	# Minimum number of X EOF modes:
  	`echo '1' >> params`;
  	# max # of X EOF modes
  	`echo '5' >> params`;	

	# 2.  Open Y input file
	`echo '2' >> params`;
	# Y input data 
	`echo 'ydata.tsv' >> params`; 
	# lats N bound 
	`echo "$latNy" >> params`;
	#lats S bound  
	`echo "$latSy" >> params`;
	#lons W bound 
	`echo "$lonWy" >> params`;
	#lons E bound
	`echo "$lonEy" >> params`;
	# Y EOF options
	# Minimum number of Y EOF modes:
  	`echo '1' >> params`;
  	# max # of Y EOF modes
  	`echo '5' >> params`;	
  	
  	# min # of CCA modes
	`echo '1' >> params`;
	# max # of CCA modes
	`echo '3' >> params`;
	
	# option for analysis (eg, 13 for cross-validated analysis)	
	`echo '13' >> params`; 
	# 10.  Data output
	`echo '10' >> params`; 
	# 3. Cross-Validated Predictions
	`echo '3' >> params`;
	# outfile name
	`echo 'CrossValPred_Start${s}_Lead${lead}' >> params`; 
 	# Y Input Data
 	`echo '2' >> params`;
  	# Y Input Data Output file:
  	`echo 'YinputData_Start${s}_Lead${lead}' >> params`; 
 	# exit to previous menu
 	`echo '0' >> params`;
 	# skill maps
 	`echo '18' >> params`;
 	# Pearson skill score
 	`echo '1' >> params`;
 	# outfile name
 	`echo 'PearsonR_Start${s}_Lead${lead}' >> params`; 	
 	# exit cpt 
 	`echo '0' >> params`;
 	
	# delete output file if it already exists
	`rm -f YinputData_Start${s}_Lead${lead}.txt`;
	`rm -f PearsonR_Start${s}_Lead${lead}.txt`;
	`rm -f CrossValPred_Start${s}_Lead${lead}.txt`;
	`rm -f stout_Start${s}_Lead${lead}.txt`;
	
	`CPT.x < params > stout_Start${s}_Lead${lead}.txt`;
#	`rm params`
}   
}
