#!/usr/bin/perl -w
# struct_harvest.pl
# May 2007 - Oct 2009
# dearl (a) soe ucsc edu
# v0.3
#
# see inline POD for details, or try:
# ./struct_harvest.pl --help
# ./struct_harvest.pl --man
#
########################################
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
########################################
use warnings;
use strict;
use Getopt::Long;
use Pod::Usage;
use File::Glob ':glob';
use File::Basename;
use Statistics::Descriptive;
#####
# Forward declarations for the subroutines this script calls; their bodies
# appear further down in the file.
sub usage;
sub versionCheck;
sub harvest;
sub evanno;
sub clumpp;
sub gnuplot;
sub renameFiles;
sub help;
#####
# Command-line state. These are file-scoped lexicals that the rest of the
# script reads, so their names must not change.
my $IN_DIR;     # --dir : path to the STRUCTURE Results folder (required)
my $OUT_DIR;    # --out : output directory; created below when missing
# Action flags: --harvest, --evanno, --clumpp, --gnuplot
my ($isHarvest, $isEvanno, $isCLUMPP, $isGNUPLOT);
# Informational / diagnostic flags: --version, --help, --man, --debug
my ($isOptionVersion, $isOptionHelp, $isOptionMan, $isOptionDebug);
my $isOptionRename;    # --rename : the only action that does not need --out
my $VERSION = 'v0.3, Oct 2009.';

# GetOptions returns false when it meets an unknown option or a missing or
# malformed value (it has already warned on STDERR by then). Treat that as a
# usage error instead of carrying on with a partially-populated option set.
# NOTE(review): this relies on usage() terminating the script, as the
# validation calls below already do - confirm against its definition.
GetOptions(
    'dir=s'   => \$IN_DIR,
    'out=s'   => \$OUT_DIR,
    'harvest' => \$isHarvest,
    'evanno'  => \$isEvanno,
    'clumpp'  => \$isCLUMPP,
    'gnuplot' => \$isGNUPLOT,
    'rename'  => \$isOptionRename,
    'version' => \$isOptionVersion,
    'help'    => \$isOptionHelp,
    'man'     => \$isOptionMan,
    'debug'   => \$isOptionDebug,
) or usage();

# Informational requests are handled before any argument validation.
pod2usage(-verbose => 2) if $isOptionMan;
versionCheck($VERSION) if $isOptionVersion;
help() if $isOptionHelp;

# An input directory is always required.
usage() unless $IN_DIR;

# A valid request is either --rename, or --out together with at least one
# of the four action flags.
my $wantsAction = $isHarvest || $isEvanno || $isCLUMPP || $isGNUPLOT;
usage() unless ( ($OUT_DIR && $wantsAction) || $isOptionRename );

# The input path must be an existing directory.
usage() unless -d $IN_DIR;

if ($OUT_DIR) {
    # Create the output directory on demand.
    if (! -d $OUT_DIR) {
        mkdir($OUT_DIR) or die "$0: unable to make directory, $OUT_DIR. $!; $?\n";
    }
    # Guarantee a trailing slash on the stored output path.
    $OUT_DIR .= '/' unless $OUT_DIR =~ m{/$};
}
############################################################
=pod
=head1 struct_harvest.pl
F<struct_harvest.pl> is a tool to extract information from STRUCTURE Results
folders. This is the offline version of
http://taylor0.biology.ucla.edu/struct_harvest/
=head1 SYNOPSIS
./struct_harvest.pl [options]
Input:
--dir [specify the path to your STRUCTURE Results folder]
--out [specify the path to an out directory. If it doesn't exist,
it will be created.]
--rename [a flag. Will rename your results to remove whitespace and will
standardize numbers, i.e. 5 -> 0005]
--harvest [a flag. Runs the Harvester. Output is out_harvest.txt]
--evanno [a flag. Runs the Evanno method. Output is out_evanno.summary.dat]
--clumpp [a flag. Generates clumpp (min K - max K).indfiles.]
--gnuplot [a flag. Generates .dat files and .p files for use with gnuplot]
--help [a flag. Prints this message]
--man [a flag. Prints full documentation]
--version [a flag. Prints current version and web address]
=head1 DESCRIPTION
F<struct_harvest.pl>
The script takes the paths to your Results directory and an output directory
and then, depending on the flags you pass, extracts data from the results
and writes them to the output directory.
STRUCTURE:
J. Pritchard, M. Stephens, P. Donnelly. 2000. Genetics 155:945-959.
http://www.genetics.org/cgi/content/full/155/2/945
http://pritch.bsd.uchicago.edu/structure.html
CLUMPP:
M. Jakobsson, N. Rosenberg 2007. CLUMPP: a cluster matching and permutation
program for dealing with label switching and multimodality in analysis of
population structure. Bioinformatics 23(14): 1801-1806.
http://bioinformatics.oxfordjournals.org/cgi/content/full/23/14/1801
http://rosenberglab.bioinformatics.med.umich.edu/clumpp.html
Evanno Method:
G. Evanno, S. Regnaut, J. Goudet 2005 Detecting the number of clusters of
individuals using the software structure: a simulation study. Molecular
Ecology 14(8): 2611-2620.
http://doi.wiley.com/10.1111/j.1365-294X.2005.02553.x
GNUPLOT:
http://www.gnuplot.info/
=head1 AUTHOR
Dent A. Earl, dearl (a) soe ucsc edu
=head1 DATE
October 2009
=head1 LICENSE
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
=cut
############################################################
################################################################################
# main
#
# Driver: runs the requested stages in a fixed order, handing each one the
# input directory, the output directory and a reference to %data.
#
# Layout of %data:
#   $data{$k}{$fileName}{runNum} = run number
#                       {ln}     = Ln(P)
#                       {mean}   = mean
#                       {var}    = variance
my %data;

# Optional renaming pass runs first, before anything else reads $IN_DIR.
if ($isOptionRename) {
    renameFiles($IN_DIR);
}

# The harvest step is unconditional; the $isHarvest flag passed as the last
# argument only decides whether it prints.
harvest($IN_DIR, $OUT_DIR, \%data, $isHarvest);

# Remaining stages run only when their flag was given. evanno() is also told
# whether --gnuplot was requested.
if ($isEvanno) {
    evanno($IN_DIR, $OUT_DIR, \%data, $isGNUPLOT);
}
if ($isCLUMPP) {
    clumpp($IN_DIR, $OUT_DIR, \%data);
}
if ($isGNUPLOT) {
    gnuplot($IN_DIR, $OUT_DIR, \%data);
}
# end main
################################################################################
##############################
#
sub usage{
print "USAGE: $0 --dir --out