#!/usr/bin/env perl
# Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
# Copyright [2016-2025] EMBL-European Bioinformatics Institute
# 
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# 
#      http://www.apache.org/licenses/LICENSE-2.0
# 
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


use strict;
use warnings;

# Directory that holds the FASTQ files to process: the first command-line
# argument.  Declared with "our" because the subs below read it directly.
our $dir = shift @ARGV;

# Without a directory the globs below would expand against "/" (undefined
# $dir) or silently match nothing (bad path), so fail early with a usage hint.
die "Usage: $0 <fastq_directory>\n" unless defined $dir;
die "Not a directory: $dir\n"       unless -d $dir;

#compFastq ();
splitFastq();

# Run the external compFastq_linux program once for every file matching
# "$dir/*fastq" ($dir is the file-level directory variable).
#
# Takes no arguments.  For each file it prints the path with the ".fastq"
# extension removed and then invokes
#
#   compFastq_linux <file> <file without .fastq>
#
# NOTE(review): the second argument is presumably the output name expected by
# compFastq_linux -- confirm against that tool's usage.
#
# A failing invocation is reported with a warning and the remaining files are
# still processed.
sub compFastq {
  foreach my $fastq_file (glob "$dir/*fastq") {
    # Strip the extension of the files actually selected by the glob
    # (".fastq"); stripping ".fasta" never matched, which passed the input
    # path to the program twice.
    (my $fastq_name = $fastq_file) =~ s/\.fastq\z//;
    print "fastq_name is $fastq_name\n";

    # List form bypasses the shell, so paths containing spaces or shell
    # metacharacters are passed through unchanged.
    system('compFastq_linux', $fastq_file, $fastq_name) == 0
      or warn "compFastq_linux failed for $fastq_file (status $?)\n";
  }
}

# Split every read of each FASTQ file matching "$dir/perl*.fastq" into pieces
# of at most 2000 bases, written to a sibling file named "<input>_split".
#
# Takes no arguments; the directory comes from the file-level $dir variable.
# A piece that starts at 0-based offset N of read R is written as a four-line
# record:
#
#   @N_R
#   <bases N .. N+1999>
#   +N_<text after "+">      (a bare "+" line is written back as a bare "+")
#   <qualities N .. N+1999>
#
# Records are parsed by position (header, bases, "+" line, qualities) rather
# than by the first character of each line: a quality string may begin with a
# letter, "@" or "+", so the first character cannot identify the line type.
#
# Blank lines between records are skipped.  Malformed input (a non-"@" line
# where a header is expected, a missing "+" line, a truncated final record, or
# base/quality strings of different length) is reported with a warning and the
# offending line or record is left out of the output.
#
# Dies if an input or output file cannot be opened, or if the output file
# cannot be flushed on close.
sub splitFastq {
  my $chunk_size = 2000;

  foreach my $fastq_name (glob "$dir/perl*.fastq") {
    my $split_name = "${fastq_name}_split";
    open my $in,  '<', $fastq_name or die "Can't open fastq file $fastq_name: $!\n";
    open my $out, '>', $split_name or die "Can't open fastq split file $split_name: $!\n";

    RECORD:
    while (defined(my $seq_head = <$in>)) {
      chomp $seq_head;
      next RECORD if $seq_head eq '';

      unless ($seq_head =~ s/^\@//) {
        warn "$fastq_name line $.: expected a '\@' header line, skipping\n";
        next RECORD;
      }

      # The three lines after a header are, in order: bases, "+" line, qualities.
      my ($seq, $qual_head, $qual) = map { scalar <$in> } 1 .. 3;
      unless (defined $qual) {
        warn "$fastq_name: record '$seq_head' is truncated, skipping\n";
        last RECORD;
      }
      chomp($seq, $qual_head, $qual);

      unless ($qual_head =~ s/^\+//) {
        warn "$fastq_name line $.: record '$seq_head' has no '+' line, skipping\n";
        next RECORD;
      }
      if (length($seq) != length($qual)) {
        warn "$fastq_name line $.: record '$seq_head' has "
           . length($seq) . " bases but " . length($qual)
           . " qualities, skipping\n";
        next RECORD;
      }

      # Emit one record per chunk; the do/while guarantees that an empty read
      # still produces a single (empty) record.
      my $offset = 0;
      do {
        # Only prefix the "+" text when there is some: "+0_" would no longer
        # match the "@0_<name>" header of the piece.
        my $plus = $qual_head eq '' ? '' : "${offset}_$qual_head";
        print {$out} "\@${offset}_$seq_head\n",
                     substr($seq, $offset, $chunk_size), "\n",
                     "+$plus\n",
                     substr($qual, $offset, $chunk_size), "\n";
        $offset += $chunk_size;
      } while ($offset < length $seq);
    }

    close $in;
    # Buffered write errors (e.g. a full disk) only surface at close.
    close $out or die "Can't close fastq split file $split_name: $!\n";
  }
}
    
