=head1 LICENSE

See the NOTICE file distributed with this work for additional information
regarding copyright ownership.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

=head1 CONTACT

  Please email comments or questions to the public Ensembl
  developers list at <http://lists.ensembl.org/mailman/listinfo/dev>.

  Questions may also be sent to the Ensembl help desk at
  <http://www.ensembl.org/Help/Contact>.

=head1 NAME

XrefParser::ZFINDescParser

=head1 DESCRIPTION

A parser class to parse the ZFIN file for descriptions.

-species = danio_rerio
-species_id = 7955
-data_uri = https://zfin.org/downloads/genetic_markers.txt
-file_format = TSV
-columns = [acc label desc type ignored]

=head1 SYNOPSIS

  my $parser = Bio::EnsEMBL::Xref::Parser::ZFINDescParser->new(
    source_id  => 149,
    species_id => 7955,
    files      => ['genetic_markers.txt'],
    xref_dba   => $xref_dba
  );

  $parser->run();

=cut



package XrefParser::ZFINDescParser;

use strict;
use warnings;
use Carp;
use Text::CSV;

use parent qw( XrefParser::BaseParser );

=head2 run

  Arg [1]    : HashRef of named arguments:
                 source_id  - required; stored on every xref that is added
                 species_id - required; stored on every xref that is added
                 files      - required; array ref of file names, of which
                              only the first is read
                 verbose    - optional (default 0); when true a summary
                              line is printed at the end
  Description: Runs the ZFINDescParser. Reads the tab-separated file and
               adds one xref (info_type MISC) for every row whose type
               column is GENE. Rows whose label starts with 'WITHDRAWN:'
               are counted but not added.
  Return type: 0
  Exceptions : confesses if a required argument is missing, if no file
               name is given, if the file cannot be opened or if the file
               cannot be parsed
  Caller     : internal

=cut

sub run {
  my ($self, $ref_arg) = @_;

  my $source_id    = $ref_arg->{source_id};
  my $species_id   = $ref_arg->{species_id};
  my $files        = $ref_arg->{files};
  my $verbose      = $ref_arg->{verbose} // 0;

  if ( (!defined $source_id) || (!defined $species_id) || (!defined $files) ) {
    confess "Need to pass source_id, species_id and files as pairs";
  }

  # Only the first file of the list is parsed.
  my $file = $files->[0];

  if ( !defined $file ) {
    confess "Need to pass at least one file name in files";
  }

  #e.g.
  #ZDB-GENE-050102-1	metrn	meteorin, glial cell differentiation regulator	GENE	SO:0001217
  #ZDB-GENE-060824-3	a1cf	apobec1 complementation factor	GENE	SO:0001217
  #ZDB-GENE-090212-1	a2ml	alpha-2-macroglobulin-like	GENE	SO:0001217

  my $count = 0;
  my $withdrawn = 0;

  my $file_io = $self->get_filehandle($file);

  if ( !defined $file_io ) {
    confess "Can't open ZFINDesc file '$file'\n";
  }

  my $input_file = Text::CSV->new({
    sep_char       => "\t",
    empty_is_undef => 1,
    binary         => 1
  }) or confess "Cannot use file '$file': " . Text::CSV->error_diag();


  $input_file->column_names( [ 'zfin', 'label', 'desc', 'type', 'so' ] );

  while ( my $data = $input_file->getline_hr( $file_io ) ) {
    # empty_is_undef turns blank columns into undef, so guard every
    # comparison to avoid "uninitialized value" warnings.

    # Only interested in genes
    my $type = $data->{'type'} // q{};
    next if ($type ne 'GENE');

    # skip if WITHDRAWN: this precedes both desc and label
    my $label = $data->{'label'};
    if ( defined $label && $label =~ /\A WITHDRAWN:/xms ) {
      $withdrawn++;
    }
    else {
      $self->add_xref({
        acc        => $data->{'zfin'},
        label      => $label,
        desc       => $data->{'desc'},
        source_id  => $source_id,
        species_id => $species_id,
        info_type  => "MISC"
      });
      $count++;
    }
  }

  # getline_hr returns false both at end of file and on a parse error;
  # anything other than a clean EOF is fatal.
  $input_file->eof or confess "Error parsing file $file: " . $input_file->error_diag();
  $file_io->close();

  if($verbose){
    print "$count ZFINDesc xrefs added, $withdrawn withdrawn entries ignored\n";
  }

  return 0;
}

1;
