#!/usr/bin/perl -w -CAL
###
### STILGrep - a tool for searching the STIL database
### Programmed by Matti 'ccr' Hamalainen <ccr@tnsp.org>
### (C) Copyright 2011-2020 Tecnic Software productions (TNSP)
###
use warnings;
use strict;
use utf8;


###
### Globals
###
my $prg_version = "0.8.3";
my $prg_name = $0;
$prg_name =~ s#^.*/([^/]+)$#$1#;
my $errflag = 0;
my $data = {};
my $opt_no_fields_set;


###
### Default settings
###

# Regex of allowed field names for -f and -F options
my $opt_field_names = "FILENAME|TITLE|ARTIST|COMMENT|NAME|AUTHOR";


my %settings = (
  "match_fields" => { "type" => 2, "sopt" => "-f", "default" => [],
    "parse" => sub($$$) { return parse_field_array($_[0], $_[1], $_[2], $opt_field_names) }
  },

  "out_fields"   => { "type" => 2, "sopt" => "-F", "default" => [],
    "parse" => sub($$$) { return parse_field_array($_[0], $_[1], $_[2], $opt_field_names) }
  },

  "help"         => { "type" => 1, "sopt" => "-?", "lopt" => "--help", "default" => 0 },
  "nocase"       => { "type" => 1, "sopt" => "-i", "default" => 0, "config" => 2 },
  "invert"       => { "type" => 1, "sopt" => "-v", "default" => 0 },
  "quiet"        => { "type" => 1, "sopt" => "-q", "default" => 0, "config" => 2 },
  "names"        => { "type" => 1, "sopt" => "-l", "default" => 0 },

  "dump"         => { "type" => 1, "sopt" => "-D", "default" => 0 },

  "command"      => { "type" => 2, "sopt" => "-c", "default" => undef },
  "expr"         => { "type" => 2, "default" => undef },

  "color"        => { "type" => 2, "sopt" => "-C", "lopt" => "--color",
    "default" => "auto", "valid" => "on|off|auto", "config" => 2 },

  "hvsc_path"    => { "type" => 2, "sopt" => "-H",
    "default" => undef, "config" => 1 },

  "stil_file"    => { "type" => 2, "sopt" => "-S",
    "default" => undef, "config" => 1 },
);


###
### Helper functions
###
sub do_grep($)
{
  my ($field) = @_;
  my $hit;
  my $expr = opt_get("expr");

  if (opt_get("nocase"))
  {
    $hit = ($field =~ /$expr/i);
  }
  else
  {
    $hit = ($field =~ /$expr/);
  }

  return opt_get("invert") ? !$hit : $hit;
}


sub is_field_enabled($)
{
  return 1 if (!$opt_no_fields_set);

  foreach my $field (@{opt_get("match_fields")})
  {
    return 1 if ($_[0] =~ /$field/i);
  }
  return 0;
}


sub output_line($)
{
  if (!opt_get("invert") && ((opt_get("color") eq "auto" && -t STDOUT) || opt_get("color") eq "on"))
  {
    my $expr = opt_get("expr");
    my $line = $_[0];
    if (opt_get("nocase"))
    {
      $line =~ s/($expr)/sprintf("\e[1;31m%s\e[0m", $1)/ige;
    }
    else
    {
      $line =~ s/($expr)/sprintf("\e[1;31m%s\e[0m", $1)/ge;
    }
    return $line;
  }
  else
  {
    return $_[0];
  }
}


sub output_field($$)
{
  my ($song, $mfield) = @_;
  if (defined($$data{"songs"}{$song}{$mfield}))
  {
    if ($song > 0)
    {
      print "  #$song [".$mfield."]: ".output_line($$data{"songs"}{$song}{$mfield})."\n";
    }
    else
    {
      print "  [".$mfield."]: ".output_line($$data{"songs"}{$song}{$mfield})."\n";
    }
  }
}


sub opt_error($$)
{
  my ($ctx, $msg) = @_;

  if (defined($ctx))
  {
    print "[".$$ctx{"filename"}.":".$$ctx{"nline"}."]: ".$msg;
  }
  else
  {
    print "ERROR: ".$msg;
  }

  $errflag = 1;
  return 0;
}


sub parse_field_array($$$$)
{
  my ($ctx, $opt, $arg, $valid) = @_;
  my $fields = [];

  foreach my $tfield (split(/\s*,\s*/, $arg))
  {
    if ($tfield =~ /^all$/io)
    {
      push(@${fields}, split(/\|/, $valid));
      return $fields;
    }

    return opt_error($ctx,
      "Invalid value '".$tfield."' for ".$opt.": valid values are ".$valid." or 'ALL'.\n")
      unless ($tfield =~ /^($valid)$/io);

    push(@${fields}, uc($tfield)) unless grep(/^${tfield}$/i, $fields);
  }

  return $fields;
}


sub opt_chkarg($$$)
{
  my ($ctx, $opt, $arg) = @_;
  die("Invalid option setting '".$opt."'.\n") unless defined($settings{$opt});

  my $extra = "";
  if (defined($settings{$opt}{"valid"}))
  {
    my @tmp = split(//, $settings{$opt}{"valid"});
    $extra = " (".join(", ", @tmp).").";
  }
  else
  {
    $extra = ".";
  }

  my $tmp = shift(@ARGV) or opt_error($ctx, "Option ".$arg." (".$opt.") requires an argument".$extra."\n");
  return $tmp;
}


sub opt_set($$$)
{
  my ($ctx, $opt, $value) = @_;
  my $err;

  return opt_error($ctx, "Invalid setting '".$opt."'.\n") unless defined($settings{$opt});

  if (defined($ctx) && (!defined($settings{$opt}{"config"}) || !$settings{$opt}{"config"}))
  {
    return opt_error($ctx, "Setting '".$opt."' can not be set in the configuration file.\n");
  }

  if ($settings{$opt}{"type"} == 1)
  {
    if ($value =~ /^(1|on|yes|true)$/io)
    {
      $value = 1;
    }
    elsif ($value =~ /^(0|off|no|false)$/io)
    {
      $value = 0;
    }
    else
    {
      $err = ", valid values are [1|on|yes|true] or [0|off|no|false]";
    }
  }
  if ($settings{$opt}{"type"} == 3)
  {
    if ($value =~ /^\d+$/io)
    {
      $value = int($value);
    }
    else
    {
      $err = ", expected an integer";
    }
  }
  elsif (defined($settings{$opt}{"valid"}))
  {
    my $valid = $settings{$opt}{"valid"};
    $err = ", valid values are ".$valid unless ($value =~ /^$valid/);
  }

  if (defined($err))
  {
    return opt_error($ctx,
      "Invalid value for ".$opt.": '".$value."'".$err.".\n");
  }

  $settings{$opt}{"value"} = $value;

  return 1;
}


sub opt_get
{
  die("Invalid option setting '".$_[0]."'.\n") unless defined($settings{$_[0]});

  if (defined($settings{$_[0]}{"value"}))
  {
    return $settings{$_[0]}{"value"};
  }
  elsif (defined($settings{$_[0]}{"default"}))
  {
    return $settings{$_[0]}{"default"};
  }
  else
  {
    return defined($_[1]) ? $_[1] : undef;
  }
}


###
### Main code begins
###

##
## Parse configuration file if it exists
##
my $cfg_file = "stilgrep.cfg";
my $home_dir = defined($ENV{"HOME"}) ? $ENV{"HOME"} : "";
my $xdg_dir = $home_dir."/.config";
my $cfg_path;

if (-e $home_dir."/.".$cfg_file && -f _ && -r _)
{
  $cfg_path = $home_dir."/.".$cfg_file;
}
else
{
  $cfg_path = $xdg_dir."/".$cfg_file;
}


if (-e $cfg_path && -f _ && -r _)
{
  open(CFGFILE, "<:encoding(utf8)", $cfg_path) or
    die("Could not open configuration file '".$cfg_path."': ".$!."\n");

  my $pctx = { "nline" => 0, "filename" => $cfg_path };
  while (defined(my $line = <CFGFILE>))
  {
    $line =~ s/^\s+//;
    $line =~ s/\s+$//;
    $$pctx{"line"} = $line;
    $$pctx{"nline"}++;

    if ($line =~ /(^#|^$)/)
    {
      # Ignore comments and empty lines
    }
    elsif ($line =~ /^([a-zA-Z0-9_]+)\s*=\s*\"(.*?)\"$/)
    {
      opt_set($pctx, lc($1), $2);
    }
    elsif ($line =~ /^([a-zA-Z0-9_]+)\s*=\s*(.*?)$/)
    {
      opt_set($pctx, lc($1), $2);
    }
    else
    {
      opt_error($pctx, "Syntax error: ".$line."\n");
    }
  }

  close(CFGFILE)
}

exit(1) if ($errflag);


##
## Parse commandline options
##
my $pctx = undef;
while (defined(my $arg = shift(@ARGV)))
{
  my $opt;
  foreach my $mkey (keys %settings)
  {
    my $sset = $settings{$mkey};
    if (
      (defined($sset->{"sopt"}) && $arg eq $sset->{"sopt"}) ||
      (defined($sset->{"lopt"}) && $arg eq $sset->{"lopt"})
      )
    {
      $opt = $mkey;
    }
  }

  if (defined($opt))
  {
    my $sset = $settings{$opt};
    if ($sset->{"type"} == 1)
    {
      opt_set($pctx, $opt, 1);
    }
    elsif ($sset->{"type"} == 2)
    {
      my $tmp = opt_chkarg($pctx, $opt, $arg);

      if (defined($sset->{"parse"}))
      {
        $tmp = $sset->{"parse"}->($pctx, $arg, $tmp);
      }

      opt_set($pctx, $opt, $tmp);
    }
  }
  elsif (length($arg) > 0)
  {
    die("Only one regular expression argument allowed.\n") if defined(opt_get("expr"));
    opt_set($pctx, "expr", $arg);
  }
}

##
## Check HVSC / STIL paths
##
if (!defined(opt_get("hvsc_path")) && defined($ENV{"HVSC_BASE"}))
{
  opt_set(undef, "hvsc_path", $ENV{"HVSC_BASE"});
}

if (!defined(opt_get("stil_file")) && defined(opt_get("hvsc_path")))
{
  opt_set(undef, "stil_file", opt_get("hvsc_path")."/DOCUMENTS/STIL.txt");
}


##
## Dump config if requested
##
if (opt_get("dump"))
{
  print "# STILGrep version ".$prg_version." configuration file\n";

  foreach my $item (sort keys %settings)
  {
    if (defined($settings{$item}{"config"}) &&
        $settings{$item}{"config"} > 0)
    {
      my $sset = $settings{$item};
      my $sval = opt_get($item);

      print "#" if ($sset->{"config"} > 1);
      print $item." = ";

      if ($sset->{"type"} == 1)
      {
        print $sval ? "on" : "off";
      }
      elsif ($sset->{"type"})
      {
        print "\"".$sval."\"";
      }
      print "\n";
    }
  }
  exit(0);
}


##
## Check if user needs help
##
exit(1) if ($errflag && !opt_get("help"));

die("STILGrep v".$prg_version." by ccr/TNSP (C) 2011-2020
Usage: ".$prg_name." [options] '<regular expression>'

 -?  --help  Show this help
 -q          Be quieter / less verbose
 -i          Ignore case (be case insensitive)
 -l          List filenames only (-i and -v affect results)
 -v          Invert match (aka list non-matches)
 -f <field>  Match specified <field(s)> ONLY, you can specify
             several -f copyright,comment or -f all for all fields
 -F <field>  Output these fields (in addition to matching field),
             example: -F copyright,comment or -F all for all fields
 -c <cmd>    Execute shell command <cmd> for each matching
             file with filename as argument
 -C <mode>   Use ANSI colour in output (on|off|auto). The default
             setting is auto, enabling colour when output is tty
 -H <path>   Specify HVSC root directory
 -S <file>   Specify STIL file
 -D          Dump an example configuration file to stdout

Fields      : ".join(", ", split(/\|/, lc($opt_field_names)))." (or 'all')
HVSC path   : ".opt_get("hvsc_path", "[not set]")."
STIL file   : ".opt_get("stil_file", "[not set]")."
Config file : ".$cfg_path."

") if (!defined(opt_get("expr")) || opt_get("help"));


##
## Process the database
##
open(INFILE, "<:encoding(iso-8859-1)", opt_get("stil_file")) or
  die("Could not open STIL database file '".opt_get("stil_file")."'.\n");

binmode(STDOUT, ":encoding(UTF-8)");

my $subsong;
my $cont;
my $block;
my $data_set = 0;
my $nline = 0;
$opt_no_fields_set = scalar(@{opt_get("match_fields")});


# Somewhat naive line-based parser, but it works
while (defined(my $line = <INFILE>))
{
  $nline++;
  $line =~ s/\s+$//o;

  # Ignore comments
  if ($line =~ /^#/o)
  {
  }
  # Parse filenames
  elsif ($line =~ /^(\/.+)$/o)
  {
    $$data{"filename"} = $1;
    $subsong = 0;
    $data_set = 1;
  }
  # Parse sub-song entries
  elsif ($line =~ /^\(#(\d+)\)/o)
  {
    $subsong = $1;
  }
  # Parse block data
  elsif ($line =~ /^\s*($opt_field_names): (.+)$/o)
  {
    $block = $1;
    $cont = 1;
    $$data{"songs"}{$subsong}{$block} = $2;
  }
  elsif ($line =~ /^         (.+)$/o)
  {
    $$data{"songs"}{$subsong}{$block} .= " ".$1 if $cont;
  }
  # End of block?
  else
  {
    if ($data_set)
    {
      # Check hit on filename
      my $hits = is_field_enabled("filename") && do_grep($$data{"filename"});

      # Check hits on subsongs/fields
      my %hits_sub = ();
      foreach my $song (keys %{$$data{"songs"}})
      {
        foreach my $field (keys %{$$data{"songs"}{$song}})
        {
          if (is_field_enabled($field))
          {
            push(@{$hits_sub{$song}}, $field) if do_grep($$data{"songs"}{$song}{$field});
          }
        }
      }

      # Process and output hits, if any
      if ($hits || scalar keys(%hits_sub) > 0)
      {
        my $opt_command = opt_get("command");
        if (defined($opt_command) && length($opt_command) > 0)
        {
          my $datafile = opt_get("hvsc_path").$$data{"filename"};
          my @args = split(/\s+/, $opt_command);
          push(@args, $datafile);
          system(@args) == 0 or die("Could not execute ".$args[0].": ".$?."\n");
        }
        else
        {
          print output_line(opt_get("hvsc_path").$$data{"filename"})."\n" unless opt_get("quiet");
          if (!opt_get("names") && !opt_get("quiet"))
          {
            foreach my $song (sort keys %hits_sub)
            {
              my $shits = $hits_sub{$song};
              foreach my $mfield (@{$shits})
              {
                # Output the matching field
                output_field($song, $mfield);
              }

              # Output additional requested fields, if any
              # (not duplicating the matching fields)
              foreach my $tfield (sort @{opt_get("out_fields")})
              {
                if (!grep { /^$tfield$/ } @{$shits})
                {
                  output_field($song, $tfield);
                }
              }
            }
            print "--\n";
          }
        }
      }

      # Clear data
      $data = {};
      $cont = 0;
      $block = "";
    }
    $data_set = 0;
  }
}

close(INFILE);
