#!/usr/pkg/bin/perl -w
# -*- mode: perl; coding: utf-8 -*- ###########################################
#
# Setup
#
###############################################################################
use 5.008; # we process Unicode texts
use strict;
use warnings;


###############################################################################
#
#                   This is the merged version of the script.
#
#                !!! DO NOT EDIT - YOUR CHANGES WILL BE LOST !!!
#
#          Any changes should be done to the original modules instead.
#
###############################################################################
package fi::common;
use strict;
use warnings;
use base qw(Exporter);

our @EXPORT      = qw(message debug fetchRaw fetchTree
		      timeToEpoch fullTimeToEpoch);
our @EXPORT_OK   = qw(setQuiet setDebug setTimeZone);
our %EXPORT_TAGS = (
		    main => [qw(message debug setQuiet setDebug setTimeZone)],
		   );

# Perl core modules
use Carp;
use Encode qw(decode);
use POSIX qw(tzset);
use Time::Local qw(timelocal);

# Other modules
use HTML::TreeBuilder;
use XMLTV::Get_nice;

# Normal message, disabled with --quiet
{
  my $quiet = 0;
  sub message(@)  { print STDERR "@_\n" unless $quiet }
  sub setQuiet($) { ($quiet) = @_ }
}

# Debug message, enabled with --debug
{
  my $debug = 0;
  sub debug($@) {
    my $level = shift;
    print STDERR "@_\n" unless $debug < $level;
  }
  sub setDebug($) {
    if (($debug) = @_) {
      # Debug messages may contain Unicode
      binmode(STDERR, ":encoding(utf-8)");
      debug(1, "Debug level set to $debug.");
    }
  }
}

# Fetch URL as UTF-8 encoded string
sub fetchRaw($;$$) {
  my($url, $encoding, $nofail) = @_;
  debug(2, "Fetching URL '$url'");
  my $content;
  my $retries = 5; # this seems to be enough?
 RETRY:
  while (1) {
      eval {
	  local $SIG{ALRM} = sub { die "Timeout" };

	  # Default TCP timeouts are too long. If we don't get a response
	  # within 20 seconds, then that's usually an indication that
	  # something is really wrong on the server side.
	  alarm(20);
	  $content = get_nice($url);
	  alarm(0);
      };

      unless ($@) {
	  # Everything is OK
	  # NOTE: utf-8 means "strict UTF-8 standard encoding"
	  $content = decode($encoding || "utf-8", $content);
	  last RETRY;
      } elsif (($@ =~ /error: 500 Timeout/) && $retries--) {
	  # Let's try this one more time
	  carp "fetchRaw(): timeout. Retrying...";
      } elsif ($nofail) {
	  # Caller requested not to fail
	  $content = "";
	  last RETRY;
      } else {
	  # Fail on everything else
	  croak "fetchRaw(): $@";
      }
  }
  debug(5, $content);
  return($content);
}

# Fetch URL as parsed HTML::TreeBuilder
sub fetchTree($;$$$) {
  my($url, $encoding, $nofail, $unknown) = @_;
  my $content = fetchRaw($url, $encoding, $nofail);
  my $tree = HTML::TreeBuilder->new();
  $tree->ignore_unknown(!$unknown);
  local $SIG{__WARN__} = sub { carp("fetchTree(): $_[0]") };
  $tree->parse($content) or croak("fetchTree() parse failure for '$url'");
  $tree->eof;
  return($tree);
}

#
# Time zone handling
#
# After setting up the day list we switch to a fixed time zone in order to
# interpret the program start times from finnish sources. In this case we of
# course use
#
#      Europe/Helsinki
#
# which can mean
#
#      EET  = GMT+02:00 (East European Time)
#      EEST = GMT+03:00 (East European Summer Time)
#
# depending on the day of the year. By using a fixed time zone this grabber
# will always be able to correctly calculate the program start time in UTC,
# no matter what the time zone of the local system is.
#
# Test program:
# ---------------------- CUT HERE ---------------------------------------------
# use Time::Local;
# use POSIX qw(strftime tzset);
#
# # DST test days for Europe 2010
# my @testdays = (
# 		# hour, minute, mday, month
# 		[    2,     00,    1,     1],
# 		[    2,     59,   28,     3],
# 		[    3,     00,   28,     3],
# 		[    3,     01,   28,     3],
# 		[    3,     00,    1,     7],
# 		[    3,     59,   31,    10],
# 		[    4,     00,   31,    10],
# 		[    4,     01,   31,    10],
# 		[    2,     00,    1,    12],
# 	       );
#
# print strftime("System time zone is: %Z\n", localtime(time()));
# if (@ARGV) {
#   $ENV{TZ} = "Europe/Helsinki";
#   tzset();
# }
# print strftime("Script time zone is: %Z\n", localtime(time()));
#
# foreach my $date (@testdays) {
#   my $time = timelocal(0, @{$date}[1, 0, 2], $date->[3] - 1, 2010);
#   print
#     "$time: ", strftime("%d-%b-%Y %T %z", localtime($time)),
#     " -> ",    strftime("%d-%b-%Y %T +0000", gmtime($time)), "\n";
# }
# ---------------------- CUT HERE ---------------------------------------------
#
# Test runs:
#
# 1) system on Europe/Helsinki time zone [REFERENCE]
#
# $ perl test.pl
# System time zone is: EET
# Script time zone is: EET
# 1262304000: 01-Jan-2010 02:00:00 +0200 -> 01-Jan-2010 00:00:00 +0000
# 1269737940: 28-Mar-2010 02:59:00 +0200 -> 28-Mar-2010 00:59:00 +0000
# 1269738000: 28-Mar-2010 04:00:00 +0300 -> 28-Mar-2010 01:00:00 +0000
# 1269738060: 28-Mar-2010 04:01:00 +0300 -> 28-Mar-2010 01:01:00 +0000
# 1277942400: 01-Jul-2010 03:00:00 +0300 -> 01-Jul-2010 00:00:00 +0000
# 1288486740: 31-Oct-2010 03:59:00 +0300 -> 31-Oct-2010 00:59:00 +0000
# 1288490400: 31-Oct-2010 04:00:00 +0200 -> 31-Oct-2010 02:00:00 +0000
# 1288490460: 31-Oct-2010 04:01:00 +0200 -> 31-Oct-2010 02:01:00 +0000
# 1291161600: 01-Dec-2010 02:00:00 +0200 -> 01-Dec-2010 00:00:00 +0000
#
# 2) system on America/New_York time zone
#
# $ TZ="America/New_York" perl test.pl
# System time zone is: EST
# Script time zone is: EST
# 1262329200: 01-Jan-2010 02:00:00 -0500 -> 01-Jan-2010 07:00:00 +0000
# 1269759540: 28-Mar-2010 02:59:00 -0400 -> 28-Mar-2010 06:59:00 +0000
# 1269759600: 28-Mar-2010 03:00:00 -0400 -> 28-Mar-2010 07:00:00 +0000
# 1269759660: 28-Mar-2010 03:01:00 -0400 -> 28-Mar-2010 07:01:00 +0000
# 1277967600: 01-Jul-2010 03:00:00 -0400 -> 01-Jul-2010 07:00:00 +0000
# 1288511940: 31-Oct-2010 03:59:00 -0400 -> 31-Oct-2010 07:59:00 +0000
# 1288512000: 31-Oct-2010 04:00:00 -0400 -> 31-Oct-2010 08:00:00 +0000
# 1288512060: 31-Oct-2010 04:01:00 -0400 -> 31-Oct-2010 08:01:00 +0000
# 1291186800: 01-Dec-2010 02:00:00 -0500 -> 01-Dec-2010 07:00:00 +0000
#
# 3) system on America/New_York time zone, script on Europe/Helsinki time zone
#    [compare to output from (1)]
#
# $ TZ="America/New_York" perl test.pl switch
# System time zone is: EST
# Script time zone is: EET
# 1262304000: 01-Jan-2010 02:00:00 +0200 -> 01-Jan-2010 00:00:00 +0000
# 1269737940: 28-Mar-2010 02:59:00 +0200 -> 28-Mar-2010 00:59:00 +0000
# 1269738000: 28-Mar-2010 04:00:00 +0300 -> 28-Mar-2010 01:00:00 +0000
# 1269738060: 28-Mar-2010 04:01:00 +0300 -> 28-Mar-2010 01:01:00 +0000
# 1277942400: 01-Jul-2010 03:00:00 +0300 -> 01-Jul-2010 00:00:00 +0000
# 1288486740: 31-Oct-2010 03:59:00 +0300 -> 31-Oct-2010 00:59:00 +0000
# 1288490400: 31-Oct-2010 04:00:00 +0200 -> 31-Oct-2010 02:00:00 +0000
# 1288490460: 31-Oct-2010 04:01:00 +0200 -> 31-Oct-2010 02:01:00 +0000
# 1291161600: 01-Dec-2010 02:00:00 +0200 -> 01-Dec-2010 00:00:00 +0000
#
# Setup fixed time zone for program start time interpretation
sub setTimeZone() {
  $ENV{TZ} = "Europe/Helsinki";
  tzset();
}

# Take a fi::day (day/month/year) and the program start time (hour/minute)
# and convert it to seconds since Epoch in the current time zone
sub timeToEpoch($$$) {
  my($date, $hour, $minute) = @_;
  return(timelocal(0, $minute, $hour,
		   $date->day(), $date->month() - 1, $date->year()));
}

# Same thing but without fi::day object
sub fullTimeToEpoch($$$$$) {
  my($year, $month, $day, $hour, $minute) = @_;
  return(timelocal(0, $minute, $hour, $day, $month - 1, $year));
}

# That's all folks
1;

###############################################################################
package fi::day;
use strict;
use warnings;
use Carp;
use Date::Manip qw(DateCalc ParseDate UnixDate);

# Overload stringify operation
use overload '""' => "ymd";

# Constructor (private)
sub _new {
  my($class, $day, $month, $year) = @_;

  my $self = {
	      day   => $day,
	      month => $month,
	      year  => $year,
	      ymd   => sprintf("%04d%02d%02d", $year, $month, $day),
	      ymdd  => sprintf("%04d-%02d-%02d", $year, $month, $day),
	      dmy   => sprintf("%02d.%02d.%04d", $day, $month, $year),
	     };

  return(bless($self, $class));
}

# instance methods
sub day   { $_[0]->{day}   };
sub dmy   { $_[0]->{dmy}   };
sub month { $_[0]->{month} };
sub year  { $_[0]->{year}  };
sub ymd   { $_[0]->{ymd}   };
sub ymdd  { $_[0]->{ymdd}  };

# class methods
sub generate {
  my($class, $offset, $days) = @_;

  # Start one day before offset
  my $date = DateCalc(ParseDate("today"), ($offset - 1) . " days")
    or croak("can't calculate start day");

  # End one day after offset + days
  my @dates;
  for (0..$days + 1) {
    my($year, $month, $day) = split(':', UnixDate($date, "%Y:%m:%d"));
    push(@dates, $class->_new(int($day), int($month), int($year)));
    $date  = DateCalc($date, "+1 day")
      or croak("can't calculate next day");
  }
  return(\@dates);
}

# That's all folks
1;

###############################################################################
package fi::programme;
use strict;
use warnings;
use Carp;
use POSIX qw(strftime);

# Import from internal modules
fi::common->import();

sub _trim {
  return unless defined($_[0]);
  $_[0] =~ s/^\s+//;
  $_[0] =~ s/\s+$//;
}

# Constructor
sub new {
  my($class, $channel, $language, $title, $start, $stop) = @_;
  _trim($title);
  croak "${class}::new called without valid title or start"
    unless defined($channel) && defined($title) && (length($title) > 0) &&
           defined($start);

  my $self = {
	      channel  => $channel,
	      language => $language,
	      title    => $title,
	      start    => $start,
	      stop     => $stop,
	     };

  return(bless($self, $class));
}

# instance methods
sub category {
  my($self, $category) = @_;
  _trim($category);
  $self->{category} = $category
    if defined($category) && length($category);
}
sub description {
  my($self, $description) = @_;
  _trim($description);
  $self->{description} = $description
    if defined($description) && length($description);
}
sub episode {
  my($self, $episode, $language) = @_;
  _trim($episode);
  if (defined($episode) && length($episode)) {
    $episode =~ s/\.$//;
    push(@{ $self->{episode} }, [$episode, $language]);
  }
}
sub season_episode {
  my($self, $season, $episode) = @_;
  # only accept a pair of valid, positive integers
  if (defined($season) && defined($episode)) {
    $season  = int($season);
    $episode = int($episode);
    if (($season  > 0) && ($episode > 0)) {
      $self->{season}         = $season;
      $self->{episode_number} = $episode;
    }
  }
}
sub start {
  my($self, $start) = @_;
  $self->{start} = $start
    if defined($start) && length($start);
  $start = $self->{start};
  croak "${self}::start: object without valid start time"
    unless defined($start);
  return($start);
}
sub stop {
  my($self, $stop) = @_;
  $self->{stop} = $stop
    if defined($stop) && length($stop);
  $stop = $self->{stop};
  croak "${self}::stop: object without valid stop time"
    unless defined($stop);
  return($stop);
}

# read-only
sub language { $_[0]->{language} }
sub title    { $_[0]->{title}    }

# Convert seconds since Epoch to XMLTV time stamp
#
# NOTE: We have to generate the time stamp using local time plus time zone as
#       some XMLTV users, e.g. mythtv in the default configuration, ignore the
#       XMLTV time zone value.
#
sub _epoch_to_xmltv_time($) {
  my($time) = @_;

  # Unfortunately strftime()'s %z is not portable...
  #
  # return(strftime("%Y%m%d%H%M%S %z", localtime($time));
  #
  # ...so we have to roll our own:
  #
  my @time = localtime($time); #               is_dst
  return(strftime("%Y%m%d%H%M%S +0", @time) . ($time[8] ? "3": "2") . "00");
}

# Configuration data
my %series_description;
my %series_title;
my @title_map;
my $title_strip_parental;

# Common regular expressions
# ($left, $special, $right) = ($description =~ $match_description)
my $match_description = qr/^\s*([^.!?]+[.!?])([.!?]+\s+)?\s*(.*)/;

sub dump {
  my($self, $writer) = @_;
  my $language    = $self->{language};
  my $title       = $self->{title};
  my $category    = $self->{category};
  my $description = $self->{description};
  my $episode     = $self->{episode_number};
  my $season      = $self->{season};
  my $subtitle    = $self->{episode};

  #
  # Programme post-processing
  #
  # Parental level removal (catch also the duplicates)
  $title =~ s/(?:\s+\((?:S|T|7|9|12|16|18)\))+\s*$//
      if $title_strip_parental;
  #
  # Title mapping
  #
  foreach my $map (@title_map) {
    if ($map->($title)) {
      debug(3, "XMLTV title '$self->{title}' mapped to '$title'");
      last;
    }
  }

  #
  # Check 1: object already contains episode
  #
  my($left, $special, $right);
  if (defined($subtitle)) {
    # nothing to be done
  }
  #
  # Check 2: title contains episode name
  #
  # If title contains a colon (:), check to see if the string on the left-hand
  # side of the colon has been defined as a series in the configuration file.
  # If it has, assume that the string on the left-hand side of the colon is
  # the name of the series and the string on the right-hand side is the name
  # of the episode.
  #
  # Example:
  #
  #   config: series title Prisma
  #   title:  Prisma: Totuus tappajadinosauruksista
  #
  # This will generate a program with
  #
  #   title:     Prisma
  #   sub-title: Totuus tappajadinosauruksista
  #
  elsif ((($left, $right) = ($title =~ /([^:]+):\s*(.*)/)) &&
	 (exists $series_title{$left})) {
    debug(3, "XMLTV series title '$left' episode '$right'");
    ($title, $subtitle) = ($left, $right);
  }
  #
  # Check 3: description contains episode name
  #
  # Check if the program has a description. If so, also check if the title
  # of the program has been defined as a series in the configuration. If it
  # has, assume that the first sentence (i.e. the text before the first
  # period, question mark or exclamation mark) marks the name of the episode.
  #
  # Example:
  #
  #   config:      series description Batman
  #   description: Pingviinin paluu. Amerikkalainen animaatiosarja....
  #
  # This will generate a program with
  #
  #   title:       Batman
  #   sub-title:   Pingviinin paluu
  #   description: Amerikkalainen animaatiosarja....
  #
  # Special cases
  #
  #   text:        Pingviinin paluu?. Amerikkalainen animaatiosarja....
  #   sub-title:   Pingviinin paluu?
  #   description: Amerikkalainen animaatiosarja....
  #
  #   text:        Pingviinin paluu... Amerikkalainen animaatiosarja....
  #   sub-title:   Pingviinin paluu...
  #   description: Amerikkalainen animaatiosarja....
  #
  #   text:        Pingviinin paluu?!? Amerikkalainen animaatiosarja....
  #   sub-title:   Pingviinin paluu?!?
  #   description: Amerikkalainen animaatiosarja....
  #
  elsif ((defined($description))              &&
	 (exists $series_description{$title}) &&
	 (($left, $special, $right) = ($description =~ $match_description))) {
    my $desc_subtitle;

    # Check for "Kausi <season>, osa <episode>. <maybe sub-title>...."
    if (my($desc_season, $desc_episode, $remainder) =
	($description =~ m/^Kausi\s+(\d+),\s+osa\s+(\d+)\.\s*(.*)$/)) {
	$season  = $desc_season;
	$episode = $desc_episode;

	# Repeat the above match on remaining description
	($left, $special, $right) = ($remainder =~ $match_description);

	# Take a guess if we have a episode title in description or not
	my $words;
	$words++ while $left =~ /\S+/g;
	if ($words > 5) {
	    # More than 5 words probably means no episode title
	    undef $left;
	    undef $special;
	    $right = $remainder;
	}

    # Check for "Kausi <season>. Jakso <episode>/<# of episodes>. <sub-title>...."
    } elsif (($desc_season, $desc_episode, $remainder) =
	($description =~ m,^Kausi\s+(\d+)\.\s+Jakso\s+(\d+)(?:/\d+)?\.\s*(.*)$,)) {
	$season  = $desc_season;
	$episode = $desc_episode;

	# Repeat the above match on remaining description
	($left, $special, $right) = ($remainder =~ $match_description);

    # Check for "Kausi <season>, <episode>/<# of episodes>. <sub-title>...."
    } elsif (($desc_season, $desc_episode, $remainder) =
	($description =~ m!^Kausi\s+(\d+),\s+(\d+)(?:/\d+)?\.\s*(.*)$!)) {
	$season  = $desc_season;
	$episode = $desc_episode;

	# Repeat the above match on remaining description
	($left, $special, $right) = ($remainder =~ $match_description);

    # Check for "<sub-title>. Kausi <season>, (jakso )?<episode>/<# of episodes>...."
    } elsif (($desc_subtitle, $desc_season, $desc_episode, $remainder) =
	     ($description =~ m!^(.+)\s+Kausi\s+(\d+),\s+(?:jakso\s+)?(\d+)(?:/\d+)?\.\s*(.*)$!)) {
	$left    = $desc_subtitle;
	$season  = $desc_season;
	$episode = $desc_episode;

	# Remainder is already the final episode description
	$right = $remainder;
	undef $special;
    }
    if (defined($left)) {
	unless (defined($special)) {
	    # We only remove period from episode title, preserve others
	    $left =~ s/\.$//;
	} elsif (($left    !~ /\.$/) &&
		 ($special =~ /^\.\s/)) {
	    # Ignore extraneous period after sentence
	} else {
	    # Preserve others, e.g. ellipsis
	    $special =~ s/\s+$//;
	    $left    .= $special;
	}
	debug(3, "XMLTV series title '$title' episode '$left'");
    }
    ($subtitle, $description) = ($left, $right);
  }

  # XMLTV programme desciptor (mandatory parts)
  my %xmltv = (
	       channel => $self->{channel},
	       start   => _epoch_to_xmltv_time($self->{start}),
	       stop    => _epoch_to_xmltv_time($self->{stop}),
	       title   => [[$title, $language]],
	      );
  debug(3, "XMLTV programme '$xmltv{channel}' '$xmltv{start} -> $xmltv{stop}' '$title'");

  # XMLTV programme descriptor (optional parts)
  if (defined($subtitle)) {
    $subtitle = [[$subtitle, $language]]
      unless ref($subtitle);
    $xmltv{'sub-title'} = $subtitle;
    debug(3, "XMLTV programme episode ($_->[1]): $_->[0]")
      foreach (@{ $xmltv{'sub-title'} });
  }
  if (defined($category) && length($category)) {
    $xmltv{category} = [[$category, $language]];
    debug(4, "XMLTV programme category: $category");
  }
  if (defined($description) && length($description)) {
    $xmltv{desc} = [[$description, $language]];
    debug(4, "XMLTV programme description: $description");
  }
  if (defined($season) && defined($episode)) {
    $xmltv{'episode-num'} =  [[ ($season - 1) . '.' . ($episode - 1) . '.', 'xmltv_ns' ]];
    debug(4, "XMLTV programme season/episode: $season/$episode");
  }

  $writer->write_programme(\%xmltv);
}

# class methods
# Parse config line
sub parseConfigLine {
  my($class, $line) = @_;

  # Extract words
  my($command, $keyword, $param) = split(' ', $line, 3);

  if ($command eq "series") {
    if ($keyword eq "description") {
      $series_description{$param}++;
    } elsif ($keyword eq "title") {
      $series_title{$param}++;
    } else {
      # Unknown series configuration
      return;
    }
  } elsif ($command eq "title") {
      if (($keyword eq "map") &&
	  # Accept "title" and 'title' for each parameter
	  (my(undef, $from, undef, $to) =
	   ($param =~ /^([\'\"])([^\1]+)\1\s+([\'\"])([^\3]+)\3/))) {
	  debug(3, "title mapping from '$from' to '$to'");
	  $from = qr/^\Q$from\E/;
	  push(@title_map, sub { $_[0] =~ s/$from/$to/ });
      } elsif (($keyword eq "strip") &&
	       ($param   =~ /parental\s+level/)) {
	  debug(3, "stripping parental level from titles");
	  $title_strip_parental++;
      } else {
	  # Unknown title configuration
	  return;
      }
  } else {
    # Unknown command
    return;
  }

  return(1);
}

# Fix overlapping programmes
sub fixOverlaps {
  my($class, $list) = @_;

  # No need to cleanup empty/one-entry lists
  return unless defined($list) && (@{ $list } >= 2);

  my $current = $list->[0];
  foreach my $next (@{ $list }[1..$#{ $list }]) {

    # Does next programme start before current one ends?
    if ($current->{stop} > $next->{start}) {
      debug(3, "Fixing overlapping programme '$current->{title}' $current->{stop} -> $next->{start}.");
      $current->{stop} = $next->{start};
    }

    # Next programme
    $current = $next;
  }
}

# That's all folks
1;

###############################################################################
package fi::programmeStartOnly;
use strict;
use warnings;
use base qw(Exporter);

our @EXPORT = qw(startProgrammeList appendProgramme convertProgrammeList);

# Import from internal modules
fi::common->import();

sub startProgrammeList($$) {
  my($id, $language) = @_;
  return({
	  id         => $id,
	  language   => $language,
	  programmes => []
	 });

}

sub appendProgramme($$$$) {
  my($self, $hour, $minute, $title) = @_;

  # NOTE: start time in minutes from midnight -> must be converted to epoch
  my $object = fi::programme->new($self->{id}, $self->{language},
				  $title, $hour * 60 + $minute);

  push(@{ $self->{programmes} }, $object);
  return($object);
}

sub convertProgrammeList($$$$) {
  my($self, $yesterday, $today, $tomorrow) = @_;
  my $programmes = $self->{programmes};
  my $id         = $self->{id};

  # No data found -> return empty list to indicate failure
  return([]) unless @{ $programmes };

  # Check for day crossing between first and second entry
  my @dates = ($today, $tomorrow);
  if ((@{ $programmes } > 1) &&
      ($programmes->[0]->start() > $programmes->[1]->start())) {

    # Did caller specify yesterday?
    if (defined $yesterday) {
      unshift(@dates, $yesterday);
    } else {
      # No, assume the second entry is broken -> drop it
      splice(@{ $programmes }, 1, 1);
    }
  }

  my @objects;
  my $date          = shift(@dates);
  my $current       = shift(@{ $programmes });
  my $current_start = $current->start();
  my $current_epoch = timeToEpoch($date,
				  int($current_start / 60),
				  $current_start % 60);
  foreach my $next (@{ $programmes }) {

    # Start of next program might be on the next day
    my $next_start = $next->start();
    if ($current_start > $next_start) {

      #
      # Sanity check: try to detect fake day changes caused by broken data
      #
      # Incorrect date change example:
      #
      #   07:00 Voittovisa
      #   07:50 Ostoskanava
      #   07:20 F1 Ennakkolähetys       <-- INCORRECT DAY CHANGE
      #   07:50 Dino, pikku dinosaurus
      #   08:15 Superpahisten liiga
      #
      #   -> 07:50 (=  470) - 07:20 (=  440) =   30 minutes < 2 hours
      #
      # Correct date change example
      #
      #   22:35 Irene Huss: Tulitanssi
      #   00:30 Formula 1: Extra
      #
      #   -> 22:35 (= 1355) - 00:30 (=   30) = 1325 minutes > 2 hours
      #
      # I grabbed the 2 hour limit out of thin air...
      #
      if ($current_start - $next_start > 2 * 60) {
	$date = shift(@dates);

	# Sanity check
	unless ($date) {
	  message("WARNING: corrupted data for $id on $today: two date changes detected. Ignoring data!");
	  return([]);
	}
      } else {
	message("WARNING: corrupted data for $id on $today: fake date change detected. Ignoring.");
      }
    }

    my $next_epoch = timeToEpoch($date,
				 int($next_start / 60),
				 $next_start % 60);

    my $title = $current->title();
    debug(3, "Programme $id ($current_epoch -> $next_epoch) $title");

    # overwrite start & stop times with epoch: see appendProgramme()
    $current->start($current_epoch);
    $current->stop($next_epoch);

    push(@objects, $current);

    # Move to next program
    $current       = $next;
    $current_start = $next_start;
    $current_epoch = $next_epoch;
  }

  return(\@objects);
}

# That's all folks
1;

###############################################################################
package fi::source::foxtv;
use strict;
use warnings;

BEGIN {
  our $ENABLED = 1;
}

# Import from internal modules
fi::common->import();
fi::programmeStartOnly->import();

# Cleanup filter regexes
my $cleanup_match = qr!\s*(?:(?:\d+\.\s+)?(?:Kausi|Jakso|Osa)\.?(?:\s+(:?\d+/)?\d+\.\s+)?){1,2}!i;

# Description
sub description { 'foxtv.fi' }

# Grab channel list - only one channel available, no need to fetch anything...
sub channels { { 'foxtv.fi' => 'fi FOX' } }

# Grab one day
sub grab {
  my($self, $id, $yesterday, $today, $tomorrow, $offset) = @_;

  # Get channel number from XMLTV id
  return unless ($id eq "foxtv.fi");

  # Fetch & parse HTML (do not ignore HTML5 <section>)
  # Anything beyond 14 days results in 404 error -> ignore errors
  my $root = fetchTree("http://www.foxtv.fi/ohjelmaopas/fox/$today",
		       undef, 1, 1);
  if ($root) {

    #
    # Each page contains the programmes from current day to requested day.
    # All program info is contained within a section with class "row day"
    #
    #  <div id="scheduleContainer">
    #   <section class="row day" data-magellan-destination="day20160514" ...>
    #    <ul class="... scheduleGrid">
    #     <li ...>
    #      ...
    #      <h5>15:00</h5>
    #      ...
    #      <h3>Family Guy</h3>
    #      ...
    #      <h4>Maaseudun taikaa, Kausi 12 | Jakso 21</h4>
    #      <p>Kauden Suomen tv-ensiesitys. ...</p>
    #      ...
    #     </li>
    #     ...
    #    </ul>
    #   </section>
    #   ...
    #  </div>
    #
    my $opaque = startProgrammeList($id, "fi");
    if (my $container = $root->look_down("class"                     => "row day",
					 "data-magellan-destination" => "day$today")) {
      if (my @programmes = $container->look_down("_tag"  => "li",
						 "class" => qr/acilia-schedule-event/)) {
	foreach my $programme (@programmes) {
	  my $start = $programme->find("h5");
	  my $title = $programme->find("h3");

	  if ($start && $title) {
	    if (my($hour, $minute) =
		$start->as_text() =~ /^(\d{2}):(\d{2})$/) {
	      my $desc  = $programme->find("p");
	      my $extra = $programme->find("h4");

	      $title = $title->as_text();

	      my($episode_name, $season, $episode_number) =
		$extra->as_text() =~ /^(.*)?,\s+Kausi\s+(\d+)\s+\S\s+Jakso\s+(\d+)$/
		  if $extra;

	      # Cleanup some of the most common inconsistencies....
	      $episode_name =~ s/^$cleanup_match// if defined $episode_name;
	      if ($desc) {
	        ($desc = $desc->as_text()) =~ s/^$cleanup_match//;

		# Title can be first in description too
		$desc =~ s/^$title(?:\.\s+)?//;

		# Episode title can be first in description too
		$desc =~ s/^$episode_name(?:\.\s+)?// if defined $episode_name;

		# Description can be empty
		undef $desc if $desc eq '';
	      }

	      # Episode name can be the same as the title
	      undef $episode_name
		if defined($episode_name) &&
		   (($episode_name eq '') || ($episode_name eq $title));

	      debug(3, "List entry fox ($hour:$minute) $title");
	      debug(4, $episode_name) if defined $episode_name;
	      debug(4, $desc)         if defined $desc;
	      debug(4, sprintf("s%02de%02d", $season, $episode_number))
		if (defined($season) && defined($episode_number));

	      my $object = appendProgramme($opaque, $hour, $minute, $title);
	      $object->description($desc);
	      $object->episode($episode_name, "fi");
	      $object->season_episode($season, $episode_number);
	    }
	  }
	}
      }
    }

    # Done with the HTML tree
    $root->delete();

    # Convert list to program objects
    #
    # First entry always starts on $today -> don't use $yesterday
    # Last entry always ends on $tomorrow.
    return(convertProgrammeList($opaque, undef, $today, $tomorrow));
  }

  return;
}

# That's all folks
1;

###############################################################################
package fi::source::iltapulu;
use strict;
use warnings;

#
# NOTE: this data source was earlier known as http://tv.hs.fi
# NOTE: this data source was earlier known as http://tv.tvnyt.fi
#
BEGIN {
  our $ENABLED = 1;
}

use Carp;

# Import from internal modules
fi::common->import();

# Description
sub description { 'iltapulu.fi' }

# Grab channel list
sub channels {
  my %channels;

  # Fetch & parse HTML
  my $root = fetchTree("http://www.iltapulu.fi/?&all=1");
  if ($root) {
    #
    # Channel list can be found in table rows
    #
    #  <table class="channel-row">
    #   <tbody>
    #    <tr>
    #     <td class="channel-name">...</td>
    #     <td class="channel-name">...</td>
    #     ...
    #    </tr>
    #   </tbody>
    #   ...
    #  </table>
    #  ...
    #
    if (my @tables = $root->look_down("class" => "channel-row")) {
      foreach my $table (@tables) {
	if (my @cells = $table->look_down("class" => "channel-name")) {
	  foreach my $cell (@cells) {
	    if (my $image = $cell->find("img")) {
	      my $name = $image->attr("alt");
	      $name =~ s/\s+tv-ohjelmat$//;

	      if (defined($name) && length($name)) {
		my $channel_id = (scalar(keys %channels) + 1) . ".iltapulu.fi";
		debug(3, "channel '$name' ($channel_id)");
		$channels{$channel_id} = "fi $name";
	      }
	    }
	  }
	}
      }
    }

    # Done with the HTML tree
    $root->delete();
  }

  debug(2, "Source iltapulu.fi parsed " . scalar(keys %channels) . " channels");
  return(\%channels);
}

# Grab one day
sub grab {
  my($self, $id, $yesterday, $today, $tomorrow, $offset) = @_;

  # Get channel number from XMLTV id
  return unless my($channel) = ($id =~ /^([-\w]+)\.iltapulu\.fi$/);

  # Fetch & parse HTML
  my $root = fetchTree("http://www.iltapulu.fi/?all=1&date=" . $today->ymdd());
  if ($root) {
    my $count = 0;
    my @objects;

    #
    # Programme data is contained inside a div class="<full-row>"
    #
    #  <table class="channel-row">
    #   <tbody>
    #    <tr>
    #     <td class="channel-name">...</td>
    #     <td class="channel-name">...</td>
    #     ...
    #    </tr>
    #    <tr class="full-row...">
    #     <td>
    #      <div class="schedule">
    #       <div class="full-row" data-starttime="1424643300" data-endtime="1424656800">
    #        <table>
    #         <tr>
    #          <td class="time">00.15</td>
    #          <td class="title[ movie]">
    #           <a class="program-open..." ... title="... description ...">
    #            Uutisikkuna
    #           </a>
    #          </td>
    #         </tr>
    #        </table>
    #       </div>
    #      </div>
    #      ...
    #     </td>
    #     ...
    #    </tr>
    #    ...
    #   </tbody>
    #  </table>
    #  ...
    #
    if (my @tables = $root->look_down("class" => "channel-row")) {

     TABLES:
      foreach my $table (@tables) {
	if (my @cells = $table->look_down("class" => "channel-name")) {

	  # Channel in this table?
	  my $index = $channel - $count - 1;
	  $count   += @cells;
	  if ($channel <= $count) {

	    # Extract from each row the div's from the same index
	    my @divs;
	    if (my @rows = $table->look_down("_tag"  => "tr",
					     "class" => qr/full-row/)) {
	      foreach my $row (@rows) {
		my $children = $row->content_array_ref;
		if ($children) {
		  my $td = $children->[$index];
		  push(@divs, $td->look_down("class" => qr/full-row/))
		    if defined($td);
		}
	      }
	    }

	    for my $div (@divs) {
	      my $start = $div->attr("data-starttime");
	      my $end   = $div->attr("data-endtime");
	      my $link  = $div->look_down("class" => qr/program-open/);

	      if ($start && $end && $link) {
		my $title = $link->as_text();

		if (length($title)) {
		  my $desc     = $link->attr("title");
		  my $category = ($link->parent()->attr("class") =~ /movie/) ? "elokuvat" : undef;

		  debug(3, "List entry ${id} ($start -> $end) $title");
		  debug(4, $desc)     if $desc;
		  debug(4, $category) if defined $category;

		  # Create program object
		  my $object = fi::programme->new($id, "fi", $title, $start, $end);
		  $object->category($category);
		  $object->description($desc);
		  push(@objects, $object);
		}
	      }
	    }

	    # skip the rest of the data
	    last TABLES;
	  }
	}
      }
    }

    # Done with the HTML tree
    $root->delete();

    # Fix overlapping programmes
    fi::programme->fixOverlaps(\@objects);

    return(\@objects);
  }

  return;
}

# That's all folks
1;

###############################################################################
package fi::source::telkku;
use strict;
use warnings;
use Date::Manip qw(UnixDate);
use JSON qw();

BEGIN {
  our $ENABLED = 1;
}

# Import from internal modules
fi::common->import();
fi::programmeStartOnly->import();

# Description
sub description { 'telkku.com' }

my %categories = (
  SPORTS => "urheilu",
  MOVIE  => "elokuvat",
);

# Fetch raw HTML and extract & parse JSON
sub _getJSON($$$) {
  my($date, $page, $keys) = @_;

  # Fetch raw text
  my $text = fetchRaw("http://www.telkku.com/tv-ohjelmat/$date/patch/koko-paiva");
  if ($text) {
    #
    # All data is encoded in JSON in a script node
    #
    # <script>
    #    window.__INITIAL_STATE__ = {...};
    # </script>
    #
    my($match) = ($text =~ /window.__INITIAL_STATE__ = ({.+});/);

    if ($match) {
      my $decoded = JSON->new->decode($match);

      if (ref($decoded) eq "HASH") {
	my $data = $decoded;

        #debug(5, JSON->new->pretty->encode($decoded));

	# step through hashes using key sequence
	foreach my $key (@{$keys}) {
	  debug(5, "Looking for JSON key $key");
	  return unless exists $data->{$key};
	  $data = $data->{$key};
	}
	debug(5, "Found JSON data");

	#debug(5, JSON->new->pretty->encode($data));
	#debug(5, "KEYS: ", join(", ", sort keys %{$data}));
	return($data);
      }
    }
  }

  return;
}

# Grab channel list
sub channels {

  # Fetch & extract JSON sub-part
  my $data = _getJSON("tanaan", "peruskanavat",
		      ["channelGroups",
		       "channelGroupsArray"]);

  #
  # Channels data has the following structure
  #
  #  [
  #    {
  #      slug     => "peruskanavat",
  #      channels => [
  #                    {
  #                      id   => "yle-tv1",
  #                      name => "Yle TV1",
  #                      ...
  #                    },
  #                    ...
  #                  ],
  #      ...
  #    },
  #    ...
  #  ]
  #
  if (ref($data) eq "ARRAY") {
    my %channels;
    my %duplicates;

    foreach my $item (@{$data}) {
      if ((ref($item)             eq "HASH")  &&
	  (exists $item->{slug})              &&
	  (exists $item->{channels})          &&
	  (ref($item->{channels}) eq "ARRAY")) {
	my $group    = $item->{slug};
	my $channels = $item->{channels};

	if (defined($group) && length($group) &&
	    (ref($channels) eq "ARRAY")) {
	  debug(2, "Source telkku.com found group '$group' with " . scalar(@{$channels}) . " channels");

	  foreach my $channel (@{$channels}) {
	    if (ref($channel) eq "HASH") {
	      my $id   = $channel->{id};
	      my $name = $channel->{name};

	      if (defined($id) && length($id)   &&
		  (not exists $duplicates{$id}) &&
		  length($name)) {
		debug(3, "channel '$name' ($id)");
		$channels{"${id}.${group}.telkku.com"} = "fi $name";

		# Same ID can appear in multiple groups - avoid duplicates
		$duplicates{$id}++;
	      }
	    }
	  }
	}
      }
    }

    debug(2, "Source telkku.com parsed " . scalar(keys %channels) . " channels");
    return(\%channels);
  }

  return;
}

# Grab one day
sub grab {
  my($self, $id, $yesterday, $today, $tomorrow, $offset) = @_;

  # Get channel number from XMLTV id
  return unless my($channel, $group) = ($id =~ /^([\w-]+)\.(\w+)\.telkku\.com$/);

  # Fetch & extract JSON sub-part
  my $data = _getJSON($today, $group,
		      ["offeringByChannelGroup",
		       $group,
		       "offering",
		       "publicationsByChannel"]);

  #
  # Programme data has the following structure
  #
  #  [
  #    {
  #      channel      => {
  #                        id => "yle-tv1",
  #                        ...
  #                      },
  #      publications => [
  #                        {
  #                           startTime     => "2016-08-18T06:25:00.000+03:00",
  #                           endTime       => "2016-08-18T06:55:00.000+03:00",
  #                           title         => "Helil kyläs",
  #                           description   => "Osa 9/10. Asiaohjelma, mikä ...",
  #                           programFormat => "MOVIE",
  #                           ...
  #                        },
  #                        ...
  #                      ]
  #    },
  #    ...
  #  ]
  #
  if (ref($data) eq "ARRAY") {
    my @objects;

    foreach my $item (@{$data}) {
      if ((ref($item)                 eq "HASH")  &&
	  (ref($item->{channel})      eq "HASH")  &&
	  (ref($item->{publications}) eq "ARRAY") &&
	  ($item->{channel}->{id} eq $channel)) {

	foreach my $programme (@{$item->{publications}}) {
	   my($start, $end, $title, $desc) =
	     @{$programme}{qw(startTime endTime title description)};

	   #debug(5, JSON->new->pretty->encode($programme));

	   if ($start && $end && $title && $desc) {
             $start = UnixDate($start, "%s");
	     $end   = UnixDate($end,   "%s");

	     # NOTE: entries with same start and end time are invalid
	     if ($start && $end && ($start != $end)) {
	       my $category = $categories{$programme->{programFormat}};

	       debug(3, "List entry $channel.$group ($start -> $end) $title");
	       debug(4, $desc);
	       debug(4, $category) if defined $category;

	       # Create program object
	       my $object = fi::programme->new($id, "fi", $title, $start, $end);
	       $object->category($category);
	       $object->description($desc);
	       push(@objects, $object);
	     }
	   }
	}
      }
    }

    # Fix overlapping programmes
    fi::programme->fixOverlaps(\@objects);

    return(\@objects);
  }

  return;
}

# That's all folks
1;

###############################################################################
package fi::source::telvis;
use strict;
use warnings;

BEGIN {
  our $ENABLED = 1;
}

# Import from internal modules
fi::common->import();
fi::programmeStartOnly->import();

# Description
sub description { 'telvis.fi' }

# Grab channel list
sub channels {
  my %channels;

  # Fetch & parse HTML
  my $root = fetchTree("http://www.telvis.fi/tvohjelmat/?vw=channel",
		       "iso-8859-1");
  if ($root) {

    #
    # Channel list can be found in multiple <div> nodes
    #
    # <div class="progs" style="text-align:left;">
    #  <a href="/tvohjelmat/?vw=channel&ch=tv1&sh=new&dy=03.02.2011">YLE TV1</a>
    #  <a href="/tvohjelmat/?vw=channel&ch=tv2&sh=new&dy=03.02.2011">YLE TV2</a>
    #  ...
    # </div>
    #
    if (my @containers = $root->look_down("class" => "progs")) {
      foreach my $container (@containers) {
	if (my @refs = $container->find("a")) {
	  debug(2, "Source telvis.fi found " . scalar(@refs) . " channels");
	  foreach my $ref (@refs) {
	    my $href = $ref->attr("href");
	    my $name = $ref->as_text();

	    if (defined($href) && length($name) &&
		(my($id) = ($href =~ m,vw=channel&ch=([^&]+)&,))) {
	      debug(3, "channel '$name' ($id)");
	      $channels{"${id}.telvis.fi"} = "fi $name";
	    }
	  }
	}
      }
    }

    # Done with the HTML tree
    $root->delete();

  } else {
    return;
  }

  debug(2, "Source telvis.fi parsed " . scalar(keys %channels) . " channels");
  return(\%channels);
}

# Grab one day
sub grab {
  my($self, $id, $yesterday, $today, $tomorrow, $offset) = @_;

  # Get channel number from XMLTV id
  return unless my($channel) = ($id =~ /^([^.]+)\.telvis\.fi$/);

  # Fetch & parse HTML
  my $root = fetchTree("http://www.telvis.fi/lite/?vw=channel&ch=${channel}&dy=" . $today->dmy(),
		       "iso-8859-1");
  if ($root) {
    #
    # Each programme can be found in a separate <tr> node under a <div> node
    #
    # <div class="tm">
    #  <table>
    #   ...
    #   <tr>
    #    <td valign="top"><strong>13:50</strong></td>
    #    <td><strong>Serranon perhe</strong>&nbsp;
    #     Suuret sanat suuta halkovat. Diego kertoo perheelleen suhteestaan Celiaan. Reaktiot pistävät miehelle jauhot suuhun. Ana pyytää Fitiltä palvelusta, josta tämä on otettu. Santi hoitaa Lourditasin asioita omin päin.
    #    </td>
    #   </tr>
    #   <tr class="zeb">
    #    <td valign="top"><strong>15:15</strong></td>
    #    <td><strong>Gilmoren tytöt</strong>&nbsp;
    #     Välirikko. Emily yrittää tuoda Christopherin takaisin perheensä piiriin, mutta Rory on saanut aina poissaolevasta isästä tarpeekseen. Lorelaita piirittää jälleen uusi ihailija.
    #    </td>
    #   </tr>
    #   ...
    #  </table>
    # </div>
    #
    my $opaque = startProgrammeList($id, "fi");
    if (my $container = $root->look_down("class" => "tm")) {
      if (my @rows = $container->find("tr")) {
	foreach my $row (@rows) {
	  my @columns = $row->find("td");
	  if (@columns == 2) {
	    my $start = $columns[0]->find("strong");
	    my $title = $columns[1]->find("strong");
	    if ($start && $title) {
	      $start = $start->as_text();
	      $title = $title->as_text();
	      if (my($hour, $minute) = ($start =~ /^(\d{2}):(\d{2})/)) {
		my $desc  = $columns[1]->as_text(); # includes $title
		$desc =~ s/^\Q$title\E\s+//;
		debug(3, "List entry $channel ($hour:$minute) $title");
		debug(4, $desc);

		# Only record entry if title isn't empty
		if (length($title) > 0) {
		  my $object = appendProgramme($opaque, $hour, $minute, $title);
		  $object->description($desc);
		}
	      }
	    }
	  }
	}
      }
    }

    # Done with the HTML tree
    $root->delete();

    # Convert list to program objects
    #
    # First entry always starts on $today -> don't use $yesterday
    # Last entries always end on $tomorrow
    #
    # Unfortunately the last entry of $today is not the first entry of
    # $tomorrow. That means that the last entry will always be missing as we
    # don't have a stop time for it :-(
    return(convertProgrammeList($opaque, undef, $today, $tomorrow));
  }

  return;
}

# That's all folks
1;

###############################################################################
package fi::source::yle;
use strict;
use warnings;
use Date::Manip;

BEGIN {
  our $ENABLED = 1;
}

# Import from internal modules
fi::common->import();

# Description
sub description { 'yle.fi' }

my %languages = (
    "fi" => "ohjelmaopas",
    "sv" => "programguide",
);

# Grab channel list
sub channels {
  my %channels;

  # yle.fi offers program guides in multiple languages
  foreach my $code (sort keys %languages) {

    # Fetch & parse HTML
    my $root = fetchTree("http://$languages{$code}.yle.fi/tv/opas");
    if ($root) {

      #
      # Channel list can be found from this list:
      #
      #  <ul class="channel-lists ...">
      #    <li><h1 id="yle-tv1">Yle TV1...</h1>...</li>
      #    <li><h1 id="yle-tv2">Yle TV2...</h1>...</li>
      #    ...
      #  </ul>
      #
      if (my $container = $root->look_down("class" => qr/^channel-lists\s+/)) {
	if (my @headers = $container->find("h1")) {
	  debug(2, "Source ${code}.yle.fi found " . scalar(@headers) . " channels");
	  foreach my $header (@headers) {
	    my $id   = $header->attr("id");
	    my $name = $header->as_text();

	    if (defined($id) && length($id) && length($name)) {
	      debug(3, "channel '$name' ($id)");
	      $channels{"${id}.${code}.yle.fi"} = "$code $name";
	    }
	  }
	}
      }

      # Done with the HTML tree
      $root->delete();

    } else {
      return;
    }
  }

  debug(2, "Source yle.fi parsed " . scalar(keys %channels) . " channels");
  return(\%channels);
}

# Grab one day
sub grab {
  my($self, $id, $yesterday, $today, $tomorrow, $offset) = @_;

  # Get channel number from XMLTV id
  return unless my($channel, $code) = ($id =~ /^([^.]+)\.([^.]+)\.yle\.fi$/);

  # Fetch & parse HTML (do not ignore HTML5 <time>)
  my $root = fetchTree("http://$languages{$code}.yle.fi/tv/opas?t=" . $today->ymdd(),
		       undef, undef, 1);
  if ($root) {
    my @objects;

    #
    # Each programme can be found in a separate <li> node
    #
    #  <ul class="channel-lists ...">
    #    <li>
    #      <h1 id="yle-tv1">Yle TV1...</h1>
    #      <ul>
    #        <li class="program-entry ...">
    #          <div class="program-label">
    #            <time class="dtstart" datetime="2014-06-15T01:30:00.000+03:00">01:30</time>
    #            <time class="dtend" datetime="2014-06-15T04:30:00.000+03:00"></time>
    #            <div class="program-title">
    #              ...
    #              <a class="link-grey" href="...">Suunnistuksen Jukolan viesti</a>
    #              <span class="label movie">Elokuva</span>
    #              ...
    #            </div>
    #          </div>
    #          ...
    #          <div class="program-desc">
    #            <p>66. Jukolan viesti. Kolmas, neljäs ja viides osuus...
    #            ...
    #            </p>
    #          </div>
    #        </li>
    #        ...
    #      </ul>
    #      ...
    #    </li>
    #  </ul>
    #
    if (my $container = $root->look_down("class" => qr/^channel-lists\s+/)) {
      if (my $header = $container->look_down("_tag" => "h1",
					     "id"   => $channel)) {
	if (my $parent = $header->parent()) {
	  if (my @programmes = $parent->look_down("class" => qr/^program-entry\s+/)) {
	    foreach my $programme (@programmes) {
	      my $start = $programme->look_down("class", "dtstart");
	      my $end   = $programme->look_down("class", "dtend");
	      my $title  = $programme->look_down("class", "program-title");
	      my $desc  = $programme->look_down("class", "program-desc");

	      if ($start && $end && $title && $desc) {
		$start = UnixDate($start->attr("datetime"), "%s");
		$end   = UnixDate($end->attr("datetime"),   "%s");

		my $link     = $title->find("a");
		my $category = $title->look_down("class" => "label movie") ? "elokuvat" : undef;

		# NOTE: entries with same start and end time are invalid
		if ($start && $end && $link && ($start != $end)) {

		  $title = $link->as_text();
		  $title =~ s/^\s+//;
		  $title =~ s/\s+$//;

		  if (length($title)) {

		    $desc = $desc->find("p");
		    $desc = $desc ? $desc->as_text() : "";
		    $desc =~ s/^\s+//;
		    $desc =~ s/\s+$//;

		    debug(3, "List entry $channel ($start -> $end) $title");
		    debug(4, $desc);
		    debug(4, $category) if defined $category;

		    # Create program object
		    my $object = fi::programme->new($id, $code, $title, $start, $end);
		    $object->category($category);
		    $object->description($desc);
		    push(@objects, $object);
		  }
		}
	      }
	    }
	  }
	}
      }
    }

    # Done with the HTML tree
    $root->delete();

    # Fix overlapping programmes
    fi::programme->fixOverlaps(\@objects);

    return(\@objects);
  }

  return;
}

# That's all folks
1;

###############################################################################
###############################################################################
package main;

# Perl core modules
use Getopt::Long;
use List::Util qw(shuffle);
use Pod::Usage;


# Generate source module list
my @sources;
BEGIN {
  @sources = map { s/::$//; $_ }
    map { "fi::source::" . $_ }
    sort
    grep { ${ $::{'fi::'}->{'source::'}->{$_}->{ENABLED} } }
    keys %{ $::{'fi::'}->{'source::'} };
  die "$0: couldn't find any source modules?" unless @sources;
}

# Import from internal modules
fi::common->import(':main');

# Basic XMLTV modules
use XMLTV::Version "generated from\n\ttv_grab_fi.pl              2.05   2014/06/21  16:36:15\n\tcommon.pm                  2.06   2015/02/24  18:41:32\n\tday.pm                     2.01   2014/06/14  18:18:36\n\tprogramme.pm               2.10   2016/08/20  16:55:13\n\tprogrammeStartOnly.pm      2.03   2016/05/16  16:20:29\n\tfoxtv.pm                   2.04   2016/05/15  16:58:19\n\tiltapulu.pm                2.09   2015/11/20  20:19:21\n\ttelkku.pm                  2.06   2016/08/20  16:55:13\n\ttelvis.pm                  2.04   2015/02/24  18:41:32\n\tyle.pm                     2.11   2016/05/15  15:16:16";
use XMLTV::Capabilities qw(baseline manualconfig cache);
use XMLTV::Description 'Finland (' .
  join(', ', map { $_->description() } @sources ) .
  ')';

# NOTE: We will only reach the rest of the code only when the script is called
#       without --version, --capabilities or --description
# Reminder of XMLTV modules
use XMLTV::Get_nice;
use XMLTV::Memoize;

###############################################################################
#
# Main program
#
###############################################################################
# Forward declarations
sub doConfigure();
sub doListChannels();
sub doGrab();

# Command line option default values
my %Option = (
	      days   => 14,
	      quiet  =>  0,
	      debug  =>  0,
	      offset =>  0,
	     );

# Enable caching. This will remove "--cache [file]" from @ARGV
XMLTV::Memoize::check_argv('XMLTV::Get_nice::get_nice_aux');

# Process command line options
if (GetOptions(\%Option,
	       "configure",
	       "config-file=s",
	       "days=i",
	       "debug|d+",
	       "gui:s",
	       "help|h|?",
	       "list-channels",
	       "no-randomize",
	       "offset=i",
	       "output=s",
	       "quiet",
	       "test-mode")) {

  pod2usage(-exitstatus => 0,
	    -verbose => 2)
    if $Option{help};

  setDebug($Option{debug});
  setQuiet($Option{quiet});

  if ($Option{configure}) {
    # Configure mode
    doConfigure();

  } elsif ($Option{'list-channels'}) {
    # List channels mode
    doListChannels();

  } else {
    # Grab mode (default)
    doGrab();
  }
} else {
  pod2usage(2);
}

# That's all folks
exit 0;

###############################################################################
#
# Utility functions for the different modes
#
###############################################################################
sub _getConfigFile() {
  require XMLTV::Config_file;
  return(XMLTV::Config_file::filename($Option{'config-file'},
				      "tv_grab_fi",
				      $Option{quiet}));
}

{
  my $ofh;

  sub _createXMLTVWriter() {

    # Output file handling
    $ofh = \*STDOUT;
    if (defined $Option{output}) {
      open($ofh, ">", $Option{output})
	or die "$0: cannot open file '$Option{output}' for writing: $!";
    }

    # Create XMLTV writer for UTF-8 encoded text
    binmode($ofh, ":utf8");
    my $writer = XMLTV::Writer->new(
				    encoding => 'UTF-8',
				    OUTPUT   => \*STDOUT,
				   );

    #### HACK CODE ####
    $writer->start({
		    "generator-info-name" => "XMLTV",
		    "generator-info-url"  => "http://xmltv.org/",
		    "source-info-url"     => "multiple", # TBA
		    "source-data-url"     => "multiple", # TBA
		   });
    #### HACK CODE ####

    return($writer);
  }

  sub _closeXMLTVWriter($) {
    my($writer) = @_;
    $writer->end();

    # close output file
    if ($Option{output}) {
      close($ofh) or die "$0: write error on file '$Option{output}': $!";
    }
    message("DONE");
  }
}

sub _addChannel($$$$) {
  my($writer, $id, $name, $language) = @_;
  $writer->write_channel({
			  id             => $id,
			  'display-name' => [[$name, $language]],
			 });
}

{
  my $bar;

  sub _createProgressBar($$) {
    my($label, $count) = @_;
    return if $Option{quiet};

    require XMLTV::Ask;
    require XMLTV::ProgressBar;
    XMLTV::Ask::init($Option{gui});
    $bar = XMLTV::ProgressBar->new({
				    name  => $label,
				    count => $count,
				   });
  }

  sub _updateProgressBar()  { $bar->update() if defined $bar }
  sub _destroyProgressBar() { $bar->finish() if defined $bar }
}

sub _getChannels($$) {
  my($callback, $opaque) = @_;

  # Get channels from all sources
  _createProgressBar("getting list of channels", @sources);
  foreach my $source (@sources) {
    debug(1, "requesting channel list from source '" . $source->description ."'");
    if (my $list = $source->channels()) {
      die "test failure: source '" . $source->description . "' didn't find any channels!\n"
	if ($Option{'test-mode'} && (keys %{$list} == 0));

      while (my($id, $value) = each %{ $list }) {
	my($language, $name) = split(" ", $value, 2);
	$callback->($opaque, $id, $name, $language);
      }
    }
    _updateProgressBar();
  }
  _destroyProgressBar();
}

###############################################################################
#
# Configure Mode
#
###############################################################################
sub doConfigure() {
  # Get configuration file name
  my $file = _getConfigFile();
  XMLTV::Config_file::check_no_overwrite($file);

  # Open configuration file. Assume UTF-8 encoding
  open(my $fh, ">:utf8", $file)
      or die "$0: can't open configuration file '$file': $!";
  print $fh "# -*- coding: utf-8 -*-\n";

  # Get channels
  my %channels;
  _getChannels(sub {
		 # We only need name and ID
		 my(undef, $id, $name) = @_;
		 $channels{$id} = $name;
	       },
	       undef);

  # Query user
  my @sorted  = sort keys %channels;
  my @answers = XMLTV::Ask::ask_many_boolean(1, map { "add channel $channels{$_} ($_)?" } @sorted);

  # Generate configuration file contents from answers
  foreach my $id (@sorted) {
    warn("\nunexpected end of input reached\n"), last
      unless @answers;

    # Write selection to configuration file
    my $answer = shift(@answers);
    print $fh ($answer ? "" : "#"), "channel $id $channels{$id}\n";
  }

  # Check for write errors
  close($fh)
    or die "$0: can't write to configuration file '$file': $!";
  message("DONE");
}

###############################################################################
#
# List Channels Mode
#
###############################################################################
sub doListChannels() {
  # Create XMLTV writer
  my $writer = _createXMLTVWriter();

  # Get channels
  _getChannels(sub {
		 my($writer, $id, $name, $language) = @_;
		 _addChannel($writer, $id, $name, $language);
		 },
	       $writer);

  # Done writing
  _closeXMLTVWriter($writer);
}

###############################################################################
#
# Grab Mode
#
###############################################################################
sub doGrab() {
  # Sanity check
  die "$0: --offset must be a non-negative integer"
    unless $Option{offset} >= 0;
  die "$0: --days must be an integer larger than 0"
    unless $Option{days} > 0;

  # Get configuation
  my %channels;
  {
    # Get configuration file name
    my $file = _getConfigFile();

    # Open configuration file. Assume UTF-8 encoding
    open(my $fh, "<:utf8", $file)
      or die "$0: can't open configuration file '$file': $!";

    # Process configuration information
    while (<$fh>) {

      # Comment removal, white space trimming and compressing
      s/\#.*//;
      s/^\s+//;
      s/\s+$//;
      next unless length;	# skip empty lines
      s/\s+/ /;

      # Channel definition
      if (my($id, $name) = /^channel (\S+) (.+)/) {
	debug(1, "duplicate channel definion in line $.:$id ($name)")
	  if exists $channels{$id};
	$channels{$id} = $name;

      # Programme definition
      } elsif (fi::programme->parseConfigLine($_)) {
	# Nothing to be done here

      } else {
	warn("bad configuration line in file '$file', line $.: $_\n");
      }
    }

    close($fh);
  }

  # Generate list of days
  my $dates = fi::day->generate($Option{offset}, $Option{days});

  # Set up time zone
  setTimeZone();

  # Create XMLTV writer
  my $writer = _createXMLTVWriter();

  # Generate task list with one task per channel and day
  my @tasklist;
  foreach my $id (sort keys %channels) {
    for (my $i = 1; $i < $#{ $dates }; $i++) {
      push(@tasklist, [$id,
		       @{ $dates }[$i - 1..$i + 1],
		       $Option{offset} + $i - 1]);
    }
  }

  # Randomize the task list in order to create a random access pattern
  # NOTE: if you use only one source, then this is basically a no-op
  if (not $Option{'no-randomize'}) {
    debug(1, "Randomizing task list");
    @tasklist = shuffle(@tasklist);
  }

  # For each entry in the task list
  my %seen;
  my @programmes;
  _createProgressBar("getting listings", @tasklist);
  foreach my $task (@tasklist) {
    my($id, $yesterday, $today, $tomorrow, $offset) = @{$task};
    debug(1, "XMLTV channel ID '$id' fetching day $today");
    foreach my $source (@sources) {
      if (my $programmes = $source->grab($id,
					 $yesterday, $today, $tomorrow,
					 $offset)) {

	if (@{ $programmes }) {
	  # Add channel ID & name (once)
	  _addChannel($writer, $id, $channels{$id},
		      $programmes->[0]->language())
	    unless $seen{$id}++;

	  # Add programmes to list
	  push(@programmes, @{ $programmes });
	} elsif ($Option{'test-mode'}) {
	  die "test failure: source '" . $source->description . "' didn't retrieve any programmes for '$id'!\n";
	}
      }
    }
    _updateProgressBar();
  }
  _destroyProgressBar();

  # Dump programs
  message("writing XMLTV programme data");
  $_->dump($writer) foreach (@programmes);

  # Done writing
  _closeXMLTVWriter($writer);
}

###############################################################################
#
# Man page
#
###############################################################################
__END__
=pod

=head1 NAME

tv_grab_fi - Grab TV listings for Finland

=head1 SYNOPSIS

tv_grab_fi [--cache E<lt>FILEE<gt>]
           [--config-file E<lt>FILEE<gt>]
           [--days E<lt>NE<gt>]
           [--gui [E<lt>OPTIONE<gt>]]
           [--no-randomize]
           [--offset E<lt>NE<gt>]
           [--output E<lt>FILEE<gt>]
           [--quiet]

tv_grab_fi  --capabilities

tv_grab_fi  --configure
           [--cache E<lt>FILEE<gt>]
           [--config-file E<lt>FILEE<gt>]
           [--gui [E<lt>OPTIONE<gt>]]
           [--quiet]

tv_grab_fi  --description

tv_grab_fi  --help|-h|-?

tv_grab_fi  --list-channels
           [--cache E<lt>FILEE<gt>]
           [--gui [E<lt>OPTIONE<gt>]]
           [--quiet]

tv_grab_fi  --version

=head1 DESCRIPTION

Grab TV listings for several channels available in Finland. The data comes
from various sources, e.g. www.telkku.com. The grabber relies on parsing HTML,
so it might stop working when the web page layout is changed.

You need to run C<tv_grab_fi --configure> first to create the channel
configuration for your setup. Subsequently runs of C<tv_grab_fi> will grab
the latest data, process them and produce XML data on the standard output.

=head1 COMMANDS

=over 8

=item B<NONE>

Grab mode.

=item B<--capabilities>

Show the capabilities this grabber supports. See also
L<http://wiki.xmltv.org/index.php/XmltvCapabilities>.

=item B<--configure>

Generate the configuration file by asking the users which channels to grab.

=item B<--description>

Print the description for this grabber.

=item B<--help|-h|-?>

Show this help page.

=item B<--list-channels>

Fetch all available channels from the various sources and write them to the
standard output.

=item B<--version>

Show the version of this grabber.

=back

=head1 GENERIC OPTIONS

=over 8

=item B<--cache F<FILE>>

File name to cache the fetched HTML data in. This speeds up subsequent runs
using the same data.

=item B<--gui [OPTION]>

Enable the graphical user interface. If you don't specify B<OPTION> then
XMLTV will automatically choose the best available GUI. Allowed values are:

=over 4

=item B<Term>

Terminal output with a progress bar

=item B<TermNoProgressBar>

Terminal output without progress bar

=item B<Tk>

Tk-based GUI

=back

=item B<--quiet>

Suppress any progress messages to the standard output.

=back

=head1 CONFIGURE MODE OPTIONS

=over 8

=item B<--config-file F<FILE>>

File name to write the configuration to.

Default is F<$HOME/.xmltv/tv_grab_fi.conf>.

=back

=head1 GRAB MODE OPTIONS

=over 8

=item B<--config-file F<FILE>>

File name to read the configuration from.

Default is F<$HOME/.xmltv/tv_grab_fi.conf>.

=item B<--days C<N>>

Grab C<N> days of TV data.

Default is 14 days.

=item B<--no-randomize>

Grab TV data in deterministic order, i.e. first fetch channel 1, days 1 to N,
then channel 2, and so on.

Default is to use a random access pattern. If you only grab TV data from one
source then the randomizing is a no-op.

=item B<--offset C<N>>

Grab TV data starting at C<N> days in the future.

Default is 0, i.e. today.

=item B<--output F<FILE>>

Write the XML data to F<FILE> instead of the standard output.

=back

=head1 CONFIGURATION FILE SYNTAX

The configuration file is line oriented, each line can contain one command.
Empty lines and everything after the C<#> comment character is ignored.
Supported commands are:

=over 8

=item B<channel ID NAME>

Grab information for this channel. C<ID> depends on the source, C<NAME> is
ignored and forwarded as is to the XMLTV output file. This information can be
automatically generated using the grabber in the configuration mode.

=item B<series description NAME>

If a programme title matches C<NAME> then the first sentence of the
description, i.e. everything up to the first period (C<.>), question mark
(C<?>) or exclamation mark (C<!>), is removed from the description and is used
as the name of the episode.

=item B<series title NAME>

If a programme title contains a colon (C<:>) then the grabber checks if the
left-hand side of the colon matches C<NAME>. If it does then the left-hand
side is used as programme title and the right-hand side as the name of the
episode.

=item B<title map "FROM" 'TO'>

If the programme title starts with the string C<FROM> then replace this part
with the string C<TO>. The strings must be enclosed in single quotes (C<'>) or
double quotes (C<">). The title mapping occurs before the C<series> command
processing.

=item B<title strip parental level>

At the beginning of 2012 some programme descriptions started to include
parental levels at the end of the title, e.g. C<(S)>. With this command all
parental levels will be removed from the titles automatically. This removal
occurs before the title mapping.

=back

=head1 SEE ALSO

L<xmltv>.

=head1 AUTHORS

=head2 Current

=over

=item Stefan Becker C<chemobejk at gmail dot com>

=item Ville Ahonen C<ville dot ahonen at iki dot fi>

=back

=head2 Retired

=over

=item Matti Airas

=back

=head1 BUGS

The channels are identified by channel number rather than the RFC2838 form
recommended by the XMLTV DTD.

=cut
