#!/usr/bin/perl -w

use strict;

# Check a TAC 2009 KBP Track entity-linking task submission for various
# common errors:
#      * missing query ID (exactly one link required per query ID)
#      * invalid query ID
#      * invalid link (only approximate)
#
# Usage is:
#      check_kbp_entity-linking.pl results_file
# where results_file is the name of the results file to be checked.
#
# Each non-blank line of the results file holds two whitespace-separated
# fields:
#      query_id link
# where
#      * query_id is the prefix "EL" followed by a number that is not
#        zero-padded and lies in the range $min_id .. $max_id;
#      * link is one of
#           1. NIL
#           2. an entity ID, i.e. the ID attribute of an entity element
#              of the TAC 2009 KBP knowledge base.  Only its shape is
#              checked here ("E" followed by exactly seven digits); the
#              knowledge base itself is not consulted.
#
# Messages regarding the submission are printed to an error log named
# after the results file (its base name plus ".errlog").
#
# Exit status is 0 when no errors were logged and 255 otherwise.

# Change this variable to the directory where the error log should be put
my $errlog_dir = ".";

# Give up once more than this many errors have been logged.
my $MAX_ERRORS = 25;

# These values are specific to the TAC 2009 KBP track
my $prefix = "EL";
my $min_id = 1;
my $max_id = 3904;

my $results_file;               # input file name
my @ids;                        # number of links returned for each query ID
my ($errlog, $errlog_fh);       # error log path and its write handle
my $num_errors = 0;             # total errors logged so far
my $line_num   = 0;             # current line of the results file

if (@ARGV != 1) {
    print STDERR "Usage: $0 resultsfile \n";
    die "\n";
}
$results_file = $ARGV[0];

# Set up the error log.  It is named after the base name of the results
# file; rindex() yields -1 when the path has no "/", so the +1 makes
# substr() start at offset 0 in that case.
$errlog = $errlog_dir . "/"
        . substr($results_file, rindex($results_file, "/") + 1)
        . ".errlog";

# Low-precedence "or" is required here: with "||" the die would attach to
# the file-name operand and a failed open would go unnoticed.
open($errlog_fh, '>', $errlog)
    or die "Cannot open error log for writing\n";

# The submission file for the KBP entity-linking task has exactly one
# response for each query ID.  A response line is of the form
#      query_id link
open(my $results_fh, '<', $results_file)
    or die "Unable to open results file $results_file: $!";

while (my $line = <$results_fh>) {
    chomp $line;
    $line_num++;
    next if $line =~ /^\s*$/;               # blank lines are ignored

    # Trim leading and trailing whitespace.
    $line =~ s/^\s*(.*\S)\s*$/$1/;

    # Split into the query ID and "everything else"; the limit of 2 keeps
    # any extra fields inside $link so the link check below rejects them.
    my ($id, $link) = split " ", $line, 2;
    if (!defined $link || length($link) == 0) {
        error("Wrong number of fields");
        next;
    }

    # get query id; query id numbers are *not* zero-padded
    if ($id !~ /^\Q$prefix\E([1-9][0-9]*)$/) {
        error("Invalid query id: $id");
        next;
    }
    my $id_num = $1;
    if ($id_num < $min_id || $id_num > $max_id) {
        error("Invalid query id number: $id_num");
        next;
    }

    # make sure link is valid (whitespace runs are collapsed first so the
    # logged value is readable)
    $link =~ s/\s+/ /g;
    if ($link !~ /^E[0-9]{7}$/ && $link ne "NIL") {
        error("Unrecognizable link `$link'");
        next;
    }

    # Count this response; ++ on an undefined slot yields 1.
    $ids[$id_num]++;
}
close($results_fh);

# Do global check:
#   error if we don't have exactly one link for each query
foreach my $id_num ($min_id .. $max_id) {
    if (!defined $ids[$id_num]) {
        error("No KB link given for query $prefix$id_num");
    }
    elsif ($ids[$id_num] != 1) {
        error("Too many judgments ($ids[$id_num]) for query $prefix$id_num");
    }
}

print {$errlog_fh} "Finished processing $results_file\n";
close($errlog_fh)
    or die "Close failed for error log $errlog: $!\n";

exit 255 if $num_errors;
exit 0;


# Print an error message to the error log, keeping track of the total
# number of errors.
#
# Takes the message text as its argument.  The logged line carries the
# current value of $line_num.  Once more than $MAX_ERRORS errors have been
# logged, a final notice is written, the log is closed and the program
# exits with status 255.
sub error {
    my ($msg_string) = @_;

    print {$errlog_fh}
        "$0 $results_file: Error on line $line_num --- $msg_string\n";

    $num_errors++;
    if ($num_errors > $MAX_ERRORS) {
        print {$errlog_fh} "$0 of $results_file: Quit. Too many errors!\n";
        close($errlog_fh)
            or die "Close failed for error log $errlog: $!\n";
        exit 255;
    }
}