J'essaie d'obtenir tout un tas de valeurs d'environ 3000 fichiers HTML et de les enregistrer dans une feuille de calcul.

J'utilise HTML::TreeBuilder pour traiter le HTML et créer une feuille de calcul à l'aide de Spreadsheet::WriteExcel.

Mais mon script ne parvient pas à obtenir les valeurs. je vois

Utilisation de la valeur non initialisée $ val dans la concaténation (.) Ou de la chaîne à la ligne 63 de spreadsheet.pl.

Que pourrais-je faire de mal?

Voici un exemple de mes fichiers HTML sur pastebin.com. Il était trop volumineux pour être affiché dans la question.

Mon code Perl

use warnings 'all';
use strict;

use LWP::Simple 'get';
use Spreadsheet::WriteExcel;
use HTML::TreeBuilder;
use Path::Tiny;
use constant URL => 'http://pastebin.com/raw/qLwu80ZW';

my $teamNumber  = "";
my $teamName    = "";
my $schoolName  = "";
my $area        = "";
my $district    = "";
my $agDeptPhone = "";
my $schoolPhone = "";
my $fax         = "";
my $addressOne  = "";
my $addressTwo  = "";
my $city        = ""; 
my $state       = "";
my $zipCode     = "";
my $name        = "";
my $email       = "";
my $row         = "";
my $Ypos        = 0; 

my $path = "Z:\\_WEB_CLIENTS\\Morgan Livestock\\Judging Card";

my $workbook  = Spreadsheet::WriteExcel->new('perlOutput.xlsx');
my $worksheet = $workbook->add_worksheet();


sub getTeamNumber {
    my ($file) = @_;

    my $html = path($file);
    my $tree = HTML::TreeBuilder->new_from_content($html);
    my @nodes = $tree->look_down(_tag => 'input');

    my $val;

    foreach my $node (@nodes) {
        $val = $node->look_down('name', qr/\$txt_TeamNumber/)->attr('value');
    }

    print "Got Team Number $val\n";

    return $val;
}


sub getTeamName {
    my ($file) = @_;

    my $html = path($file);
    my $tree = HTML::TreeBuilder->new_from_content($html);
    my @nodes = $tree->look_down(_tag => 'input');

    my $val;

    foreach my $node (@nodes) {
        $val = $node->look_down('name', qr/\$txt_TeamName/)->attr('value');
    }

    print "Got Team Name $val\n";

    return $val;
}


sub getSchoolName {
    my ($file) = @_;

    my $html = path($file);
    my $tree = HTML::TreeBuilder->new_from_content($html);
    my @nodes = $tree->look_down(tag_ => 'input');

    my $val;

    foreach my $node (@nodes) {
        $val = $node->look_down('name', qr/\$txt_SchoolName/)->attr('value');
    }

    print "Got School Name $val\n";

    return $val;
}


sub getArea{
    my ($file) = @_;

    my $html = path($file);
    my $tree = HTML::TreeBuilder->new_from_content($html);
    my @nodes = $tree->look_down(tag_ => 'input');

    my $val;

    foreach my $node (@nodes) {
        $val = $node->look_down('name', qr/\$txt_Area/)->attr('value');
    }

    print "Got Area $val\n";

    return $val;
}


sub getDistrict{
    my ($file) = @_;

    my $html = path($file);
    my $tree = HTML::TreeBuilder->new_from_content($html);
    my @nodes = $tree->look_down(_tag => 'input');

    my $val;

    foreach my $node (@nodes) {
        $val = $node->look_down('name', qr/\$txt_District/)->attr('value');
    }

    print "Got District $val\n";

    return $val;
}


sub getDeptPhone {
    my ($file) = @_;

    my $html = path($file);
    my $tree = HTML::TreeBuilder->new_from_content($html);
    my @nodes = $tree->look_down(_tag => 'input');

    my $val;

    foreach my $node (@nodes) {
        $val = $node->look_down('name', qr/\$txt_Phone/)->attr('value');
    }

    print "Got Dept Phone $val\n";

    return $val;
}


sub getSchoolPhone{
    my ($file) = @_;

    my $html = path($file);
    my $tree = HTML::TreeBuilder->new_from_content($html);
    my @nodes = $tree->look_down(_tag => 'input');

    my $val;

    foreach my $node (@nodes) {
        $val = $node->look_down('name', qr/\$txt_Phone2/)->attr('value');
    }

    print "Got School Phone $val\n";

    return $val;
}


sub getFax{
    my ($file) = @_;

    my $html = path($file);
    my $tree = HTML::TreeBuilder->new_from_content($html);
    my @nodes = $tree->look_down(_tag => 'input');

    my $val;

    foreach my $node (@nodes) {
        $val = $node->look_down('name', qr/\$txt_Fax/)->attr('value');
    }

    print "Got Fax $val\n";

    return $val;
}


sub getAddress1 {
    my ($file) = @_;

    my $html = path($file);
    my $tree = HTML::TreeBuilder->new_from_content($html);
    my @nodes = $tree->look_down(_tag => 'input');

    my $val;

    foreach my $node (@nodes) {
        $val = $node->look_down('name', qr/\$txt_Address1/)->attr('value');
    }

    print "Got Address One $val\n";

    return $val;
}


sub getAddress2 {
    my ($file) = @_;

    my $html = path($file);
    my $tree = HTML::TreeBuilder->new_from_content($html);
    my @nodes = $tree->look_down(_tag => 'input');

    my $val;

    foreach my $node (@nodes) {
        $val = $node->look_down('name', qr/\$txt_Address2/)->attr('value');
    }

    print "Got Address Two $val\n";

    return $val;
}


sub getCity {
    my ($file) = @_;

    my $html = path($file);
    my $tree = HTML::TreeBuilder->new_from_content($html);
    my @nodes = $tree->look_down(_tag => 'input');

    my $val;

    foreach my $node (@nodes) {
        $val = $node->look_down('name', qr/\$txt_City/)->attr('value');
    }

    print "Got Address Two $val\n";

    return $val;
}


sub getState {
    my ($file) = @_;

    my $html = path($file);
    my $tree = HTML::TreeBuilder->new_from_content($html);
    my @nodes = $tree->look_down(_tag => 'input');

    my $val;

    foreach my $node (@nodes) {
        $val = $node->look_down('name', qr/\$txt_State/)->attr('value');
    }

    print "Got State $val\n";

    return $val;
}


sub getZip {
    my ($file) = @_;

    my $html = path($file);
    my $tree = HTML::TreeBuilder->new_from_content($html);
    my @nodes = $tree->look_down(_tag => 'input');

    my $val;

    foreach my $node (@nodes) {
        $val = $node->look_down('name', qr/\$txt_Zip/)->attr('value');
    }

    print "Got Zip $val\n";

    return $val;
}


sub getWebsite {
    my ($file) = @_;

    my $html = path($file);
    my $tree = HTML::TreeBuilder->new_from_content($html);
    my @nodes = $tree->look_down(_tag => 'input');

    my $val;

    foreach my $node (@nodes) {
        $val = $node->look_down('name', qr/\$txt_Website/)->attr('value');
    }

    print "Got Website $val\n";

    return $val;
}


sub getNameAndEmail {
    my ($file) = @_;

    my $tree = HTML::TreeBuilder->new_from_content(get URL);
    my ($table) = $tree->look_down(_tag => 'table', class => 'rgMasterTable');

    for my $tr ( $table->look_down(_tag => 'tr') ) {
        next unless my @td = $tr->look_down(_tag => 'td');
        my ($name, $email) = map { $_->as_trimmed_text } @td[0,1];
    }

    print "Got Name and Email $name and $email\n";

    return ($name, $email);
}

# FILLER: This fills the spreadsheet with all the variables we've acquired

sub fill {
    my ($name, $email, $teamNumber, $teamName, $schoolName,
        $area, $district, $agDeptPhone, $schoolPhone,
        $fax, $addressOne, $addressTwo, $city, $state, $zipCode) = (@_);

    $worksheet->write($Ypos, 1, $name);
    $worksheet->write($Ypos, 2, $email);
    $worksheet->write($Ypos, 3, $teamNumber);
    $worksheet->write($Ypos, 4, $teamName);
    $worksheet->write($Ypos, 5, $schoolName);
    $worksheet->write($Ypos, 6, $area);
    $worksheet->write($Ypos, 7, $district);
    $worksheet->write($Ypos, 8, $agDeptPhone);
    $worksheet->write($Ypos, 9, $schoolPhone);
    $worksheet->write($Ypos, 10, $fax);
    $worksheet->write($Ypos, 11, $addressOne);
    $worksheet->write($Ypos, 12, $addressTwo);
    $worksheet->write($Ypos, 13, $city);
    $worksheet->write($Ypos, 14, $state);
    $worksheet->write($Ypos, 15, $zipCode);
}

# Open judgingcard directory

opendir (DIR, $path) or die "Unable to open directory 'Judging Card': $!";

my @files = readdir(DIR);

# This fills out all top row info

$worksheet->write("A1", "Name");
$worksheet->write("B1", "Email");
$worksheet->write("C1", "Team Number");
$worksheet->write("D1", "Team Name");
$worksheet->write("E1", "School Name");
$worksheet->write("F1", "Area");
$worksheet->write("G1", "District");
$worksheet->write("H1", "Ag Dept Phone");
$worksheet->write("I1", "School Phone");
$worksheet->write("J1", "Fax");
$worksheet->write("K1", "Address One");
$worksheet->write("L1", "Address Two");
$worksheet->write("M1", "City");
$worksheet->write("N1", "State");
$worksheet->write("O1", "Zip Code");

###################################

foreach my $file (@files) { # run through all files in directory

    next if (-d $file); # Skip file if file is folder

    $Ypos = $Ypos + 1;

    my ($name1, $email1) = getNameAndEmail($file);

    $name        = $name1;
    $email       = $email1;
    $teamNumber  = getTeamNumber($file);
    $teamName    = getTeamName($file);
    $schoolName  = getSchoolName($file);
    $area        = getArea($file);
    $district    = getDistrict($file);
    $agDeptPhone = getDeptPhone($file);
    $schoolPhone = getSchoolPhone($file);
    $fax         = getFax($file);
    $addressOne  = getAddress1($file);
    $addressTwo  = getAddress2($file);
    $city        = getCity($file);
    $state       = getState($file);
    $zipCode     = getZip($file);

    fill($name, $email, $teamNumber, $teamName, $schoolName,
        $area, $district, $agDeptPhone, $schoolPhone, $fax,
        $addressOne, $addressTwo, $city, $state, $zipCode);

    print "Progressing                    $file                ($Ypos)\n"
}

closedir(DIR);


sub getTeamNumber {
    my ($file) = @_;

    my $html = path($file);
    my $tree = HTML::TreeBuilder->new_from_content($html);
    my @nodes = $tree->look_down(_tag => 'input');

    my $name;
    my $val;

    foreach my $node (@nodes) {
        $name = $node->look_down('name', qr/\$txt_TeamNumber/);
    }

    if ( ! defined $name ) {
        print "Couldn't get team number\n";
    }

    if ( $name ) {
        $val = $name->attr('value');
        print "Got Team number $val\n";
    }

    return $val;
}

Nouveau script:

use LWP::Simple 'get';
use Spreadsheet::WriteExcel;
use HTML::TreeBuilder;
use Path::Tiny;

my $path = "Z:\\_WEB_CLIENTS\\Morgan Livestock\\Judging Card";

my $workbook  = Spreadsheet::WriteExcel->new('perlOutput.xlsx');
my $worksheet = $workbook->add_worksheet();

opendir (DIR, $path) or die "Unable to open directory 'Judging Card': $!";
my @files = readdir(DIR);

# Specify spreadsheet headers in desired order and write  to file
my @headers = ('Name', 'Email', 'Team Number', 'Team Name', 'School Name', 'Area', 'District', 'Ag Dept Phone', 'School Phone', 'Fax', 'Address One'
    , 'Address Two', 'City', 'State', 'Zip Code');
$worksheet->write_row(0, 0, \@headers);               # first row

# Build ancillary data structures to later sort results by this order
# each header with its index from @headers (specifies columns' order)
my %ho = map { state $idx; $_ => ++$idx } @headers;
# each name (`TeamNumber` ...) with the index of its header
my %name_order = ( Name => $ho{Name}, Email => $ho{Email}, 
    TeamNumber => $ho{'Team Number'}, TeamName => $ho{'Team Name'}, SchoolName => $ho{'School Name'}, Area => $ho{'Area'}, District => $ho{'District'}, 
        AgDeptPhone => $ho{'Ag Dept Phone'}, SchoolPhone => $ho{'School Phone'}, Fax => $ho{'Fax'}, AddressOne => $ho{'Address One'},
        AddressTwo => $ho{'Address Two'}, City => $ho{'City'}, State => $ho{'State'}, Zip => $ho{'Zip Code'});
        
sub getNames {
    my ($file) = @_;
    my $tree = HTML::TreeBuilder->new_from_content( path($file) );
    my @nodes = $tree->look_down(_tag => 'input');

    # List phrases to find, and build hash with their derived names
    # Should probably be defined globally, once for the whole program
    my @patterns  = map { '$txt_' . $_ } 
        qw(TeamName TeamNumber SchoolName Area District 
           Phone Phone2 Fax Address1 Address2 City State Zip Website);

    # Name for each pattern: everything after first _ (so after $txt_) 
    my %patt_name = map { $_ => (/[^_]+_(.*)/)[0] } @patterns;
    my %name_val;
    foreach my $node (@nodes) {
        foreach my $patt (@patterns) {
            my $name = $node->look_down('name', qr/\Q$patt/);
            if ($name) {
                $name_val{$patt_name{$patt}} = $name->attr('value') || '';
            }
        }
    }

    # Name and Email are stored differently. Fetch those now
    my ($table) = $tree->look_down(_tag => 'table', class => 'rgMasterTable');
    for my $tr ( $table->look_down(_tag => 'tr') ) { 
        next unless my @td = $tr->look_down(_tag => 'td');
        # Discard incomplete Name-Email records -- either both or none
        @name_val{qw(Name Email)} = 
            map { (defined) ? $_->as_trimmed_text : '' } @td[0,1];
    }
    return \%name_val;
}

sub fill_row {
   my ($ws, $row, $rdata, $rorder) = @_;    
   my %name_val   = %$rdata;
   my %name_order = %$rorder;

   my @vals = map { $name_val{$_} } 
              sort { $name_order{$a} <=> $name_order{$b} } 
              keys %name_val;

   $ws->write_row($row, 0, \@vals);  # add check (returns 0 on success)

   return 1;

    my $row = 1;
}

foreach my $file (@files) {
    next if -d $file;

    my %name_val = %{ getNames($file) };

    foreach my $name (sort keys %name_val) {
        # Fill the spreadsheet with all info in one go
        if ($name_val{$name}) {
            print "$name => $name_val{$name}\n";
        } else {
            print "Not found $name in $file\n";
        }
    }

    
    my %name_val = %{ getNames($file) };
    fill_row($worksheet, $row++, \%name_val, \%name_order);

    foreach my $name (sort keys %name_val) {  # demo
        if ($name_val{$name}) { print "$name => $name_val{$name}\n" }
        else                  { print "Not found $name in $file\n" }
    }
    
    print "Progressing $Ypos \n"
}

                                                                                                  
2
Ultracrepidarian 3 avril 2017 à 22:31

2 réponses

Meilleure réponse

En bref, certains de ces 'name' ne se trouvent probablement tout simplement pas dans (certains) fichiers HTML. Alors testez d'abord pour voir s'il est là, puis écrivez dans $val ou affichez un message indiquant qu'il n'est pas trouvé.

La chose la plus apparente à améliorer: il n'y a pas besoin de fonctions séparées. Vous pouvez les rechercher et les trouver tous en un seul appel et les stocker dans le hachage name => value, qui est renvoyé.

sub getNames {
    my ($file) = @_;
    my $tree = HTML::TreeBuilder->new_from_content( path($file) );
    my @nodes = $tree->look_down(_tag => 'input');

    # List phrases to find, and build hash with their derived names
    # Should probably be defined globally, once for the whole program
    my @patterns  = map { '$txt_' . $_ } 
        qw(TeamName TeamNumber SchoolName Area District 
           Phone Phone2 Fax Address1 Address2 City State Zip Website);

    # Name for each pattern: everything after first _ (so after $txt_) 
    my %patt_name = map { $_ => (/[^_]+_(.*)/)[0] } @patterns;
    my %name_val;
    foreach my $node (@nodes) {
        foreach my $patt (@patterns) {
            my $name = $node->look_down('name', qr/\Q$patt/);
            if ($name) {
                $name_val{$patt_name{$patt}} = $name->attr('value') || '';
            }
        }
    }

    # Name and Email are stored differently. Fetch those now
    my ($table) = $tree->look_down(_tag => 'table', class => 'rgMasterTable');
    for my $tr ( $table->look_down(_tag => 'tr') ) { 
        next unless my @td = $tr->look_down(_tag => 'td');
        # Discard incomplete Name-Email records -- either both or none
        if (2 == grep { not ref $_ } @td) {
            @name_val{qw(Name Email)} = map { $_->as_trimmed_text } @td[0,1];
        }
        else { @name_val{qw(Name Email)} = ('', '') }
    }
    return \%name_val;
}

Pour Name et Email, nous avons besoin que les deux soient là sous forme de texte, ou les deux sont rejetés. (L'exemple de source contient There are no people ... dans un div pour Name, et rien pour Email.)

Pour obtenir tout ce qui est là, au lieu de if-else ci-dessus, utilisez

@name_val{qw(Name Email)} = 
    map { (defined) ? $_->as_trimmed_text : '' } @td[0,1];

Et nous obtenons la note citée ci-dessus pour Name et une chaîne vide pour Email, avec cet exemple.

Alors

# Specify spreadsheet headers in desired order and write  to file
my @headers = ('Name', 'Email', 'Team Number', 'Team Name', ...);
$worksheet->write_row(0, 0, \@headers);               # first row

# Build ancillary data structures to later sort results by this order
# each header with its index from @headers (specifies columns' order)
my %ho = map { state $idx; $_ => ++$idx } @headers;
# each name (`TeamNumber` ...) with the index of its header
my %name_order = ( Name => $ho{Name}, Email => $ho{Email}, 
    TeamNumber => $ho{'Team Number'}, TeamName => $ho{'Team Name'}, ... 
);

my $row = 1;
foreach my $file (@files) {
    next if -d $file;

    my %name_val = %{ getNames($file) };
    fill_row($worksheet, $row++, \%name_val, \%name_order);

    foreach my $name (sort keys %name_val) {  # demo
        if ($name_val{$name}) { print "$name => $name_val{$name}\n" }
        else                  { print "Not found $name in $file\n" }
    }
}

sub fill_row {
   my ($ws, $row, $rdata, $rorder) = @_;    
   my %name_val   = %$rdata;
   my %name_order = %$rorder;

   my @vals = map { $name_val{$_} } 
              sort { $name_order{$a} <=> $name_order{$b} } 
              keys %name_val;

   $ws->write_row($row, 0, \@vals);  # add check (returns 0 on success)

   return 1;
}

Le write_row prend une référence à un tableau et écrit une ligne avec ses éléments. Notez que write peut également être utilisé de cette façon, quand un arrayref est donné.

La sortie sur le fichier HTML lié

Area => 1
District => 1
State => NM
TeamName => Ruidoso
TeamNumber => 83

Et Not found ... pour les autres. Le fichier .xls est correct (lorsque la liste complète des noms est utilisée).


L'ensemble du programme

use warnings;
use strict;
use feature qw(say state);

use Path::Tiny;
use HTML::TreeBuilder;
use Spreadsheet::WriteExcel;


my @src = qw(TeamName TeamNumber SchoolName Area District Phone Phone2 
    Fax Address1 Address2 City State Zip Website);
my @headers = ('Name', 'Email', 'Team Number', 'Team Name', 'School Name', 
    'Area', 'District', 'Ag Dept Phone', 'School Phone', 'Fax', 'Address One', 
    'Address Two', 'City', 'State', 'Zip Code', 'Web Site'
);
my @lens = map { length } @headers;  # for printing

# Numeric order of headers' fields (so, columns)
my %ho = map { state $idx; $_ => ++$idx } @headers;
# Translation: name from HTML source => column number (retrieved from %ho)
my %name_order = ( 
    Name => $ho{Name}, Email => $ho{Email}, 
    TeamNumber => $ho{'Team Number'},
    TeamName => $ho{'Team Name'}, 
    SchoolName => $ho{'School Name'}, Area => $ho{'Area'},
    District => $ho{'District'}, Phone2 => $ho{'Ag Dept Phone'},
    Phone => $ho{'School Phone'}, Fax => $ho{'Fax'}, 
    Address1 => $ho{'Address One'},
    Address2 => $ho{'Address Two'}, 
    City => $ho{'City'}, State => $ho{'State'}, 'Zip' => $ho{'Zip Code'},
    Website => $ho{'Web Site'}
);

say "Order (column) of names from HTML source to follow headers:";
printf("%-10s  ==>  %s\n", $_, $name_order{$_})
    for sort { $name_order{$a} <=> $name_order{$b} } keys %name_order;
say '';

my $workbook  = Spreadsheet::WriteExcel->new('data.xls');
my $worksheet = $workbook->add_worksheet();

# Print headers to .xls file (and to screen)
$worksheet->write_row(0, 0, \@headers);
say "Spreadsheet, header and rows:";
prn_row(\@headers);  # print to screen

my @files = ('fetch_names.html');
my $row = 1;
foreach my $file (@files) {
    next if -d $file;

    # Parse the file, print the row to spreadsheet
    my %name_val = %{ getNames($file) };
    fill_row($worksheet, $row++, \%name_val, \%name_order);
}


# Functions

sub fill_row {
    my ($ws, $row, $rdata, $rorder) = @_;

    my %name_val = %$rdata;
    my $name_order = %$rorder;

    my @vals =
        map { $name_val{$_} }
        sort { $name_order{$a} <=> $name_order{$b} }
        grep { exists $name_order{$_} }
        keys %name_val;

    prn_row(\@vals);  # print to screen

    $worksheet->write_row($row, 0, \@vals);  # test this (returns 0 on success)

    return 1;
}

sub prn_row {
    my @ary = @{ $_[0] };
    for (0..$#ary) {
        my $len = $lens[$_];
        printf("%${len}s  ", $ary[$_]);
    }
    say '';
}

sub getNames {
    my ($file) = @_;
    my $tree = HTML::TreeBuilder->new_from_content( path($file)->slurp );
    my @nodes = $tree->look_down(_tag => 'input');

    my @patterns = map { '$txt_' . $_ } @src;
    # List phrases to find, and build hash with their derived names
    # Name for each pattern: everything first _ (so after \$txt_)
    my %patt_name = map { $_ => (/[^_]+_(.*)/)[0] } @patterns;

    my %name_val;
    foreach my $node (@nodes) {
        foreach my $patt (@patterns) {
            my $name = $node->look_down('name', qr/\Q$patt/) or next;
            $name_val{$patt_name{$patt}} = $name->attr('value') // '';
        }
    }

    # Name and Email are stored differently, fetch those now
    my ($table) = $tree->look_down(_tag => 'table', class => 'rgMasterTable');
    for my $tr ( $table->look_down(_tag => 'tr') ) {
        next unless my @td = $tr->look_down(_tag => 'td');
        # Discard incomplete Name-Email records -- either both or none
        if (2 == grep { not ref } @td) {
            @name_val{qw(Name Email)} = map { $_->as_trimmed_text } @td[0,1];
        }
        else { @name_val{qw(Name Email)} = ('', '') }
    }
    return \%name_val;
}

Cela fonctionne comme un programme complet avec l'exemple de source HTML fourni.


Ajout: une page réelle peut avoir plusieurs paires nom-adresse e-mail

use LWP::Simple qw(get);

sub getNames {
    my ($file) = @_; 
    my $url = 'https://www.judgingcard.com/Directory/Directory.aspx?ID=1643';
    my $page = get($url) or die "Can't get the page $url: $!";
    my $tree = HTML::TreeBuilder->new_from_content( $page );

    my @nodes = $tree->look_down(_tag => 'input');

    my @patterns = map { '$txt_' . $_ } @src;
    # List phrases to find, and build hash with their derived names
    # Name for each pattern: everything first _ (so after \$txt_)
    my %patt_name = map { $_ => (/[^_]+_(.*)/)[0] } @patterns;

    my %name_val;
    foreach my $node (@nodes) {
        foreach my $patt (@patterns) {
            my $name = $node->look_down('name', qr/\Q$patt/) or next;
            $name_val{$patt_name{$patt}} = $name->attr('value') // ''; 
        }   
    }   

    # Name and Email are stored differently, fetch those now
    my %name_email;
    my ($table) = $tree->look_down(_tag => 'table', class => 'rgMasterTable');
    for my $tr ( $table->look_down(_tag => 'tr') ) { 
        next unless my @td = $tr->look_down(_tag => 'td');
        # There may be more than one Name-Email pair
        # so enter key-value pair explicitely
        if (2 <= grep { ref } @td) {
            $name_email{$td[0]->as_trimmed_text} = $td[1]->as_trimmed_text;
        }
        else { %name_email = ('', '') }
    }
    return \%name_val, \%name_email;
}

Ensuite, dans l'ensemble, vous avez besoin

foreach my $file (@files) {
    next if -d $file;

    # Parse the file, unpack name-value and name-email hashes
    my ($rname_val, $rname_email) = getNames($file);
    my %name_val   = %$rname_val;
    my %name_email = %$rname_email;

    # Print a row for each Name-Email, adding them to %name_val
    foreach my $name (keys %name_email) {
        $name_val{Name}  = $name;
        $name_val{Email} = $name_email{$name};
        fill_row($worksheet, $row++, \%name_val, \%name_order);
    }
}

Le format souhaité avec plusieurs paires Nom-E-mail est: les mêmes en-têtes et pour chaque paire, une ligne distincte est imprimée dans le fichier, où toutes les informations autres que Nom-E-mail sont les mêmes.

La feuille de calcul imprimée (l'URL utilisée a été fournie dans les commentaires)

Printed spreadsheet. URL was provided in comments

1
zdim 24 avril 2017 à 18:13

Vous pourriez probablement réduire le code. Même si dans l'exemple suivant je n'utilise pas HTML :: TreeBuidler, l'approche est similaire. En utilisant Mojo :: DOM58,

use 5.014;
use warnings;

use Mojo::DOM58;
use Path::Tiny;
use Data::Dumper;

my @fields = qw( TeamName TeamNumber SchoolName Area District Phone Phone2 Fax Address1 Address2 City State Zip Website );

my $html = path('team.html')->slurp;
my $dom = Mojo::DOM58->new($html);

my $data;
for my $field( @fields ) {
    $data->{$field} = $dom->at(qq{input[name*="txt_$field"]})->attr('value') // "";
}

say Dumper $data;

Impressions:

$VAR1 = {
          'TeamName' => 'Ruidoso',
          'Zip' => '',
          'State' => 'NM',
          'City' => '',
          'District' => '1',
          'Phone2' => '',
          'Area' => '1',
          'SchoolName' => '',
          'Address2' => '',
          'Website' => '',
          'Address1' => '',
          'Phone' => '',
          'Fax' => '',
          'TeamNumber' => '83'
        };
3
jm666 3 avril 2017 à 21:00