The following is a one-shot Perl script I used to convert someone's XML file to CSV. As a Perl beginner, it took me a while to figure out a fast and simple way to parse XML in Perl, so it might be useful to someone else.
#!/usr/bin/perl
use strict; use warnings;
use XML::Twig;
use Text::CSV_XS;
# Column order of the CSV output. Each name is also the XML tag whose
# handler supplies the value for that column.
my @headers = qw(
    Picture
    StateID
    ItemTypeID
    Name
    Descript
    Article
    Price
    Qty
    ItemAvailabilityID
    SerialNumber
    Weight
);

# Field separator, shared by the header line and the data rows so the two
# cannot drift apart.
my $separator = ';';

# 'Item' gets the row-emitting handler; every column tag gets handle_tag.
my %twig_handlers = (
    'Item' => \&handle_item,
    map { ($_ => \&handle_tag) } @headers,
);

my $twig = XML::Twig->new(twig_handlers => \%twig_handlers);

# binary => 1 allows fields containing non-ASCII or control characters;
# with it off, Text::CSV_XS can reject such a field and print() fails.
my $csv = Text::CSV_XS->new({sep_char => $separator, binary => 1})
    or die "Cannot create CSV writer: " . Text::CSV_XS->error_diag() . "\n";

# State shared with the handlers:
#   @columns       values gathered for the Item currently being parsed
#   $have_picture  true once a Picture tag was seen inside the current Item
my @columns;
my $have_picture = 0;

# Fail with a usage message instead of handing undef to the parser.
my $xml_file = shift @ARGV;
defined $xml_file
    or die "Usage: $0 FILE.xml\n";

# Text coming out of the parser is decoded characters, so encode explicitly
# on output rather than relying on Perl's default handling of wide characters.
binmode STDOUT, ':encoding(UTF-8)';

print join($separator, @headers), "\n";
$twig->parsefile($xml_file);
# Twig handler for every tag listed in @headers.
#
# XML::Twig calls it as handler($twig, $element) once the element has been
# completely parsed. The element's value is stored in @columns at the index
# its tag has in @headers, so a row stays aligned with the header line even
# when a tag is missing, repeated, or appears in a different order.
#
# For 'Picture' the value is the 'url' attribute; for every other tag it is
# the element's whitespace-trimmed text. When a tag occurs more than once
# within the same Item, the first defined value is kept.
#
# Returns a true value (XML::Twig consults handler return values when
# several handlers match the same element).
sub handle_tag {
    my (undef, $elt) = @_;
    my $tag = $elt->tag;

    # Position of this tag in the output row; ignore tags that have no column.
    my ($slot) = grep { $headers[$_] eq $tag } 0 .. $#headers;
    return 1 unless defined $slot;

    # Size the row to the header so absent tags come out as empty fields
    # instead of shortening the row.
    $#columns = $#headers if $#columns < $#headers;

    my $value;
    if ($tag eq 'Picture') {
        $have_picture = 1;    # keep the file-level flag accurate
        $value = $elt->att('url');
    }
    else {
        $value = $elt->trimmed_text;
    }

    # First occurrence wins; later duplicates must not shift or overwrite.
    $columns[$slot] = $value unless defined $columns[$slot];

    return 1;
}
# Twig handler for 'Item'.
#
# Handlers are called when an element is closed, so by the time this runs the
# handlers for the tags inside the Item have already filled @columns. Writes
# the collected row to STDOUT as one CSV line, then resets the per-item state
# for the next Item.
#
# A row that Text::CSV_XS refuses to write is reported on STDERR instead of
# being dropped without notice; the line terminator is still printed, as
# before.
#
# Returns a true value.
sub handle_item {
    my ($parser) = @_;

    $csv->print(\*STDOUT, \@columns)
        or warn "Could not write CSV row: " . $csv->error_diag() . "\n";
    print "\n";

    # reset
    @columns = ();
    $have_picture = 0;

    # The row has been emitted, so release everything parsed so far; memory
    # use then stays flat no matter how many Items the file contains.
    $parser->purge;

    return 1;
}
The XML was similar to the following:
<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE Items SYSTEM "http://domain.com/xml/MerchantItems.dtd">
<Items>
<Item>
<Images>
<Picture url="http://domain.com/775/7757d248aeb782e916a9a51670171ec1.jpeg"/>
</Images>
<StateID>1735000</StateID>
<ItemTypeID>29490209</ItemTypeID>
<Name><![CDATA[Lamp 2541/8]]></Name>
<Descript><![CDATA[ ... HTML code ... ]]></Descript>
<Article>2541/8</Article>
<Price>7267.50</Price>
<Qty>100</Qty>
<ItemAvailabilityID>3719000</ItemAvailabilityID>
<SerialNumber>92645</SerialNumber>
<Weight>0</Weight>
</Item>
<Item>
<!-- ... -->
</Item>
</Items>