#!/usr/bin/perl -w
#
# $Revision: 1.1.1.1 $
#
# $Date: 2003-07-27 11:07:11 $

# xmlfilter - copy an XML document to standard output, dropping or keeping
# elements selected by name, by name pattern, or by attribute.
#
# The filter is a single boolean, $pass, that is flipped whenever an element
# matches the criteria opposite to the current state (drop criteria while
# passing, keep criteria while dropping).  The depth at which each flip
# happened is remembered on @togglestack so that the matching end tag can
# flip the state back.

use strict;
use XML::Parser;

my $Usage = <<'End_of_Usage;';
Usage is:
    xmlfilter [-h] [-nl] [{-+}root] [{-+}el=elname] [{-+}el:elnamepat]
              [{-+}att:attname] [{-+}att:attname:attvalpat] xmlfile

Prints on standard output the result of filtering the given xmlfile
for elements according to the switches. A '-' option will drop the
element from the output; a '+' will keep it. The output should also
be a well-formed XML document.

    -h          Print this message

    -nl         Emit a newline prior to every start tag.

    [-+]root    Drop (or keep) the root element. Defaults to keep.
                If the root element were named "foo", then -root
                would be equivalent to -el=foo. Note that even if
                you're dropping the root element, it's start and
                end tag are kept in order that the output remains
                a well-formed XML document.

    [-+]el=elname
                Drop (or keep) elements of type elname.

    [-+]el:elnamepat
                Drop (or keep) element whose type name matches elnamepat.

    [-+]att:attname
                Drop (or keep) elements which have an attribute = attname.

    [-+]att:attname:attvalpat
                Drop (or keep) elements which have an attribute = attname
                and for which the attribute value matches attvalpat.
End_of_Usage;

my $pass       = 1;    # true while output is being emitted
my $do_newline = 0;    # -nl: newline before every start tag
my $attcheck   = 0;    # true once any att: option has been given

my %drop_el;           # exact element names that switch output off
my @drop_elpat;        # element-name patterns that switch output off
my %keep_el;           # exact element names that switch output on
my @keep_elpat;        # element-name patterns that switch output on
my %drop_att;          # attribute name => value matcher (switch off)
my %keep_att;          # attribute name => value matcher (switch on)

my $always_true = sub { 1; };
my $never_true  = sub { 0; };

my $root_element = '';
my $in_cdata     = 0;    # true between CdataStart and CdataEnd events

# Process options
while ( defined( $ARGV[0] ) and $ARGV[0] =~ /^[-+]/ ) {
    my $opt = shift;

    if ( $opt eq '-root' ) {
        $pass = 0;
    }
    elsif ( $opt eq '+root' ) {
        $pass = 1;
    }
    elsif ( $opt eq '-h' ) {
        print $Usage;
        exit;
    }
    elsif ( $opt eq '-nl' ) {
        $do_newline = 1;
    }
    elsif ( $opt =~ /^([-+])el([:=])(\S*)/ ) {
        my ( $disp, $kind, $pattern ) = ( $1, $2, $3 );

        my ( $hashref, $aref ) =
          ( $disp eq '-' )
          ? ( \%drop_el, \@drop_elpat )
          : ( \%keep_el, \@keep_elpat );

        # '=' is an exact name, ':' is a pattern.
        if ( $kind eq '=' ) {
            $hashref->{$pattern} = 1;
        }
        else {
            push( @$aref, $pattern );
        }
    }
    elsif ( $opt =~ /^([-+])att:(\w+)(?::(\S*))?/ ) {
        my ( $disp, $id, $pattern ) = ( $1, $2, $3 );
        my $ref = ( $disp eq '-' ) ? \%drop_att : \%keep_att;

        # No value pattern (or an empty one) means the mere presence of
        # the attribute is enough.
        $ref->{$id} =
          ( defined($pattern) and length($pattern) )
          ? compile_matcher( $pattern, $opt )
          : $always_true;

        $attcheck = 1;
    }
    else {
        die "Unknown option: $opt\n$Usage";
    }
}

# All element-name patterns of one kind are combined into one alternation.
# A length test (rather than a truth test) is used so that the pattern "0"
# is not mistaken for "no pattern".
my $drop_el_pattern = join( '|', @drop_elpat );
my $keep_el_pattern = join( '|', @keep_elpat );

my $drop_sub =
  length($drop_el_pattern)
  ? compile_matcher( $drop_el_pattern, '-el:' )
  : $never_true;

my $keep_sub =
  length($keep_el_pattern)
  ? compile_matcher( $keep_el_pattern, '+el:' )
  : $never_true;

my $doc = shift;
die "No file specified\n$Usage" unless defined($doc);

# Depths at which $pass was flipped; the innermost flip is last.
my @togglestack = ();

my $p = XML::Parser->new(
    ErrorContext => 2,
    Handlers     => {
        Start => \&start_handler,
        End   => \&end_handler
    }
);

# Text handlers are only installed while output is switched on.
set_text_handlers( $p, 1 ) if $pass;

$p->parsefile($doc);

# With -root the bare root start tag was printed by start_handler, so the
# matching end tag has to be supplied here to keep the output well-formed.
print "</$root_element>\n" unless $pass;

################
## End of main
################

# compile_matcher( $pattern, $origin )
#
# Compiles $pattern as a regular expression and returns a code reference
# that takes one string and returns true if the string matches.  $origin
# is only used in the error message.  Dies if the pattern does not compile.
#
# The pattern comes straight from the command line, so it is compiled with
# qr// rather than being pasted into a string eval.
sub compile_matcher {
    my ( $pattern, $origin ) = @_;

    my $re = eval { qr/$pattern/ };
    die "Invalid pattern '$pattern' given with $origin: $@"
      unless defined($re);

    return sub { return scalar( $_[0] =~ $re ); };
}    # End compile_matcher

# set_text_handlers( $xp, $enabled )
#
# Installs (true $enabled) or removes (false $enabled) the character-data
# and CDATA-section handlers on the given parser object.
sub set_text_handlers {
    my ( $xp, $enabled ) = @_;

    if ($enabled) {
        $xp->setHandlers(
            Char       => \&char_handler,
            CdataStart => \&cdata_start,
            CdataEnd   => \&cdata_end
        );
    }
    else {
        $xp->setHandlers(
            Char       => 0,
            CdataStart => 0,
            CdataEnd   => 0
        );
    }
    return;
}    # End set_text_handlers

# toggle_pass( $xp )
#
# Flips $pass and brings the text handlers in line with the new state.
sub toggle_pass {
    my ($xp) = @_;

    $pass = !$pass;
    set_text_handlers( $xp, $pass );
    return;
}    # End toggle_pass

# Start-tag handler.  Called with the parser object, the element name and
# then the attribute name/value pairs.
sub start_handler {
    my $xp = shift;
    my $el = shift;

    # The first start tag seen is the root.  When output starts switched
    # off (-root), a bare root start tag is still printed.
    unless ($root_element) {
        $root_element = $el;
        print "<$el>\n" unless $pass;
    }

    # While passing we look for reasons to drop; while dropping we look
    # for reasons to keep.
    my ( $elref, $attref, $sub ) =
        $pass
      ? ( \%drop_el, \%drop_att, $drop_sub )
      : ( \%keep_el, \%keep_att, $keep_sub );

    if (   defined( $elref->{$el} )
        or $sub->($el)
        or check_atts( $attref, @_ ) )
    {
        toggle_pass($xp);
        push( @togglestack, $xp->depth );
    }

    if ($pass) {
        print "\n" if $do_newline;
        print "<$el";
        while (@_) {
            my $id  = shift;
            my $val = shift;

            # Values are written in single quotes, so escape those too.
            $val = $xp->xml_escape( $val, "'" );
            print " $id='$val'";
        }
        print ">";
    }
    return;
}    # End start_handler

# End-tag handler.  Called with the parser object and the element name.
sub end_handler {
    my $xp = shift;
    my $el = shift;

    # The end tag belongs to the state that was in force inside the
    # element, so it is printed before any flip is undone.
    if ($pass) {
        print "</$el>";
    }

    # Undo the flip made by this element's start tag, if it made one.
    if ( @togglestack and $togglestack[-1] == $xp->depth ) {
        toggle_pass($xp);
        pop(@togglestack);
    }
    return;
}    # End end_handler

# Character-data handler.  Text inside a CDATA section is printed verbatim;
# all other text is XML-escaped (including '>').
sub char_handler {
    my ( $xp, $text ) = @_;

    if ( length($text) ) {
        $text = $xp->xml_escape( $text, '>' ) unless $in_cdata;
        print $text;
    }
    return;
}    # End char_handler

# CDATA-section start: emit the opening marker and stop escaping text.
sub cdata_start {
    my $xp = shift;

    print '<![CDATA[';
    $in_cdata = 1;
    return;
}

# CDATA-section end: emit the closing marker and resume escaping text.
sub cdata_end {
    my $xp = shift;

    print ']]>';
    $in_cdata = 0;
    return;
}

# check_atts( $ref, name => value, ... )
#
# $ref maps attribute names to value-matcher code references.  Returns a
# true value as soon as one of the given attributes has a matcher that
# accepts its value, and a false value otherwise.  Short-circuits when no
# att: option was given at all.
sub check_atts {
    return $attcheck unless $attcheck;

    my $ref = shift;

    while (@_) {
        my $id  = shift;
        my $val = shift;

        if ( defined( $ref->{$id} ) ) {
            my $ret = $ref->{$id}->($val);
            return $ret if $ret;
        }
    }

    return 0;
}    # End check_atts

# Tell Emacs that this is really a perl script
# Local Variables:
# mode:perl
# End: