#!/usr/bin/perl -w
use strict;
use CGI qw/:standard/;

# The tags table. Each entry in the table is keyed on a tag, and has as its
# value a list of attributes the tag can take. Note this is only a partial
# list of HTML tags - a full list with all attributes would make for a 
# much sharper testcase.
#
# This table is based on the one in Michal Zalewski's original code, which
# is Copyright (C) 2004 by Michal Zalewski. Used with permission.
#
my %tags = (
    # Tag name => reference to the list of attribute names it may be given.
    # The order inside each list is significant: attributes are chosen by
    # random index, so reordering changes what a given seed generates.
    "A"        => [ qw( NAME HREF REF REV TITLE TARGET SHAPE onLoad STYLE ) ],
    "APPLET"   => [ qw( CODEBASE CODE NAME ALIGN ALT HEIGHT WIDTH HSPACE
                        VSPACE DOWNLOAD HEIGHT NAME TITLE onLoad STYLE ) ],
    "AREA"     => [ qw( SHAPE ALT CO-ORDS HREF onLoad STYLE ) ],
    "B"        => [ qw( onLoad STYLE ) ],
    "BANNER"   => [ qw( onLoad STYLE ) ],
    "BASE"     => [ qw( HREF TARGET onLoad STYLE ) ],
    "BASEFONT" => [ qw( SIZE onLoad STYLE ) ],
    "BGSOUND"  => [ qw( SRC LOOP onLoad STYLE ) ],
    "BQ"       => [ qw( CLEAR NOWRAP onLoad STYLE ) ],
    "BODY"     => [ qw( BACKGROUND BGCOLOR TEXT LINK ALINK VLINK LEFTMARGIN
                        TOPMARGIN BGPROPERTIES onLoad STYLE ) ],
    "CAPTION"  => [ qw( ALIGN VALIGN onLoad STYLE ) ],
    "CENTER"   => [ qw( onLoad STYLE ) ],
    "COL"      => [ qw( ALIGN SPAN onLoad STYLE ) ],
    "COLGROUP" => [ qw( ALIGN VALIGN HALIGN WIDTH SPAN onLoad STYLE ) ],
    "DIV"      => [ qw( ALIGN CLASS LANG onLoad STYLE ) ],
    "EMBED"    => [ qw( SRC HEIGHT WIDTH UNITS NAME PALETTE onLoad STYLE ) ],
    "FIG"      => [ qw( SRC ALIGN HEIGHT WIDTH UNITS IMAGEMAP onLoad STYLE ) ],
    "FN"       => [ qw( ID onLoad STYLE ) ],
    "FONT"     => [ qw( SIZE COLOR FACE onLoad STYLE ) ],
    "FORM"     => [ qw( ACTION METHOD ENCTYPE TARGET SCRIPT onLoad STYLE ) ],
    "FRAME"    => [ qw( SRC NAME MARGINWIDTH MARGINHEIGHT SCROLLING
                        FRAMESPACING onLoad STYLE ) ],
    "FRAMESET" => [ qw( ROWS COLS onLoad STYLE ) ],
    "H1"       => [ qw( SRC DINGBAT onLoad STYLE ) ],
    "HEAD"     => [ qw( onLoad STYLE ) ],
    "HR"       => [ qw( SRC SIZE WIDTH ALIGN COLOR onLoad STYLE ) ],
    "HTML"     => [ qw( onLoad STYLE ) ],
    "IFRAME"   => [ qw( ALIGN FRAMEBORDER HEIGHT MARGINHEIGHT MARGINWIDTH
                        NAME SCROLLING SRC ADDRESS WIDTH onLoad STYLE ) ],
    "IMG"      => [ qw( ALIGN ALT SRC BORDER DYNSRC HEIGHT HSPACE ISMAP LOOP
                        LOWSRC START UNITS USEMAP WIDTH VSPACE onLoad STYLE ) ],
    "INPUT"    => [ qw( TYPE NAME VALUE onLoad STYLE ) ],
    "ISINDEX"  => [ qw( HREF PROMPT onLoad STYLE ) ],
    "LI"       => [ qw( SRC DINGBAT SKIP TYPE VALUE onLoad STYLE ) ],
    "LINK"     => [ qw( REL REV HREF TITLE onLoad STYLE ) ],
    "MAP"      => [ qw( NAME onLoad STYLE ) ],
    "MARQUEE"  => [ qw( ALIGN BEHAVIOR BGCOLOR DIRECTION HEIGHT HSPACE LOOP
                        SCROLLAMOUNT SCROLLDELAY WIDTH VSPACE onLoad STYLE ) ],
    "MENU"     => [ qw( onLoad STYLE ) ],
    "META"     => [ qw( HTTP-EQUIV CONTENT NAME onLoad STYLE ) ],
    "MULTICOL" => [ qw( COLS GUTTER WIDTH onLoad STYLE ) ],
    "NOFRAMES" => [ qw( onLoad STYLE ) ],
    "NOTE"     => [ qw( CLASS SRC onLoad STYLE ) ],
    "OVERLAY"  => [ qw( SRC X Y HEIGHT WIDTH UNITS IMAGEMAP onLoad STYLE ) ],
    "PARAM"    => [ qw( NAME VALUE onLoad STYLE ) ],
    "RANGE"    => [ qw( FROM UNTIL onLoad STYLE ) ],
    "SCRIPT"   => [ qw( LANGUAGE onLoad STYLE ) ],
    "SELECT"   => [ qw( NAME SIZE MULTIPLE WIDTH HEIGHT UNITS onLoad STYLE ) ],
    "OPTION"   => [ qw( VALUE SHAPE onLoad STYLE ) ],
    "SPACER"   => [ qw( TYPE SIZE WIDTH HEIGHT ALIGN onLoad STYLE ) ],
    "SPOT"     => [ qw( ID onLoad STYLE ) ],
    "TAB"      => [ qw( INDENT TO ALIGN DP onLoad STYLE ) ],
    "TABLE"    => [ qw( ALIGN WIDTH BORDER CELLPADDING CELLSPACING BGCOLOR
                        VALIGN COLSPEC UNITS DP onLoad STYLE ) ],
    "TBODY"    => [ qw( CLASS ID onLoad STYLE ) ],
    "TD"       => [ qw( COLSPAN ROWSPAN ALIGN VALIGN BGCOLOR onLoad STYLE ) ],
    "TEXTAREA" => [ qw( NAME COLS ROWS onLoad STYLE ) ],
    "TEXTFLOW" => [ qw( CLASS ID onLoad STYLE ) ],
    "TFOOT"    => [ qw( COLSPAN ROWSPAN ALIGN VALIGN BGCOLOR onLoad STYLE ) ],
    "TH"       => [ qw( ALIGN CLASS ID onLoad STYLE ) ],
    "TITLE"    => [ qw( onLoad STYLE ) ],
    "TR"       => [ qw( ALIGN VALIGN BGCOLOR CLASS onLoad STYLE ) ],
    "UL"       => [ qw( SRC DINGBAT WRAP TYPE PLAIN onLoad STYLE ) ],
);


# Nasty value generator. Returns a random string, a random number or one of
# a selection of strings which a browser might find painful.
#
sub barbedValue
{
    # One roll in the range 0..34 decides which kind of value is produced.
    # The order in which rand() is consumed below is part of the contract:
    # it is what makes a page reproducible from its seed.
    my $roll = int(rand(35));

    # 0-3: a URL scheme prefixed to another, recursively generated, value.
    my @schemes = ( "javascript:", "file:", "http:", "about:" );
    if( $roll < scalar(@schemes) ) {
        return $schemes[$roll] . barbedValue();
    }

    # 4: another value dressed up as a character entity.
    if( $roll == 4 ) {
        my $inner = barbedValue();
        return "&" . $inner . ";";
    }

    # 5: two further values concatenated, generated left then right.
    if( $roll == 5 ) {
        my $first  = barbedValue();
        my $second = barbedValue();
        return $first . $second;
    }

    # 6-12: fixed strings (frame targets, alignments, a format string, etc).
    my @literals = ( "_blank", "_self", "top", "left", "%n%n%n%n%n%n",
                     "#", "*" );
    if( $roll <= 12 ) {
        return $literals[$roll - 6];
    }

    # 13-21: a single random byte repeated 0..8191 times.
    if( $roll <= 21 ) {
        my $byte   = chr(int(rand(256)));
        my $repeat = int(rand(8192));
        return $byte x $repeat;
    }

    # 22-25: a non-negative integer below 2**32.
    if( $roll <= 25 ) {
        return int(rand(2**32));
    }

    # 26-29: the negation of such an integer.
    if( $roll <= 29 ) {
        return 0 - int(rand(2**32));
    }

    # 30-34: a small integer in 0..255.
    return int(rand(256));
}


#
# If you get a browser crash and want to look at the generated HTML which
# caused it, check the web server error log. The bottom line shows the 
# random seed used to generate the last page, e.g.: 
#
#    "Running with seed 1519068874"
#
# To recreate that HTML page, pass that number as a seed parameter on the URL:
#
#    http://localhost/cgi-bin/mangle_pl.cgi?seed=1519068874
#
# Obviously you'll have to use wget or a different browser. ;)
#

print "Content-Type: text/html\n\n<HTML><HEAD>\n";

# Fetch the seed exactly once and in scalar context. Calling param() in list
# context (e.g. directly inside a print argument list) returns every value
# supplied for the parameter and would copy them unfiltered into the log.
my $requestedSeed = param("seed");

if( !defined($requestedSeed) ) {
  # No seed given: this is a normal fuzzing hit. Ask the browser to reload
  # the script straight away and log a fresh seed so the page can be replayed.
  print "<META HTTP-EQUIV=\"Refresh\" content=\"0;URL=mangle_pl.cgi\">\n";
  my $seed = time() ^ ($$ + ($$ << 15));
  srand( $seed );
  print STDERR ("Running with seed ", $seed, "\n");
} else {
  # Replay request. The value comes from the URL, so accept plain decimal
  # digits only; anything else is never echoed to the error log and the
  # generator falls back to seed 0.
  my $seed = 0;
  if( $requestedSeed =~ /\A(\d{1,20})\z/ ) {
    $seed = $1;
    print STDERR ("Running with provided seed ", $seed, "\n");
  } else {
    print STDERR ("Ignoring malformed seed parameter, running with seed 0\n");
  }
  srand( $seed );
}

# The order returned by keys() is randomised per process on modern perls, so
# indexing straight into it would make the same seed yield different pages.
# Sorting once gives every run the same index => tag mapping.
my @tagNames = sort keys %tags;

my $numTagsRequired = int(rand(100));
for( 1 .. $numTagsRequired ) {
  print "<";

  # Maybe throw in a close tag char at the beginning of the tag,
  # or maybe throw in a random char
  #
  if( int(rand(5)) == 0 ) {
    print "/";
  } elsif( int(rand(5)) == 0 ) {
    print chr( int(rand(256)) );
  }

  # Pick a tag to abuse
  #
  my $tag = $tagNames[int(rand(scalar(@tagNames)))];
  print $tag;

  # The attribute list for this tag is the same for every attribute we
  # emit, so look it up once rather than copying it on each iteration.
  #
  my $attribList = $tags{$tag};

  # Make some attributes
  #
  my $numAttsRequired = int(rand(20));
  for( 1 .. $numAttsRequired ) {

    # Maybe throw in a random char, but normally a space
    # (occasionally no separator at all)
    #
    if( int(rand(16)) == 0 ) {
      print chr( int(rand(256)) );
    } elsif( int(rand(16)) != 0 ) {
      print " ";
    }

    # Pick an attribute
    #
    print $attribList->[int(rand(scalar(@{$attribList})))];

    # Print an equals (normally) then give the attribute a value
    #
    if( int(rand(16)) == 0 ) {
      print chr( int(rand(256)) );
    } elsif( int(rand(16)) != 0 ) {
      print "=";
    }
    print barbedValue();
  }

  # Close the tag
  #
  print ">\n";
}
