#!/usr/local/bin/perl
use strict;
use warnings;

$| = 1;    # don't buffer output

# file name:: perl_html.pl
# Created on Tuesday, December 19, 2000 using
# Perl Studio by AyerSoft copyright(C) 2000
###################################################
# You are free to customize this script as you wish, and
# redistribute it without written permission from AyerSoft.
# DISCLAIMER
# The information and code provided is provided 'as is' without
# warranty of any kind, either express or implied. In no event
# shall the AyerSoft Company be liable for any damages whatsoever
# including direct, indirect, incidental, consequential, loss of
# business profits or special damages, even if the author has been
# advised of the possibility of such damages.
# DO NOT USE THIS SCRIPT UNLESS YOU CAN FULLY AGREE WITH THIS
# DISCLAIMER.
###################################################

=head1 script to create the HTML HELP project files from a directory of html files

=cut

#####################################################################
# list of keywords to ignore when creating the index.hhk file
#####################################################################
my @ignore = qw(
    that then than street city country address two tom this these use
    using wish with without written writing would in what how who which
    when where do you your find site get and or if a the for an it of
    from by the to he most all about i me search is are be been with why
    will was want some something should easier does depending width wish
    white whatsoever was vice via vastly variety usually used useful
    until unlike united typeface keep know knows known likely might
    yourself with within whether whenever very those these there their
    then way talk thank them they thing things maybe may ready real put
    puts proudly prove power popular please poor plain per part page own
    otherwise other our only one onto old older once off now non now nor
    must much more move many lots looks lies likewise just july its
    itself january into inside
    ieee hope her he has hats had guys fri four fully fresh fly few fewer
    far due done david day custom nearly need natural nature needed
    needing needs neither never newer nice nick occasionally ones ops
    oses otherargs over
);

#####################################################################
# max number of keywords to add to index.hhk file
# max number of keywords to use from each html file
# max number of listings per keyword
# max length of each key word
# root directory to start the html search in and the root directory
#   to create the html project files in.....needs trailing slash
# helpfile name
# index file name
# title to display in htmlhelp file
# default file to display in htmlhelp file
# types of files to include in htmlhelp file project
#####################################################################
my $max_key_words          = 8192;
my $max_key_words_per_file = 64;
my $key_max_listings       = 5;
my $max_key_word_length    = 15;
my $root_dir               = "C:/perl_functions/";
my $helpfile               = "perl_functions";
my $index_file             = "index.hhk";
my $help_title             = "Perl builtin functions";
my $default_file           = "html\\start.html";
my @file_types             = qw(htm html shtml );

# main program code
#=======================================================================================
my @filelist = ();

# get_html_files() wants the directory without its trailing slash, the
# other subs want it with one.
$root_dir =~ s/\/$//;
my $dir = $root_dir;

# Fail loudly instead of generating an empty project for a missing directory.
-d $dir
    or die "Root directory '$dir' does not exist or is not a directory\n";

get_html_files($dir);    # fills @filelist
$root_dir .= "/";

# Both steps report failure by returning a false value; do not print
# "done" when the project files could not be written.
get_html_keywords($root_dir, $index_file, @filelist)
    or die "Could not create the keyword index in $root_dir\n";
make_help_file($root_dir, $helpfile, $index_file, $help_title, $default_file, @filelist)
    or die "Could not create the html help project files in $root_dir\n";

print "done";
exit;

#####################################################################################
# library subroutines follow
#####################################################################################

#####################################################################################
# FUNCTION get_html_files
# RECEIVES root directory without trailing slash
# RETURNS 1|0 for success
# PURPOSE Creates the list of html files for the html help project.
#=======================================================================================
sub get_html_files {
    my ($dir) = @_;

    # Lexical handle: this sub recurses, so every level gets its own handle
    # instead of sharing one bareword DIR.
    opendir(my $dh, $dir) || return 0;
    my @entries = grep { !/^\./ } readdir($dh);    # ignore dot files
    closedir($dh);

    foreach my $entry (@entries) {
        my $full_name = "$dir/$entry";

        if (-d $full_name) {
            # Descend; an unreadable sub-directory is simply skipped.
            get_html_files($full_name);
        }
        elsif (-T $full_name) {
            # Require a real ".ext" suffix. Without the dot a text file named
            # e.g. "algorithm" would be accepted because it ends in "htm".
            if (grep { $full_name =~ /\.\Q$_\E$/i } @file_types) {
                push(@filelist, $full_name);    # file-level result list
            }
        }
    }

    return 1;
}

#####################################################################################
# FUNCTION get_title
# RECEIVES root directory and full pathname of html file
# RETURNS the title in the html file
# PURPOSE searches the html file for the title to display in the html contents file.
#======================================================================================= 
sub get_title {
    my ($root, $file) = @_;

    # Fallback title when the page has no usable <title>: the file's path
    # relative to the root. \Q..\E keeps path characters from being read as
    # regex metacharacters.
    my $title = $file;
    $title =~ s/^\Q$root\E//;

    open(my $fh, '<', $file) || die("$file Can't open because ($!).");
    my $html = do { local $/; <$fh> };    # slurp so a title may span lines
    close($fh);
    $html = '' unless defined $html;

    # Take only what is between the tags, so other markup sharing the line
    # with <title> does not leak into the result.
    if ($html =~ m{<title[^>]*>(.*?)</title>}is) {
        my $found = $1;
        $found =~ s/[\"\'\<\>\\\/]//g;    # characters unsafe inside value="..."
        $found =~ s/\s+/ /g;              # fold newlines/tabs into single spaces
        $found =~ s/^ | $//g;
        $title = $found if $found ne '';
    }

    print "$file $title\n";
    return $title;
}

#####################################################################################
# FUNCTION get_html_keywords
# RECEIVES root directory with trailing slash
#          index file name.....index.hhk for index search
#          list of html files to include in the helpfile
# RETURNS 1|0 for success
# PURPOSE Creates the html keyword index file for the html help project.
#=======================================================================================
# Reads the file-level settings @ignore, $max_key_words, $max_key_words_per_file,
# $key_max_listings and $max_key_word_length.
# Besides the index file it writes "keywords.txt" into the root directory: every
# indexed keyword wrapped in "my @ignore = qw( ... );".
sub get_html_keywords {
    my ($root, $indexfile, @htmlfiles) = @_;

    my %results  = ();    # relative file name => { keyword => 1 }
    my @keywords = ();    # every distinct keyword over all files
    my %key_seen = ();

    $root =~ s/\\/\//g;

    # One alternation instead of one pass over the whole text per ignored word.
    my $ignore_re;
    if (@ignore) {
        my $alternatives = join('|', map { quotemeta($_) } @ignore);
        $ignore_re = qr/\b(?:$alternatives)\b/i;
    }

    print "adding keywords from:\n";
    foreach my $file (@htmlfiles) {
        print "$file\n";
        open(my $in, '<', $file) || die("$file Can't open because ($!).");
        my $string = join(' ', <$in>);
        close($in);

        # Comments go first: the generic tag pattern below stops at the first
        # '>' and would leave the tail of a comment containing '>' in the text.
        $string =~ s/<!--.*?-->/ /gs;
        # /s lets '.' cross line breaks so a multi-line head is really dropped.
        $string =~ s/<html(.*)<body([^>]*)>/ /gis;
        $string =~ s/<[^>]*\s+ALT\s*=\s*"(([^>"])*)"[^>]*>/ $1 /ig;
        $string =~ s/<([^>]|\n)*>/ /g;
        # Common entities become separators while their ';' is still present.
        $string =~ s/&(?:nbsp|quot|lt|gt|amp);/ /gi;
        $string =~ s/$ignore_re//g if defined $ignore_re;

        my %seen = ();
        foreach my $word (split(/\s+/, $string)) {
            # Stop at exactly the configured number of distinct keywords.
            last if scalar(keys %seen) >= $max_key_words_per_file;

            $word =~ s/[\r\n\t\#\!\[\]\,\$\@\;\(\)\{\}\.\"\'\:\%\-\?\”\“\|]//g;
            $word =~ s/[<>]//g;
            next if $word =~ /&/;    # leftover, unknown entity
            next if $word eq "";
            next if length($word) > $max_key_word_length;

            $word = lc($word);
            next if $seen{$word}++;
            push(@keywords, $word) unless $key_seen{$word}++;
        }

        my $relative = $file;
        $relative =~ s/^\Q$root\E//;
        $results{$relative} = {%seen};
    }

    my $path = $root . $indexfile;
    unlink($path);
    print "index file= $path\n";

    my $keyword_path = $root . "keywords.txt";
    my $kw_fh;
    unless (open($kw_fh, '>', $keyword_path)) {
        # $! is numeric; assigning text to it would throw the reason away.
        warn "Could not write keyword file $keyword_path: $!\n";
        return 0;
    }
    print {$kw_fh} "my \@ignore = qw(";

    my $hhk_fh;
    unless (open($hhk_fh, '>', $path)) {
        warn "Could not write index file $path: $!\n";
        close($kw_fh);
        return 0;
    }
    print {$hhk_fh} <<'EOT';
<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML//EN">
<HTML>
<HEAD>
<!-- Sitemap 1.0 -->
</HEAD>
<BODY>
<UL>
EOT

    my @indexed_files = sort grep { $_ ne "" } keys %results;
    my $keyword_count = 0;
    my $kw            = 0;    # words written since the last line break in keywords.txt

    print "adding keywords\n";
    foreach my $w (sort @keywords) {
        next if $w =~ /[0-9]/;
        next if length($w) < 3;
        next unless $w =~ /^[a-z]/;

        last if $keyword_count >= $max_key_words;
        $keyword_count++;
        print "$keyword_count keyword::$w\n";

        print {$kw_fh} "$w ";
        if ($kw > 10) {
            $kw = 0;
            print {$kw_fh} "\n";
        }
        $kw++;

        # $key_max_listings caps how many pages are listed for one keyword;
        # it must not limit which of a page's keywords are looked at.
        my @listed = ();
        foreach my $key (@indexed_files) {
            last if @listed >= $key_max_listings;
            push(@listed, $key) if $results{$key}{$w};
        }
        next unless @listed;

        my $site_text = qq!\n<LI> <OBJECT type="text/sitemap">!;
        foreach my $key (@listed) {
            $site_text .= qq!\n<param name="Name" value="$w">\n<param name="Local" value="$key">!;
        }
        $site_text .= qq!\n</OBJECT>!;
        print {$hhk_fh} $site_text;
    }

    print {$hhk_fh} "</UL>\n";
    print {$hhk_fh} "</BODY></HTML>\n";
    print {$kw_fh} ");\n";

    # Buffered write errors only surface at close time.
    my $ok = 1;
    unless (close($hhk_fh)) {
        warn "Could not finish index file $path: $!\n";
        $ok = 0;
    }
    unless (close($kw_fh)) {
        warn "Could not finish keyword file $keyword_path: $!\n";
        $ok = 0;
    }
    return $ok;
}

#####################################################################################
# FUNCTION make_help_file
# RECEIVES root directory with trailing slash
#          helpfile name (without drive or path),
#          index file name.....index.hhk for index search
#          html file title
#          default html file to display in htmlhelp file
#          list of html files to include in the helpfile
# RETURNS 1|0 for success
# SETS None
# EXPECTS None
# PURPOSE Creates htmlhelp project and contents file from a list of html files.
#=======================================================================================
sub make_help_file {
    my ($root, $helpfile, $index_file, $help_title, $default_file, @htmlfiles) = @_;

    print "---CreateHHP---\n";
    CreateHHP($root, $helpfile, $index_file, $help_title, $default_file, @htmlfiles)
        or return 0;

    print "---CreateHHC---\n";
    CreateHHC($root, $helpfile, @htmlfiles)
        or return 0;

    return 1;
}

#####################################################################################
# FUNCTION CreateHHP
# RECEIVES root directory with trailing slash
#          helpfile name (without drive or path),
#          index file name.....index.hhk for index search
#          html file title
#          default html file to display in htmlhelp file
#          list of html files to include in the helpfile
# RETURNS 1|0 for success
# PURPOSE Creates the project file for the html help project.
#=======================================================================================
sub CreateHHP {
    my ($root, $helpfile, $index_file, $help_title, $default_file, @htmlfiles) = @_;

    $root =~ s/\\/\//g;
    my $project_chm = $helpfile . ".chm";
    my $project_hhc = $helpfile . ".hhc";
    my $project_hhp = $helpfile . ".hhp";
    my $projfile    = $root . $project_hhp;

    print "Creating $projfile\n";
    my $hhp;
    unless (open($hhp, '>', $projfile)) {
        # $! is numeric; assigning text to it would throw the reason away.
        warn "Could not write project file $projfile: $!\n";
        return 0;
    }

    print {$hhp} <<EOT;
[OPTIONS]
Compatibility=1.1
Compiled file=$project_chm
Contents file=$project_hhc
Display compile progress=Yes
Index file=$index_file
Title=$help_title
Full-text search=Yes
Language=0x409 English (United States)
Default Window=main
Default topic=$default_file

[WINDOWS]
main=,"$project_hhc","$index_file","$default_file","$default_file",,,,,0x2520,,0x307e,,,,,,,,0

[FILES]
EOT

    foreach my $file (@htmlfiles) {
        # List the file relative to the project root; \Q..\E keeps path
        # characters from acting as regex metacharacters.
        my $relative = $file;
        $relative =~ s/^\Q$root\E//;
        print {$hhp} "$relative\n";
        print " added $relative\n";
    }

    unless (close($hhp)) {
        warn "Could not finish project file $projfile: $!\n";
        return 0;
    }
    return 1;
}

#####################################################################################
# FUNCTION CreateHHC
# RECEIVES root directory with trailing slash
#          helpfile name (without drive or path),
#          list of html files to include in the helpfile
# RETURNS 1|0 for success
# PURPOSE Creates the contents file for the html help project.
#=======================================================================================
sub CreateHHC {
    my ($root, $helpfile, @files) = @_;

    my %results = ();    # title => list of relative file names carrying it

    $root =~ s/\\/\//g;
    my $project_hhc = $helpfile . ".hhc";
    my $tocfile     = $root . $project_hhc;

    print "Creating $tocfile\n";
    my $hhc;
    unless (open($hhc, '>', $tocfile)) {
        warn "Could not write contents file $tocfile: $!\n";
        return 0;
    }

    print {$hhc} <<'EOT';
<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML//EN">
<HTML>
<HEAD>
<!-- Sitemap 1.0 -->
</HEAD>
<BODY>
<OBJECT type="text/site properties">
<param name="ImageType" value="Folder">
</OBJECT>
<UL>
EOT

    foreach my $file (@files) {
        # don't want default.htm in the toc file
        next if $file =~ /default\.html?$/i;

        my $title    = get_title($root, $file);
        my $relative = $file;
        $relative =~ s/^\Q$root\E//;

        # Braces are required: "$helpfile::..." would be read as the
        # (empty) package variable $helpfile:: rather than $helpfile.
        print " adding ${helpfile}::$relative\n";
        push(@{ $results{$title} }, $relative);
    }

    foreach my $title (sort keys %results) {
        # Emit every page, not just the first one, when titles collide.
        foreach my $relative (@{ $results{$title} }) {
            print {$hhc} qq!
<LI> <OBJECT type="text/sitemap">
<param name="Name" value="$title">
<param name="Local" value="$relative">
</OBJECT>!;
        }
    }

    print {$hhc} "</UL>\n";
    print {$hhc} "</BODY></HTML>\n";

    unless (close($hhc)) {
        warn "Could not finish contents file $tocfile: $!\n";
        return 0;
    }
    return 1;
}
#=======================================================================================