# dawg.pl
# Copyright (C) 1996 by John J. Chew, III
# All Rights Reserved
#
# dawg.pl
# functions for working with dawg files as generated by the package
# posted to alt.sources, not yet checked for byte sex dependency

# assumed file format:
#
# four-byte records, the first the index of the last record in the
# file, the second (root) and all the rest:
#   0-7 character   8 end-of-word   9 end-of-list   10-31 pointer
#
# Records are decoded as big-endian 32-bit words (unpack 'N'), and the
# bit numbers above count from the most significant bit: the character
# is the top byte, the pointer is the low 22 bits.

package dawg;

use strict;
use warnings;

our $version = '1.1.1';

## public functions:

# $yesno = &dawg'check(*HANDLE, $word);
# $yesno = &dawg'check_suffix(*HANDLE, $index, $suffix);
# $status = &dawg'close(*HANDLE);
# $status = &dawg'open(*HANDLE, $filename);
# ($char, $eow, $eol, $index) = &dawg'get_record(*HANDLE, $index);
# $index = &dawg'get_root(*HANDLE);

# Every function receives the caller's filehandle as a typeglob and
# aliases it to the package glob *HANDLE for the duration of the call.
# Built-in open/close are always written CORE::open / CORE::close here,
# because this package defines subs with the same names.

# $yesno = &dawg'check(*HANDLE, $word);
#
# Returns 1 if $word is stored in the dawg open on HANDLE, 0 otherwise.
# Dies (via check_suffix) if $word is empty.
sub check {
    my ($handle, $word) = @_;
    local *HANDLE = $handle;

    return check_suffix(*HANDLE, get_root(*HANDLE), $word);

    # Earlier inline implementation, kept for reference:
    #
    #  local($char, @chars, $eow, $eol, $here, $next, $rv);
    #
    #  @chars = split('', $word);
    #  $here = &get_root(*HANDLE);
    #  $rv = 0;
    #  ($char, $eow, $eol, $next) = &get_record(*HANDLE, $here);
    #  while (1) {
    #    if ($chars[0] eq $char) {
    #      shift @chars;
    #      if ($#chars < $[) { $rv = 1 if $eow; last; }
    #      last unless ($here = $next);
    #      ($char, $eow, $eol, $next) = &get_record(*HANDLE, $here);
    #      next;
    #      }
    #    else {
    #      last if $eol;
    #      ($char, $eow, $eol, $next) = &get_record(*HANDLE, ++$here);
    #      }
    #    }
    #  $rv;
}

# $yesno = &dawg'check_suffix(*HANDLE, $index, $suffix);
#
# Starting at the sibling list whose first record is $index, follows
# $suffix one character at a time.  Returns 1 if the final character is
# found on a record flagged end-of-word, 0 otherwise.
# Dies if $suffix is empty or undefined, or (via get_record) if a record
# cannot be read.
sub check_suffix {
    my ($handle, $index, $suffix) = @_;
    local *HANDLE = $handle;

    die "dawg'check_suffix: null suffix"
        if !defined $suffix || $suffix eq '';

    my @chars = split //, $suffix;

    while (1) {
        my ($char, $eow, $eol, $next) = get_record(*HANDLE, $index);

        if ($chars[0] eq $char) {
            shift @chars;

            # Last character consumed: the word exists only if this
            # record is flagged end-of-word.
            return ($eow ? 1 : 0) unless @chars;

            # A zero pointer means this record has no children.
            return 0 unless $next;
            $index = $next;
        }
        else {
            # No match: give up at the end of the sibling list,
            # otherwise try the next sibling.
            return 0 if $eol;
            $index++;
        }
    }
}

# $status = &dawg'close(*HANDLE);
#
# Closes HANDLE and returns the status of the built-in close.
sub close {
    my ($handle) = @_;
    local *HANDLE = $handle;

    return CORE::close(HANDLE);
}

# $status = &dawg'open(*HANDLE, $filename);
#
# Opens $filename for reading on HANDLE and verifies that the record
# count declared in the first record matches the file size.
# Returns the (true) result of the built-in open on success.  Returns a
# false value if the file cannot be opened; on a size mismatch, closes
# the handle, prints a diagnostic to STDERR and returns undef.
sub open {
    my ($handle, $filename) = @_;
    local *HANDLE = $handle;

    # Three-argument open: the filename is never parsed for mode
    # characters or surrounding whitespace.
    my $result = CORE::open(HANDLE, '<', $filename);
    return $result unless $result;

    # The file holds packed binary records.
    binmode(HANDLE);

    my $actual_size = (stat HANDLE)[7];

    my $record = '';
    my $got    = sysread(HANDLE, $record, 4);

    # A header shorter than four bytes cannot declare a size; treat it
    # as a mismatch rather than unpacking garbage.
    my $declared_size
        = (defined $got && $got == 4) ? unpack('N', $record) : undef;

    if (!defined $declared_size
        || $actual_size / 4 != $declared_size + 1)
    {
        CORE::close(HANDLE);

        # should actually try other byte sexes here, will implement as needed
        # The filename goes through %s so that a '%' in it cannot
        # corrupt the format string.
        printf STDERR
            "dawg: %s: declared file size (%08x) != actual file size (%08x)\n",
            $filename,
            (defined $declared_size ? $declared_size : 0),
            $actual_size / 4 - 1;

        # A single undef in any context, as callers have always received.
        return undef;
    }

    return $result;
}

# ($char, $eow, $eol, $index) = &get_record(*HANDLE, $index);
#
# Reads record number $index from HANDLE and returns:
#   $char   the record's character, as a one-byte string
#   $eow    non-zero (0x800000) if the end-of-word flag is set, else 0
#   $eol    non-zero (0x400000) if the end-of-list flag is set, else 0
#   $index  the 22-bit pointer field
# Dies if the seek fails or if fewer than four bytes can be read.
sub get_record {
    my ($handle, $index) = @_;
    local *HANDLE = $handle;

    # sysseek, not seek: the data is fetched with unbuffered sysread,
    # and positioning with buffered seek before sysread is unreliable.
    # sysseek returns "0 but true" for offset zero, so "or die" is safe.
    sysseek(HANDLE, $index * 4, 0) or die "seek() failed";

    # printf "get_record(HANDLE,0x%06x): ", $index;

    my $record = '';
    my $got    = sysread(HANDLE, $record, 4);

    # Without this check a pointer past end-of-file decodes as an
    # all-zero record and check_suffix would step forward forever.
    die "dawg'get_record: short read at record $index"
        unless defined $got && $got == 4;

    my $value = unpack('N', $record);
    my $char  = $value >> 24;
    my $eow   = $value & 0x800000;
    my $eol   = $value & 0x400000;

    # printf "('%s'=0x%02x, %d, %d, 0x%06x)\n", pack('c', $char), $char, $eow, $eol
    #   , $value & 0x3fffff;

    # 'C' (unsigned) so that character codes 0x80-0xff pack cleanly.
    return (pack('C', $char), $eow, $eol, $value & 0x3fffff);
}

# $index = &get_root(*HANDLE);
#
# Returns the index of the root record, which is always 1 (record 0
# holds the index of the last record).  The handle argument is unused.
sub get_root {
    return 1;
}

1;