{$IFDEF WINDOWS}
{$N-,V-,W-,G+,R-}
{$ELSE}
{$E-,N-,V-,R-}
{$ENDIF}

{
                        Wildcard Pattern Matching

    Author: J. Kercheval
    Created: Sat, 01/05/1991  22:21:49

    Translated to Pascal by Mikolaj Rydzewski, Tue, 28/02/1995 23:25:45
    Ported to TP 6.0 & PartMatch() added by Eyal Doron, 9/3/95
    
    Usage: Regular expressions may contain the following wildcards:

       `*' matches any sequence of characters (zero or more)
       `?' matches any character
       [SET] matches any character in the specified set,
       [!SET] or [^SET] matches any character not in the specified set.

    A set is composed of characters or ranges; a range looks like
    character hyphen character (as in 0-9 or A-Z).  [0-9a-zA-Z_] is the
    minimal set of characters allowed in the [..] pattern construct.
    Other characters are allowed (ie. 8 bit characters) if your system
    will support them.

    To suppress the special syntactic significance of any of `[]*?!^-\',
    and match the character exactly, precede it with a `\'.
    
    The basic interface is through the routines "Match" and "PartMatch",
    both of whom are interfaces to the routine "MatchE".
    "Match" accepts two generic data variables and their lengths, and the
            boolean "CaseSen". It returns TRUE if the first sequence is 
            equivalent to the second.
    "PartMatch" accepts a rexexp STRING variable and a generic text variable
            and its length, and the boolean "CaseSen". It returns TRUE if the
            second variable contains the first. It also allows the wildcards
            "^" at the beginning and "$" at the end of the regexp to specify
            the beginning/end of the text string. Note that it will return
            FALSE for strings which are too long (typically >253 chars).
    "CaseSen" determines if the searches are case sensitive or not, in all
            cases.

}
unit bibwild;

interface

const
  MATCH_PATTERN = 1;
  MATCH_LITERAL = 2;
  MATCH_RANGE   = 3;
  MATCH_ABORT   = 4;
  MATCH_END     = 5;
  MATCH_VALID   = 6;
  PATTERN_VALID = 1;
  PATTERN_ESC   = 2;
  PATTERN_RANGE = 3;
  PATTERN_CLOSE = 4;
  PATTERN_EMPTY = 5;

  RegexpChars = ['^','!','\','*','?','$','[','-',']'];
  
function Match(var pattern; plen: word; var text; tlen: word; 
               CaseSen: boolean): boolean;
function PartMatch(pattern: string; var text; tlen: word; CaseSen: boolean): boolean;
function is_valid_regexp(pattern: string; var error_type: integer): boolean;
function ValidRegexp(S: string; detail: boolean): boolean;
function is_regexp(pattern: string): boolean;
function matche(var ipattern, itext; plen,tlen: word; CaseSen: boolean): integer;

{ In order to use e.g. "Match" with strings, call it as  }
{ Match(pat[1],txt[1],length(pat),length(txt))           }

implementation

Uses
  rc_strng,
{$IFDEF WINDOWS}
  wbibdisp;
{$ELSE}
  bibdisp;
{$ENDIF}

const
  MATCH_START   = 0; { for internal use only! }
type
  TextString = array[1..65384] of char;

function GetNextChar(var A: TextString; var aind, alen: word; CaseSen: boolean): char;
begin
  if aind>=alen then 
    GetNextChar:=#0 
  else begin
    inc(aind); 
    if CaseSen then GetNextChar:=A[aind]
    else GetNextChar:=UpCase(A[aind]);
  end;
end;

function matche_after_star(var ipattern,itext; plen,tlen: word; 
                           CaseSen: boolean): integer; forward;

{ Return TRUE if PATTERN has any special wildcard characters. }
function is_regexp(pattern: string): boolean;
var
  i: word;
begin
  is_regexp:=true; i:=0;
  while i<=length(pattern) do
  begin
   if (pattern[i]='?') or (pattern[i]='*') or (pattern[i]='[')
      or (pattern[i]='\') then exit;
   inc(i);
  end;
  is_regexp:=false;
end;

{ Return TRUE if PATTERN has is a well formed regular expression according
  to the above syntax

 error_type is a return code based on the type of pattern error.  Zero is
 returned in error_type if the pattern is a valid one.  error_type return
 values are as follows:

   PATTERN_VALID - pattern is well formed
   PATTERN_ESC   - pattern has invalid escape ('\' at end of pattern)
   PATTERN_RANGE - [..] construct has a no end range in a '-' pair (ie [a-])
   PATTERN_CLOSE - [..] construct has no end bracket (ie [abc-g )
   PATTERN_EMPTY - [..] construct is empty (ie [])
}
function is_valid_regexp(pattern: string; var error_type: integer): boolean;
var
  i: word;
begin
  { init error_type }
  error_type:=PATTERN_VALID;
  is_valid_regexp:=false;
  i:=0;
  { loop through pattern to EOS }
  while i<=length(pattern) do
  { determine pattern type }
  case pattern[i] of
  { check literal escape, it cannot be at end of pattern }
  '\': begin
         inc(i);
         if i>length(pattern) then
         begin
           error_type := PATTERN_ESC;
           exit;
         end;
         inc(i);
       end;
   { the [..] construct must be well formed }
   '[': begin
          inc(i);
          { if end of pattern here then bad pattern }
          if i>length(pattern) then
          begin
            error_type := PATTERN_CLOSE;
            exit;
          end;
          { if the next character is ']' then bad pattern }
          if pattern[i]=']'then
          begin
            error_type := PATTERN_EMPTY;
            exit;
          end;
          { loop to end of [..] construct }
          while pattern[i]<>']'do
          begin
            { check for literal escape }
            if pattern[i]='\'then
            begin
              inc(i);
              { if end of pattern here then bad pattern }
              if i>length(pattern) then
              begin
                error_type := PATTERN_ESC;
                exit;
              end;
              inc(i);
            end
            else inc(i);
            { if end of pattern here then bad pattern }
            if i>length(pattern) then
            begin
              error_type:=PATTERN_CLOSE;
              exit;
            end;
            { if this a range }
            if pattern[i]='-'then
            begin
              inc(i);
              { we must have an end of range }
              if (i>length(pattern)) or (pattern[i]=']') then
              begin
                error_type:=PATTERN_RANGE;
                exit;
              end else
              begin
                { check for literal escape }
                if pattern[i]='\' then inc(i);
                { if end of pattern here then bad pattern }
                if i>length(pattern) then
                begin
                  error_type:=PATTERN_ESC;
                  exit;
                end;
                inc(i);
               end;
             end;
           end;
         end;
     { all other characters are valid pattern elements }
    '*': inc(i);
    '?': inc(i);
    else inc(i);
  end;
  is_valid_regexp:=true;
end;

function ValidRegexp(S: string; detail: boolean): boolean;
var
  icode: integer;
begin
  if not Is_Valid_Regexp(S,icode) then
  begin
    if Detail then
      case icode of
        PATTERN_ESC   : ErrorMessageRC(Str_RegexpErrorESC,'');
        PATTERN_RANGE : ErrorMessageRC(Str_RegexpErrorRange,'');
        PATTERN_CLOSE : ErrorMessageRC(Str_RegexpErrorClose,'');
        PATTERN_EMPTY : ErrorMessageRC(Str_RegexpErrorEmpty,'');
      end;
    ValidRegexp:=false;
  end else ValidRegexp:=true;
end;

{  Match the regular expression PATTERN against the string TEXT;

  returns MATCH_VALID if pattern matches, or an errorcode as follows
  otherwise:

            MATCH_PATTERN  - bad or misformed pattern
            MATCH_LITERAL  - match failure on literal mismatch
            MATCH_RANGE    - match failure on [..] construct
            MATCH_ABORT    - premature end of text string
                             (pattern longer than text string)
            MATCH_END      - premature end of pattern string
                             (text longer than pattern called for)
            MATCH_VALID    - valid match


  A match means the entire string TEXT is used up in matching.

  In the regexp string:
       `*' matches any sequence of characters (zero or more)
       `?' matches any character
       [SET] matches any character in the specified set,
       [!SET] or [^SET] matches any character not in the specified set.

  A set is composed of characters or ranges; a range looks like
  character hyphen character (as in 0-9 or A-Z).  [0-9a-zA-Z_] is the
  minimal set of characters allowed in the [..] pattern construct.
  Other characters are allowed (ie. 8 bit characters) if your system
  will support them.

  To suppress the special syntactic significance of any of `[]*?!^-\',
  and match the character exactly, precede it with a `\'.
}
function matche(var ipattern,itext; plen,tlen: word; CaseSen: boolean): integer;
label
  EndOfLoop;
var
	text: TextString ABSOLUTE itext;
	pattern: TextString ABSOLUTE ipattern;
	range_start, range_end, pch, tch: char;
	invert,{ is this [..] or [!..] }
	member_match,{ have I matched the [..] construct? }
	loop: boolean;{ should I terminate? }
	pind,tind,tmpind: word;

begin
  tind:=1; pind:=1;
  if pind<=plen then pch:=pattern[pind] else pch:=#0; 
  if tind<=tlen then tch:=text[tind]    else tch:=#0;
  if not CaseSen then
  begin
    Pch:=UpCase(Pch); Tch:=UpCase(Tch);
  end;
  while pch<>#0 do
  begin
   { if this is the end of the text then this is the end of the match }
   if tch=#0 then
   begin
     if (pch='*') and (pind>=plen) then matche:=MATCH_VALID
     else matche:=MATCH_ABORT;
     exit;
   end;
   { determine and react to pattern type }
   case pch of
     { single any character match }
     '?':;
     { multiple any character match }
     '*':begin
           matche:= matche_after_star (pattern[pind], text[tind],
                                       plen-pind+1, tlen-tind+1,CaseSen);
           exit;
         end;
     { [..] construct, single member/exclusion character match }
     '[':begin
          { move to beginning of range }
          Pch:=GetNextChar(pattern,pind,plen,CaseSen);
          { check if this is a member match or exclusion match }
          invert := false;
          if (pch='!') or (pch='^') then
          begin
            invert:=true;
            Pch:=GetNextChar(pattern,pind,plen,CaseSen);
          end;
          { if closing bracket here or at range start then we have a
            malformed pattern }
          if pch = ']' then
          begin
            matche:= MATCH_PATTERN;
            exit;
          end;
          member_match := FALSE;
          loop := TRUE;
          while loop do
          begin
            { if end of construct then loop is done }
            if pch = ']' then
            begin
              loop := FALSE;
              goto EndOfLoop;
              {continue;      !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! }
            end;
            { matching a '!', '^', '-', '\' or a ']' }
            if pch = '\' then
            begin
              Pch:=GetNextChar(pattern,pind,plen,CaseSen);
              range_end:=pch;
              range_start:=range_end;
            end else
            begin
              range_end:=pch;
              range_start:=range_end;
            end;
            { if end of pattern then bad pattern (Missing ']') }
            if Pch=#0 then
            begin
              matche:=MATCH_PATTERN;
              exit;
            end;
            Pch:=GetNextChar(pattern,pind,plen,CaseSen);
            { check for range bar }
            if pch = '-' then
            begin
              Pch:=GetNextChar(pattern,pind,plen,CaseSen);
              { get the range end }
              range_end := pch;
              { if end of pattern or construct then bad pattern }
              if (range_end = #0) or ( range_end = ']') then
              begin
                matche:=MATCH_PATTERN;
                exit;
              end;
              { special character range end }
              if range_end = '\' then
              begin
                Pch:=GetNextChar(pattern,pind,plen,CaseSen);
                range_end := pch;
                { if end of text then we have a bad pattern }
                if range_end=#0 then
                begin
                  matche:=MATCH_PATTERN;
                  exit;
                end;
              end;
              { move just beyond this range }
              Pch:=GetNextChar(pattern,pind,plen,CaseSen);
            end;
            { if the text character is in range then match found. Make sure
              the range letters have the proper relationship to one another
              before comparison }
            if  range_start < range_end then
            begin
              if (tch >= range_start) and (tch <= range_end) then
              begin
                member_match := TRUE;
                loop := FALSE;
              end
            end else
            begin
              if (tch >= range_end) and ( tch <= range_start) then
              begin
                member_match := TRUE;
                loop := FALSE;
              end
            end
          end;
          EndOfLoop: ;
          { if there was a match in an exclusion set then no match }
          if (invert and member_match) or not (invert or member_match) then
          begin
            matche:= MATCH_RANGE;
            exit;
          end;
          { if this is not an exclusion then skip the rest of the [...]
            construct that already matched. }
          if member_match then
          begin
            while pch <> ']'do
            begin
              { bad pattern (Missing ']') }
              if pch=#0 then
              begin
                matche:= MATCH_PATTERN;
                exit;
              end;
              { skip exact match }
              if pch = '\' then
              begin
                Pch:=GetNextChar(pattern,pind,plen,CaseSen);
                { if end of text then we have a bad pattern }
                if pch=#0 then
                begin
                  matche:=MATCH_PATTERN;
                  exit;
                end;
              end;
              { move to next pattern char }
              Pch:=GetNextChar(pattern,pind,plen,CaseSen);
            end;
          end;
         end;
     { next character is quoted and must match exactly }
     '\':begin
           { move pattern pointer to quoted char and fall through }
           Pch:=GetNextChar(pattern,pind,plen,CaseSen);
           { if end of text then we have a bad pattern }
           if pch=#0 then
           begin
             matche:=MATCH_PATTERN;
             exit;
           end;
           if pch <> tch then
           begin
             matche:=MATCH_LITERAL;
             exit;
           end;
         end;
     { must match this character exactly }
     else if pch <> tch then
         begin
           matche:=MATCH_LITERAL;
           exit;
         end;
   end;
   Pch:=GetNextChar(pattern,pind,plen,CaseSen);
   Tch:=GetNextChar(text,tind,tlen,CaseSen);
 end;
 { if end of text not reached then the pattern fails }
 if tch<>#0 then matche:=MATCH_END
 else matche:=MATCH_VALID;
end;

{ recursively call matche() with final segment of PATTERN and of TEXT.}
function matche_after_star(var ipattern, itext; plen,tlen: word; 
                           CaseSen: boolean): integer;
var
  text: TextString ABSOLUTE itext;
  pattern: TextString ABSOLUTE ipattern;
  match: integer;
  nextp, tch, pch: char;
  pind,tind: word;
begin
 match:=MATCH_START;
 pind:=1; tind:=1; 
 if tind<=tlen then tch:=text[tind]    else tch:=#0; 
 if pind<=plen then pch:=pattern[tind] else pch:=#0;
 if not CaseSen then
 begin
   Pch:=UpCase(Pch); Tch:=UpCase(Tch);
 end;
 { pass over existing ? and * in pattern }
 while ( pch= '?') or (pch= '*') do
 begin
   { take one char for each ? and * }
   if pch='?' then
   begin
     { if end of text then no match }
     if tch=#0 then
     begin
       matche_after_star:=MATCH_ABORT;
       exit;
     end;
     { move to next char in pattern }
     Tch:=GetNextChar(text,tind,tlen,CaseSen);
   end;
   Pch:=GetNextChar(pattern,pind,plen,CaseSen);
 end;
 { if end of pattern we have matched regardless of text left }
 if pch=#0 then
 begin
   matche_after_star:=MATCH_VALID;
   exit;
 end;
 { get the next character to match which must be a literal or '[' }
 nextp := pch;
 if  nextp='\' then
 begin
   if pind<plen then NextP:=pattern[pind+1] else NextP:=#0;
   if not CaseSen then NextP:=UpCase(NextP);
   { if end of text then we have a bad pattern }
   if nextp=#0 then
   begin
     matche_after_star:=MATCH_PATTERN;
     exit;
   end;
 end;
 { Continue until we run out of text or definite result seen }
 repeat
  { a precondition for matching is that the next character in the pattern
    match the next character in the text or that the next pattern char is
    the beginning of a range.  Increment text pointer as we go here }
  if (NextP=tch) or (NextP='[') then
    match:=matche(pattern[pind],text[tind],plen-pind+1,tlen-tind+1,CaseSen);
  { if the end of text is reached then no match }
  if tch=#0 then match := MATCH_ABORT;
  Tch:=GetNextChar(text,tind,tlen,CaseSen);
 until not ( (match<>MATCH_VALID) and (match<>MATCH_ABORT) and
             (match<>MATCH_PATTERN) );
 { return result }
 matche_after_star:=match;
end;

{ Match() is a shell to matche() to return only BOOLEAN values.}
function Match(var pattern; plen: word; var text; tlen: word; 
               CaseSen: boolean): boolean;
begin
  if plen=0 then 
    match:=true
  else
    match := matche(pattern,text,plen,tlen,CaseSen)=MATCH_VALID;
end;

{ PartMatch() is a shell to matche() to return only BOOLEAN values. It
  searches for "pattern" as a substring of "text", rather than a complete
  match. A bit of a kludge, as it only accepts strings of length <254.
  Use "^" at the beginning or "$" at the end to fix the beginning/end
  of the pattern to the beginning/end of the text (a-la SED).
  Also, "pattern" here is a string rather than a generic text variable.
}
function PartMatch(pattern: string; var text; tlen: word; CaseSen: boolean): boolean;
var
  DollarEnd,AlreadyStar: boolean;
  i: integer;
begin
  if length(pattern)=0 then 
    PartMatch:=true
  else begin
    if pattern[1]='^' then delete(pattern,1,1)
    else begin
      if (length(pattern)<255) and (pattern[1]<>'*') then 
      begin
        if pattern[1]<>'*' then pattern:='*'+pattern;
      end else if pattern[1]<>'*' then   { Pattern too long }
      begin
        PartMatch:=false; Exit;
      end;
    end;
    DollarEnd:=false;
    if pattern[length(pattern)]='$' then
    begin
      i:=length(pattern);
      while (i>1) and (pattern[i-1]='\') do dec(i);
      if (length(pattern)-i) mod 2 = 0 then DollarEnd:=true;
    end;
    if DollarEnd then Delete(pattern,length(pattern),1)
    else begin
      AlreadyStar:=false;
      if (pattern[length(pattern)]='*') then
      begin
        i:=length(pattern);
        while (i>1) and (pattern[i-1]='\') do dec(i);
        if (length(pattern)-i) mod 2 = 0 then AlreadyStar:=true;
      end;
      if not AlreadyStar and (length(pattern)<255) then
        pattern:=pattern+'*'
      else if not AlreadyStar then   { Pattern too long }
      begin
        PartMatch:=false; Exit;
      end;
    end;
    PartMatch := matche(pattern[1],text,length(pattern),tlen,CaseSen)=MATCH_VALID;
  end;
end;

end.
