While looking at Gambit's irregex library, I noticed that the bit-field routines used to manipulate flag bit-fields are generic and probably very slow:
;; Shift N right by I bit positions.
;; Implemented arithmetically: N is divided by 2^I with `quotient`,
;; so the discarded low bits are simply truncated away.
(define (bit-shr n i)
  (let ((divisor (expt 2 i)))
    (quotient n divisor)))
;; Shift N left by I bit positions.
;; Implemented arithmetically as N multiplied by 2^I.
(define (bit-shl n i)
  (let ((factor (expt 2 i)))
    (* n factor)))
;; Complement N relative to a fixed 16-bit all-ones mask (#xFFFF).
;; The result is the mask minus N, so it is only a true bitwise
;; complement when N fits in the low 16 bits.
(define (bit-not n)
  (let ((all-ones #xFFFF))
    (- all-ones n)))
;; Inclusive OR of A and B, computed one bit at a time.
;; When either argument is zero the other is returned as-is; otherwise
;; the low bit is set if either argument is odd, and the remaining bits
;; come from recursing on both arguments halved.
(define (bit-ior a b)
  (if (zero? a)
      b
      (if (zero? b)
          a
          (let ((low-bit (if (or (odd? a) (odd? b)) 1 0))
                (upper-bits (bit-ior (quotient a 2) (quotient b 2))))
            (+ low-bit (* 2 upper-bits))))))
;; AND of A and B, computed one bit at a time.
;; Zero in either argument yields 0; otherwise the low bit is set only
;; when both arguments are odd, and the remaining bits come from
;; recursing on both arguments halved.
(define (bit-and a b)
  (if (or (zero? a) (zero? b))
      0
      (let ((low-bit (if (and (odd? a) (odd? b)) 1 0))
            (upper-bits (bit-and (quotient a 2) (quotient b 2))))
        (+ low-bit (* 2 upper-bits)))))
;; Locate the highest set bit of N by successive halving of the search
;; window: shift widths 16, 8, 4 and 2 are tried in turn, and whenever
;; the remaining value reaches 2^width it is shifted right by that
;; width and the width is added to the running result.  A final 1-bit
;; comparison adds one more when the remainder is at least 2.
;; Only these five widths are tested, so the result never exceeds 31.
(define (integer-log n)
  (let loop ((value n) (result 0) (widths '(16 8 4 2)))
    (cond
      ((pair? widths)
       (let ((width (car widths))
             (remaining (cdr widths)))
         (if (>= value (bit-shl 1 width))
             (loop (bit-shr value width) (+ result width) remaining)
             (loop value result remaining))))
      ;; Last step: no shift is needed, only the comparison.
      ((>= value (bit-shl 1 1)) (+ result 1))
      (else result))))
;; True when every bit of I is also present in FLAGS, i.e. masking
;; FLAGS with I gives back I unchanged.
(define (flag-set? flags i)
  (let ((masked (bit-and flags i)))
    (= i masked)))
;; Combine flag sets A and B with inclusive OR.
;; B may be #f, in which case A is returned untouched.
(define (flag-join a b)
  (cond
    ((not b) a)
    (else (bit-ior a b))))
;; Remove the bits of B from A by AND-ing A with the complement of B
;; (as produced by bit-not).
(define (flag-clear a b)
  (let ((keep-mask (bit-not b)))
    (bit-and a keep-mask)))
;; Flag values, written in binary so the bit each one occupies is
;; visible: ~none has no bits set, the other two use distinct bits.
(define ~none      #b00)
(define ~searcher? #b01)
(define ~consumer? #b10)
I don't know how crucial these routines are to the performance of the irregex library, but if I had a reasonably sized benchmark I'd replace all of them except bit-not (which assumes 16-bit fields) with native versions to see what might improve.