summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorThomas Hintz <t@thintz.com>2014-10-18 09:54:59 -0700
committerThomas Hintz <t@thintz.com>2014-10-18 09:54:59 -0700
commitb1ed3de161b8bd43f578f2e35faecce6213c4c4b (patch)
tree221dc5b13a03b452db2c940c97d81c78292e306b
parentb6fae3ef783de83579bf557acfdf9d6f5e236a60 (diff)
downloadwebsockets-b1ed3de161b8bd43f578f2e35faecce6213c4c4b.tar.gz
Adding fast ASCII-only UTF-8 validation.
-rw-r--r--websockets.scm26
1 files changed, 25 insertions, 1 deletions
diff --git a/websockets.scm b/websockets.scm
index f6724b2..75b2886 100644
--- a/websockets.scm
+++ b/websockets.scm
@@ -185,6 +185,8 @@
(define-external wslen int len)
+ ; TODO handle -1
+
(define-external wsv scheme-pointer payload)
((foreign-lambda* void ()
"
@@ -327,8 +329,30 @@
'optype frame-optype)))))))))))
(include "utf8-grammar.scm")
+
(define (valid-utf8? s)
- (parse utf8-string (->parser-input s) memoize: #t))
+ ;; Checks whether the string S is acceptable as UTF-8.
+ ;; Yields #t straight away when every byte of S is 7-bit ASCII;
+ ;; otherwise yields whatever the grammar-based `utf8-string' parser
+ ;; returns for S.
+ (or (let ((len (string-length s)))
+ ; Try to validate as an ASCII string first. It's essentially
+ ; free, doesn't generate garbage and is many, many times
+ ; faster than the general purpose validator.
+ (define-external ws_utlen int len)
+ (define-external ws_uts scheme-pointer s)
+ ; The C body answers 1 (all ASCII), 0 (found a byte > 127) or
+ ; -1 (unusable length); only 1 short-circuits the `or'.
+ (= 1
+ ((foreign-lambda* int ()
+"
+ /* Walk a local cursor so the exported pointer is left untouched. */
+ const unsigned char *p = (const unsigned char *)ws_uts;
+ int i;
+
+ /* A negative length (e.g. after narrowing to int) must never reach
+ the loop; report it and let the general validator decide. */
+ if (ws_utlen < 0) { C_return(-1); }
+
+ for (i = 0; i < ws_utlen; ++i)
+ {
+ if (p[i] > 127)
+ {
+ C_return(0);
+ }
+ }
+
+ C_return(1);
+"))))
+ (parse utf8-string (->parser-input s))))
(define (close-code->integer s)
(if (string-null? s)