9 //####################################
10 // BASIC CHARACTER MATCHERS
11 //####################################
13 // Match standard control chars
// Declarations only -- the definitions are not part of this section.
// Each matcher takes a pointer into the input text and returns a
// `const char*`.
// NOTE(review): presumably each returns the position just past the matched
// character, or 0 when it does not match, like the matcher templates later
// in this file -- confirm against the definitions.
14 const char* kwd_at(const char* src);
15 const char* kwd_dot(const char* src);
16 const char* kwd_comma(const char* src);
17 const char* kwd_colon(const char* src);
18 const char* kwd_star(const char* src);
19 const char* kwd_plus(const char* src);
20 const char* kwd_minus(const char* src);
21 const char* kwd_slash(const char* src);
23 //####################################
24 // BASIC CLASS MATCHERS
25 //####################################
27 // These are locale independent
// Character class predicates: each takes one character by const reference
// and returns a bool. Declarations only -- the definitions are not part of
// this section, so the exact membership of each class must be read there.
28 bool is_space(const char& src);
29 bool is_alpha(const char& src);
30 bool is_punct(const char& src);
31 bool is_digit(const char& src);
32 bool is_alnum(const char& src);
33 bool is_xdigit(const char& src);
34 bool is_unicode(const char& src);
35 bool is_nonascii(const char& src);
36 bool is_character(const char& src);
37 bool is_uri_character(const char& src);
38 bool escapable_character(const char& src);
40 // Match a single ctype predicate.
// Pointer-based matchers: each takes the current input position and
// returns a `const char*`. Declarations only.
// NOTE(review): presumably each returns `src + 1` when the character at
// `src` belongs to the class and 0 otherwise -- confirm against the
// definitions.
41 const char* space(const char* src);
42 const char* alpha(const char* src);
43 const char* digit(const char* src);
44 const char* xdigit(const char* src);
45 const char* alnum(const char* src);
46 const char* punct(const char* src);
47 const char* hyphen(const char* src);
48 const char* unicode(const char* src);
49 const char* nonascii(const char* src);
50 const char* character(const char* src);
51 const char* uri_character(const char* src);
52 const char* escapable_character(const char* src);
54 // Match multiple ctype characters.
// Declarations only; each takes the current input position and returns a
// `const char*`.
// NOTE(review): presumably these are the "one or more" forms of the
// single-character matchers -- confirm whether an empty run yields 0.
55 const char* spaces(const char* src);
56 const char* digits(const char* src);
57 const char* hyphens(const char* src);
59 // Whitespace handling.
// Declarations only; both take the current input position and return a
// `const char*`.
// NOTE(review): going by the names, `no_spaces` presumably asserts that no
// whitespace follows and `optional_spaces` presumably skips any whitespace
// and always succeeds -- confirm against the definitions.
60 const char* no_spaces(const char* src);
61 const char* optional_spaces(const char* src);
63 // Match any single character (/./).
// Declaration only.
// NOTE(review): behaviour at the terminating NUL is not visible here --
// confirm against the definition.
64 const char* any_char(const char* src);
66 // Assert word boundary (/\b/)
67 // This is a zero-width positive lookahead
// Declaration only.
// NOTE(review): being zero-width, this presumably returns `src` itself on
// success rather than an advanced pointer -- confirm against the definition.
68 const char* word_boundary(const char* src);
70 // Match a single linebreak (/(?:\n|\r\n?)/).
// Declaration only; takes the current input position and returns a
// `const char*`.
71 const char* re_linebreak(const char* src);
73 // Assert string boundaries (/\Z|\z|\A/)
74 // These are zero-width positive lookaheads
// Declaration only; takes the current input position and returns a
// `const char*`.
75 const char* end_of_line(const char* src);
77 // Assert end_of_file boundary (/\z/)
// Declaration only; takes the current input position and returns a
// `const char*`.
78 const char* end_of_file(const char* src);
// Disabled declaration, kept commented out:
79 // const char* start_of_string(const char* src);
// Type definition for prelexer functions.
// A prelexer is a pointer to a function that takes the current input
// position and returns a `const char*`; the templates in this file treat
// a null (0) result as "no match".
using prelexer = const char* (*)(const char*);
84 //####################################
85 // BASIC "REGEX" CONSTRUCTORS
86 //####################################
// Match a single character literal.
// Regex equivalent: /(?:x)/
//
// chr - the character to match (template argument).
// src - current input position; dereferenced unconditionally, so it
//       must not be null.
// Returns the position just past the character when `*src` equals `chr`,
// otherwise 0.
template <char chr>
const char* exactly(const char* src) {
  return *src == chr ? src + 1 : 0;
}
// Match the full string literal.
// Regex equivalent: /(?:literal)/
//
// str - NUL-terminated literal to match (template argument).
// src - current input position.
// Returns the position just past the literal when the input starts with
// it, otherwise 0. A null `str` or null `src` also yields 0.
template <const char* str>
const char* exactly(const char* src) {
  if (str == 0) return 0;
  if (src == 0) return 0;
  const char* pre = str;
  // The literal may be longer than what is left of the input: the input's
  // terminating NUL never equals a non-NUL literal character, so the loop
  // stops there without reading past the end.
  while (*pre && *src == *pre) {
    ++src;
    ++pre;
  }
  // Matched only if the whole literal was consumed.
  return *pre == 0 ? src : 0;
}
// Match the full string literal, ignoring ASCII case in the input.
// Regex equivalent: /(?:literal)/i
// The literal should only contain lower case alpha chars: an input
// character matches either exactly or, if it is an ASCII upper case
// letter, through its lower case form.
//
// str - NUL-terminated literal to match (template argument).
// src - current input position.
// Returns the position just past the literal on a match, otherwise 0.
// A null `str` or null `src` also yields 0.
template <const char* str>
const char* insensitive(const char* src) {
  if (str == 0) return 0;
  if (src == 0) return 0;
  const char* pre = str;
  // The literal may be longer than what is left of the input; the input's
  // terminating NUL fails both comparisons below and ends the loop.
  while (*pre) {
    const char c = *src;
    const bool same = (c == *pre);
    // Fold only 'A'..'Z'. Adding 32 to arbitrary bytes would let e.g. ' '
    // match '@' or '\r' match '-'.
    const bool folded = (c >= 'A' && c <= 'Z' && c + 32 == *pre);
    if (!same && !folded) break;
    ++src;
    ++pre;
  }
  // Matched only if the whole literal was consumed.
  return *pre == 0 ? src : 0;
}
// Match one character that is a member of the char class.
// Regex equivalent: /[axy]/
//
// char_class - NUL-terminated list of accepted characters (template
//              argument).
// src        - current input position; must not be null.
// Returns `src + 1` when `*src` is listed in the class, otherwise 0.
// The input's terminating NUL is never a member.
template <const char* char_class>
const char* class_char(const char* src) {
  const char* cc = char_class;
  // Scan the class until a matching member or its terminator is reached.
  while (*cc && *src != *cc) ++cc;
  return *cc ? src + 1 : 0;
}
138 // Match for members of char class.
139 // Regex equivalent: /[axy]+/
140 template <const char* char_class>
141 const char* class_chars(const char* src) {
143 while (class_char<char_class>(p)) ++p;
144 return p == src ? 0 : p;
// Match one character that is NOT a member of the char class.
// Regex equivalent: /[^axy]/
//
// neg_char_class - NUL-terminated list of rejected characters (template
//                  argument).
// src            - current input position; must not be null.
// Returns `src + 1` when `*src` is not listed in the class, otherwise 0.
// The input's terminating NUL never matches.
template <const char* neg_char_class>
const char* neg_class_char(const char* src) {
  if (*src == 0) return 0;
  const char* cc = neg_char_class;
  // Scan the class until a matching member or its terminator is reached.
  while (*cc && *src != *cc) ++cc;
  // Stopping on a member means the character is excluded.
  return *cc ? 0 : src + 1;
}
157 // Match for members of char class.
158 // Regex equivalent: /[^axy]+/
159 template <const char* neg_char_class>
160 const char* neg_class_chars(const char* src) {
162 while (neg_class_char<neg_char_class>(p)) ++p;
163 return p == src ? 0 : p;
// Match any single character except the supplied one.
// Regex equivalent: /[^x]/
//
// chr - the one character that is rejected (template argument).
// src - current input position; must not be null.
// Returns `src + 1` when `*src` is neither the terminating NUL nor `chr`,
// otherwise 0.
template <const char chr>
const char* any_char_but(const char* src) {
  return (*src && *src != chr) ? src + 1 : 0;
}
173 // Succeeds if the matcher fails.
174 // Aka. zero-width negative lookahead.
175 // Regex equivalent: /(?!literal)/
176 template <prelexer mx>
177 const char* negate(const char* src) {
178 return mx(src) ? 0 : src;
181 // Succeeds if the matcher succeeds.
182 // Aka. zero-width positive lookahead.
183 // Regex equivalent: /(?=literal)/
184 // just hangs around until we need it
185 template <prelexer mx>
186 const char* lookahead(const char* src) {
187 return mx(src) ? src : 0;
190 // Tries supplied matchers in order.
191 // Succeeds if one of them succeeds.
192 // Regex equivalent: /(?:FOO|BAR)/
193 template <const prelexer mx>
194 const char* alternatives(const char* src) {
196 if ((rslt = mx(src))) return rslt;
199 template <const prelexer mx1, const prelexer mx2, const prelexer... mxs>
200 const char* alternatives(const char* src) {
202 if ((rslt = mx1(src))) return rslt;
203 return alternatives<mx2, mxs...>(src);
206 // Tries supplied matchers in order.
207 // Succeeds if all of them succeeds.
208 // Regex equivalent: /(?:FOO)(?:BAR)/
209 template <const prelexer mx1>
210 const char* sequence(const char* src) {
211 const char* rslt = src;
212 if (!(rslt = mx1(rslt))) return 0;
215 template <const prelexer mx1, const prelexer mx2, const prelexer... mxs>
216 const char* sequence(const char* src) {
217 const char* rslt = src;
218 if (!(rslt = mx1(rslt))) return 0;
219 return sequence<mx2, mxs...>(rslt);
223 // Match a pattern or not. Always succeeds.
224 // Regex equivalent: /(?:literal)?/
225 template <prelexer mx>
226 const char* optional(const char* src) {
227 const char* p = mx(src);
231 // Match zero or more of the patterns.
232 // Regex equivalent: /(?:literal)*/
233 template <prelexer mx>
234 const char* zero_plus(const char* src) {
235 const char* p = mx(src);
236 while (p) src = p, p = mx(src);
240 // Match one or more of the patterns.
241 // Regex equivalent: /(?:literal)+/
242 template <prelexer mx>
243 const char* one_plus(const char* src) {
244 const char* p = mx(src);
246 while (p) src = p, p = mx(src);
250 // Match mx non-greedy until delimiter.
251 // Other prelexers are greedy by default.
252 // Regex equivalent: /(?:$mx)*?(?=$delim)\b/
253 template <prelexer mx, prelexer delim>
254 const char* non_greedy(const char* src) {
255 while (!delim(src)) {
256 const char* p = mx(src);
257 if (p == src) return 0;
258 if (p == 0) return 0;
264 //####################################
265 // ADVANCED "REGEX" CONSTRUCTORS
266 //####################################
268 // Match with word boundary rule.
269 // Regex equivalent: /(?:$mx)\b/i
270 template <const char* str>
271 const char* keyword(const char* src) {
278 // Match with word boundary rule.
279 // Regex equivalent: /(?:$mx)\b/
280 template <const char* str>
281 const char* word(const char* src) {
289 const char* loosely(const char* src) {
295 template <const char* str>
296 const char* loosely(const char* src) {