/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.wicket.util.parse.metapattern;

import java.util.Arrays;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.wicket.util.io.IClusterable;


/**
 * Useful class for constructing readable and reusable regular expressions.
 * <p>
 * MetaPatterns can be constructed from a simple regular expression String, from other MetaPatterns
 * (copy constructor), from a list of MetaPatterns or from an array of MetaPatterns. In this way, it
 * is easy to build up larger patterns while transparently binding the capturing groups of each
 * MetaPattern for easy object oriented access to capturing group matches.
 * <p>
 * A given MetaPattern can be converted to a Matcher or Pattern. Groups within the MetaPattern can
 * be used to automatically reference capturing group values when a match is made with a Matcher
 * object.
 * <p>
 * A variety of static constants are provided for use in constructing compound MetaPatterns. Also, a
 * number of simple parsers have been constructed using MetaPatterns in the parsers subpackage.
 * <p>
 * A MetaPattern is compiled lazily, and only once: the flags passed to the first call of
 * {@link #matcher(CharSequence, int)} or {@link #pattern(int)} win, and later calls with different
 * flags reuse the already compiled pattern.
 *
 * @author Jonathan Locke
 */
public class MetaPattern implements IClusterable
{
	private static final long serialVersionUID = 1L;

	/**
	 * Compiled regular expression pattern, or null if patterns variable is valid instead
	 */
	private Pattern pattern;

	/** List of patterns, or null if pattern variable is valid instead */
	private List<MetaPattern> patterns;

	/** The compiled MetaPattern; null until the first call to {@link #compile(int)} */
	private Pattern compiledPattern;

	// Regexps that are used multiple times in defining meta patterns
	private static final String _DOUBLE_QUOTED_STRING = "\"[^\"]*?\"";
	private static final String _SINGLE_QUOTED_STRING = "'[^']*?\'";
	private static final String _STRING = "(?:[\\w\\-\\.]+|" + _DOUBLE_QUOTED_STRING + "|" +
		_SINGLE_QUOTED_STRING + ")";
	private static final String _OPTIONAL_STRING = _STRING + "?";
	private static final String _VARIABLE_NAME = "[A-Za-z_][A-Za-z0-9_-]*";

	// '@' char is not allowed by https://www.w3.org/TR/REC-xml/#NT-NameStartChar
	// but we allow it to be friendlier with VueJS
	private static final String _XML_NAME = "[A-Za-z_:@][A-Za-z0-9_.-]*";

	// Delimiters and punctuation
	/** Constant for whitespace. */
	public static final MetaPattern WHITESPACE = new MetaPattern("\\s+");

	/** Constant for optional whitespace. */
	public static final MetaPattern OPTIONAL_WHITESPACE = new MetaPattern("\\s*");

	/** Constant for non-word. */
	public static final MetaPattern NON_WORD = new MetaPattern("\\W+");

	/** Constant for comma. */
	public static final MetaPattern COMMA = new MetaPattern(",");

	/** Constant for colon. */
	public static final MetaPattern COLON = new MetaPattern(":");

	/** Constant for semicolon. */
	public static final MetaPattern SEMICOLON = new MetaPattern(";");

	/** Constant for slash. */
	public static final MetaPattern SLASH = new MetaPattern("/");

	/** Constant for backslash. */
	public static final MetaPattern BACKSLASH = new MetaPattern("\\\\");

	/** Constant for dot. */
	public static final MetaPattern DOT = new MetaPattern("\\.");

	/** Constant for plus. */
	public static final MetaPattern PLUS = new MetaPattern("\\+");

	/** Constant for minus. */
	public static final MetaPattern MINUS = new MetaPattern("-");

	/** Constant for dash. */
	public static final MetaPattern DASH = new MetaPattern("-");

	/** Constant for underscore. */
	public static final MetaPattern UNDERSCORE = new MetaPattern("_");

	/** Constant for ampersand. */
	public static final MetaPattern AMPERSAND = new MetaPattern("&");

	/** Constant for percent. */
	public static final MetaPattern PERCENT = new MetaPattern("%");

	/**
	 * Constant for dollar. The dollar sign is escaped because a bare <code>$</code> is the regex
	 * end-of-input anchor and would never match a literal dollar character.
	 */
	public static final MetaPattern DOLLAR_SIGN = new MetaPattern("\\$");

	/** Constant for pound. */
	public static final MetaPattern POUND_SIGN = new MetaPattern("#");

	/** Constant for at. */
	public static final MetaPattern AT_SIGN = new MetaPattern("@");

	/** Constant for excl. */
	public static final MetaPattern EXCLAMATION_POINT = new MetaPattern("!");

	/** Constant for tilde. */
	public static final MetaPattern TILDE = new MetaPattern("~");

	/** Constant for equals. */
	public static final MetaPattern EQUALS = new MetaPattern("=");

	/** Constant for star. */
	public static final MetaPattern STAR = new MetaPattern("\\*");

	/** Constant for pipe. */
	public static final MetaPattern PIPE = new MetaPattern("\\|");

	/** Constant for left paren. */
	public static final MetaPattern LEFT_PAREN = new MetaPattern("\\(");

	/** Constant for right paren. */
	public static final MetaPattern RIGHT_PAREN = new MetaPattern("\\)");

	/** Constant for left curly braces. */
	public static final MetaPattern LEFT_CURLY = new MetaPattern("\\{");

	/** Constant for right curly braces. */
	public static final MetaPattern RIGHT_CURLY = new MetaPattern("\\}");

	/** Constant for left square bracket. */
	public static final MetaPattern LEFT_SQUARE = new MetaPattern("\\[");

	/** Constant for right square bracket. */
	public static final MetaPattern RIGHT_SQUARE = new MetaPattern("\\]");

	/** Constant for digit. */
	public static final MetaPattern DIGIT = new MetaPattern("\\d");

	/** Constant for digits. */
	public static final MetaPattern DIGITS = new MetaPattern("\\d+");

	/** Constant for an integer (of any size). */
	public static final MetaPattern INTEGER = new MetaPattern("-?\\d+");

	/** Constant for a floating point number. */
	public static final MetaPattern FLOATING_POINT_NUMBER = new MetaPattern(
		"-?\\d+\\.?\\d*|-?\\.\\d+");

	/** Constant for a positive integer. */
	public static final MetaPattern POSITIVE_INTEGER = new MetaPattern("\\d+");

	/** Constant for hex digit. */
	public static final MetaPattern HEXADECIMAL_DIGIT = new MetaPattern("[0-9a-fA-F]");

	/** Constant for hex digits. */
	public static final MetaPattern HEXADECIMAL_DIGITS = new MetaPattern("[0-9a-fA-F]+");

	/** Constant for anything (string). */
	public static final MetaPattern ANYTHING = new MetaPattern(".*");

	/** Constant for anything non-empty (string). */
	public static final MetaPattern ANYTHING_NON_EMPTY = new MetaPattern(".+");

	/** Constant for a word. */
	public static final MetaPattern WORD = new MetaPattern("\\w+");

	/** Constant for an optional word. */
	public static final MetaPattern OPTIONAL_WORD = new MetaPattern("\\w*");

	/** Constant for a variable name. */
	public static final MetaPattern VARIABLE_NAME = new MetaPattern(_VARIABLE_NAME);

	/** Constant for an XML element name. */
	public static final MetaPattern XML_ELEMENT_NAME = new MetaPattern(_XML_NAME);

	/** Constant for an XML attribute name. */
	public static final MetaPattern XML_ATTRIBUTE_NAME = new MetaPattern(_XML_NAME);

	/**
	 * Constant for perl interpolation, i.e. a variable name wrapped in <code>${</code> and
	 * <code>}</code>. The leading dollar is escaped so that it matches a literal dollar sign
	 * instead of acting as the end-of-input anchor (which made the pattern unmatchable).
	 */
	public static final MetaPattern PERL_INTERPOLATION = new MetaPattern("\\$\\{" + _VARIABLE_NAME +
		"\\}");

	/** Constant for a double quoted string. */
	public static final MetaPattern DOUBLE_QUOTED_STRING = new MetaPattern(_DOUBLE_QUOTED_STRING);

	/** Constant for a string. */
	public static final MetaPattern STRING = new MetaPattern(_STRING);

	/** Constant for an optional string. */
	public static final MetaPattern OPTIONAL_STRING = new MetaPattern(_OPTIONAL_STRING);

	/**
	 * Constructor for a simple pattern.
	 *
	 * @param pattern
	 *            The regular expression pattern to compile
	 */
	public MetaPattern(final String pattern)
	{
		this.pattern = Pattern.compile(pattern);
	}

	/**
	 * Copy constructor. The copy shares the source's pattern, pattern list and (if already
	 * present) compiled pattern references; nothing is deep-copied.
	 *
	 * @param pattern
	 *            The meta pattern to copy
	 */
	public MetaPattern(final MetaPattern pattern)
	{
		this.pattern = pattern.pattern;
		patterns = pattern.patterns;
		compiledPattern = pattern.compiledPattern;
	}

	/**
	 * Constructs from an array of MetaPatterns.
	 *
	 * @param patterns
	 *            Array of MetaPatterns
	 */
	public MetaPattern(final MetaPattern... patterns)
	{
		this(Arrays.asList(patterns));
	}

	/**
	 * Constructs from a list of MetaPatterns. The list reference is stored as given (no copy is
	 * made).
	 *
	 * @param patterns
	 *            List of MetaPatterns
	 */
	public MetaPattern(final List<MetaPattern> patterns)
	{
		this.patterns = patterns;
	}

	/**
	 * Creates a matcher against a given input character sequence.
	 *
	 * @param input
	 *            The input to match against
	 * @return The matcher
	 */
	public final Matcher matcher(final CharSequence input)
	{
		return matcher(input, 0);
	}

	/**
	 * Creates a matcher with the given regexp compile flags. Once you call this method with a given
	 * regexp compile flag value, the pattern will be compiled. Calling it again with a different
	 * value for flags will not recompile the pattern.
	 *
	 * @param input
	 *            The input to match
	 * @param flags
	 *            One or more of the standard Java regular expression compile flags (see
	 *            {@link Pattern#compile(String, int)})
	 * @return The matcher
	 */
	public final Matcher matcher(final CharSequence input, final int flags)
	{
		compile(flags);
		return compiledPattern.matcher(input);
	}

	/**
	 * Gets the regular expression Pattern for this MetaPattern by compiling it.
	 *
	 * @return Pattern compiled with default Java regular expression compile flags
	 */
	public final Pattern pattern()
	{
		return pattern(0);
	}

	/**
	 * Gets the regular expression Pattern for this MetaPattern by compiling it using the given
	 * flags. As with {@link #matcher(CharSequence, int)}, the flags only take effect on the first
	 * compilation.
	 *
	 * @param flags
	 *            One or more of the standard Java regular expression compile flags (see
	 *            {@link Pattern#compile(String, int)})
	 * @return Equivalent Java regular expression Pattern compiled with the given flags
	 */
	public final Pattern pattern(final int flags)
	{
		compile(flags);
		return compiledPattern;
	}

	/**
	 * Converts this MetaPattern to a String: either the source of the simple pattern, or the
	 * concatenation of the string forms of all nested MetaPatterns.
	 *
	 * @return A String representing this MetaPattern
	 * @see java.lang.Object#toString()
	 */
	@Override
	public String toString()
	{
		if (pattern != null)
		{
			return pattern.pattern();
		}
		else
		{
			final StringBuilder buffer = new StringBuilder();
			for (MetaPattern metaPattern : patterns)
			{
				buffer.append(metaPattern);
			}
			return buffer.toString();
		}
	}

	/**
	 * Compiles this MetaPattern with the given Java regular expression flags. Does nothing if a
	 * compiled pattern is already present, so only the flags of the first call are honoured.
	 *
	 * @param flags
	 *            One or more of the standard Java regular expression compile flags (see
	 *            {@link Pattern#compile(String, int)})
	 */
	private synchronized void compile(final int flags)
	{
		if (compiledPattern == null)
		{
			// Capturing group numbering starts at 1 (group 0 is the whole match)
			bind(1);
			compiledPattern = Pattern.compile(toString(), flags);
		}
	}

	/**
	 * Binds this MetaPattern to one or more capturing groups. Since MetaPatterns can nest, the
	 * binding process can recurse.
	 *
	 * @param group
	 *            The initial capturing group number
	 * @return The final capturing group (for use in recursion)
	 */
	private int bind(int group)
	{
		if (this instanceof Group)
		{
			// A Group takes the current group number; the counter then moves on to the next one
			((Group)this).bind(group++);
		}

		if (patterns != null)
		{
			for (MetaPattern metaPattern : patterns)
			{
				group = metaPattern.bind(group);
			}
		}

		return group;
	}
}