001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.wicket.util.string; 018 019import java.io.UnsupportedEncodingException; 020import java.nio.charset.Charset; 021import java.util.ArrayList; 022import java.util.List; 023import java.util.Locale; 024import java.util.regex.Matcher; 025import java.util.regex.Pattern; 026 027import org.apache.wicket.util.lang.Args; 028 029/** 030 * A variety of static String utility methods. 031 * <p> 032 * The escapeMarkup() and toMultilineMarkup() methods are useful for turning normal Java Strings 033 * into HTML strings. 034 * <p> 035 * The lastPathComponent(), firstPathComponent(), afterFirstPathComponent() and 036 * beforeLastPathComponent() methods can chop up a String into path components using a separator 037 * character. If the separator cannot be found the original String is returned. 038 * <p> 039 * Similarly, the beforeLast(), beforeFirst(), afterFirst() and afterLast() methods return sections 040 * before and after a separator character. But if the separator cannot be found, an empty string is 041 * returned. 042 * <p> 043 * Some other miscellaneous methods will strip a given ending off a String if it can be found 044 * (stripEnding()), replace all occurrences of one String with another (replaceAll), do type 045 * conversions (toBoolean(), toChar(), toString()), check a String for emptiness (isEmpty()), 046 * convert a Throwable to a String (toString(Throwable)) or capitalize a String (capitalize()). 047 * 048 * @author Jonathan Locke 049 */ 050public final class Strings 051{ 052 /** 053 * The line separator for the current platform. 054 * 055 * @deprecated Use {@link System#lineSeparator()} 056 */ 057 @Deprecated 058 public static final String LINE_SEPARATOR = System.lineSeparator(); 059 060 /** A table of hex digits */ 061 private static final char[] HEX_DIGIT = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 062 'A', 'B', 'C', 'D', 'E', 'F' }; 063 064 private static final Pattern HTML_NUMBER_REGEX = Pattern.compile("&#\\d+;"); 065 066 private static final String[] NO_STRINGS = new String[0]; 067 068 /** 069 * The name of the parameter used to keep the session id. 070 * The Servlet specification mandates <em>jsessionid</em> but the web containers 071 * provide ways to set a custom one, e.g. <em>sid</em>. 072 * Since Wicket doesn't have access to the web container internals the name should be set explicitly. 073 */ 074 public static final String SESSION_ID_PARAM_NAME = System.getProperty("wicket.jsessionid.name", "jsessionid"); 075 076 /** 077 * Constructs something like <em>;jsessionid=</em>. This is what {@linkplain Strings#stripJSessionId(String)} 078 * actually uses. 079 */ 080 private static final String SESSION_ID_PARAM = ';' + SESSION_ID_PARAM_NAME + '='; 081 082 /** 083 * Private constructor prevents construction. 084 */ 085 private Strings() 086 { 087 } 088 089 /** 090 * Returns everything after the first occurrence of the given character in s. 091 * 092 * @param s 093 * The string 094 * @param c 095 * The character 096 * @return Everything after the first occurrence of the given character in s. If the character 097 * cannot be found, an empty string is returned. 098 */ 099 public static String afterFirst(final String s, final char c) 100 { 101 if (s == null) 102 { 103 return null; 104 } 105 final int index = s.indexOf(c); 106 107 if (index == -1) 108 { 109 return ""; 110 } 111 112 return s.substring(index + 1); 113 } 114 115 /** 116 * Gets everything after the first path component of a path using a given separator. If the 117 * separator cannot be found, an empty String is returned. 118 * <p> 119 * For example, afterFirstPathComponent("foo:bar:baz", ':') would return "bar:baz" and 120 * afterFirstPathComponent("foo", ':') would return "". 121 * 122 * @param path 123 * The path to parse 124 * @param separator 125 * The path separator character 126 * @return Everything after the first component in the path 127 */ 128 public static String afterFirstPathComponent(final String path, final char separator) 129 { 130 return afterFirst(path, separator); 131 } 132 133 /** 134 * Returns everything after the last occurrence of the given character in s. 135 * 136 * @param s 137 * The string 138 * @param c 139 * The character 140 * @return Everything after the last occurrence of the given character in s. If the character 141 * cannot be found, an empty string is returned. 142 */ 143 public static String afterLast(final String s, final char c) 144 { 145 if (s == null) 146 { 147 return null; 148 } 149 final int index = s.lastIndexOf(c); 150 151 if (index == -1) 152 { 153 return ""; 154 } 155 156 return s.substring(index + 1); 157 } 158 159 /** 160 * Returns everything before the first occurrence of the given character in s. 161 * 162 * @param s 163 * The string 164 * @param c 165 * The character 166 * @return Everything before the first occurrence of the given character in s. If the character 167 * cannot be found, an empty string is returned. 168 */ 169 public static String beforeFirst(final String s, final char c) 170 { 171 if (s == null) 172 { 173 return null; 174 } 175 final int index = s.indexOf(c); 176 177 if (index == -1) 178 { 179 return ""; 180 } 181 182 return s.substring(0, index); 183 } 184 185 /** 186 * Returns everything before the last occurrence of the given character in s. 187 * 188 * @param s 189 * The string 190 * @param c 191 * The character 192 * @return Everything before the last occurrence of the given character in s. If the character 193 * cannot be found, an empty string is returned. 194 */ 195 public static String beforeLast(final String s, final char c) 196 { 197 if (s == null) 198 { 199 return null; 200 } 201 final int index = s.lastIndexOf(c); 202 203 if (index == -1) 204 { 205 return ""; 206 } 207 208 return s.substring(0, index); 209 } 210 211 /** 212 * Gets everything before the last path component of a path using a given separator. If the 213 * separator cannot be found, the path itself is returned. 214 * <p> 215 * For example, beforeLastPathComponent("foo.bar.baz", '.') would return "foo.bar" and 216 * beforeLastPathComponent("foo", '.') would return "". 217 * 218 * @param path 219 * The path to parse 220 * @param separator 221 * The path separator character 222 * @return Everything before the last component in the path 223 */ 224 public static String beforeLastPathComponent(final String path, final char separator) 225 { 226 return beforeLast(path, separator); 227 } 228 229 /** 230 * Capitalizes a string. 231 * 232 * @param s 233 * The string 234 * @return The capitalized string 235 */ 236 public static String capitalize(final String s) 237 { 238 if (s == null) 239 { 240 return null; 241 } 242 final char[] chars = s.toCharArray(); 243 244 if (chars.length > 0) 245 { 246 chars[0] = Character.toUpperCase(chars[0]); 247 } 248 249 return new String(chars); 250 } 251 252 /** 253 * Converts a Java String to an HTML markup string, but does not convert normal spaces to 254 * non-breaking space entities (<nbsp>). 255 * 256 * @param s 257 * The characters to escape 258 * @see Strings#escapeMarkup(CharSequence, boolean) 259 * @return The escaped string 260 */ 261 public static CharSequence escapeMarkup(final CharSequence s) 262 { 263 return escapeMarkup(s, false); 264 } 265 266 /** 267 * Converts a Java String to an HTML markup String by replacing illegal characters with HTML 268 * entities where appropriate. Spaces are converted to non-breaking spaces (<nbsp>) if 269 * escapeSpaces is true, tabs are converted to four non-breaking spaces, less than signs are 270 * converted to &lt; entities and greater than signs to &gt; entities. 271 * 272 * @param s 273 * The characters to escape 274 * @param escapeSpaces 275 * True to replace ' ' with nonbreaking space 276 * @return The escaped string 277 */ 278 public static CharSequence escapeMarkup(final CharSequence s, final boolean escapeSpaces) 279 { 280 return escapeMarkup(s, escapeSpaces, false); 281 } 282 283 /** 284 * Converts a Java String to an HTML markup String by replacing illegal characters with HTML 285 * entities where appropriate. Spaces are converted to non-breaking spaces (<nbsp>) if 286 * escapeSpaces is true, tabs are converted to four non-breaking spaces, less than signs are 287 * converted to &lt; entities and greater than signs to &gt; entities. 288 * 289 * @param s 290 * The characters to escape 291 * @param escapeSpaces 292 * True to replace ' ' with nonbreaking space 293 * @param convertToHtmlUnicodeEscapes 294 * True to convert non-7 bit characters to unicode HTML (&#...) 295 * @return The escaped string 296 */ 297 public static CharSequence escapeMarkup(final CharSequence s, final boolean escapeSpaces, 298 final boolean convertToHtmlUnicodeEscapes) 299 { 300 if (s == null) 301 { 302 return null; 303 } 304 305 final int len = s.length(); 306 if (len == 0) 307 { 308 return s; 309 } 310 311 final AppendingStringBuffer buffer = new AppendingStringBuffer((int)(len * 1.1)); 312 313 for (int i = 0; i < len; i++) 314 { 315 final char c = s.charAt(i); 316 317 if (Character.getType(c) == Character.UNASSIGNED) 318 { 319 continue; 320 } 321 switch (c) 322 { 323 case '\t' : 324 if (escapeSpaces) 325 { 326 // Assumption is four space tabs (sorry, but that's 327 // just how it is!) 328 buffer.append(" "); 329 } 330 else 331 { 332 buffer.append(c); 333 } 334 break; 335 336 case ' ' : 337 if (escapeSpaces) 338 { 339 buffer.append(" "); 340 } 341 else 342 { 343 buffer.append(c); 344 } 345 break; 346 347 case '<' : 348 buffer.append("<"); 349 break; 350 351 case '>' : 352 buffer.append(">"); 353 break; 354 355 case '&' : 356 357 buffer.append("&"); 358 break; 359 360 case '"' : 361 buffer.append("""); 362 break; 363 364 case '\'' : 365 buffer.append("'"); 366 break; 367 368 default : 369 370 int ci = 0xffff & c; 371 372 if ( 373 // if this is non-printable and not whitespace (TAB, LF, CR) 374 ((ci < 32) && (ci != 9) && (ci != 10) && (ci != 13)) || 375 // or non-ASCII (XXX: why 160+ ?!) and need to UNICODE escape it 376 (convertToHtmlUnicodeEscapes && (ci > 159))) 377 { 378 buffer.append("&#"); 379 buffer.append(Integer.toString(ci)); 380 buffer.append(';'); 381 } 382 else 383 { 384 // ASCII or whitespace 385 buffer.append(c); 386 } 387 break; 388 } 389 } 390 391 return buffer; 392 } 393 394 /** 395 * Unescapes the escaped entities in the <code>markup</code> passed. 396 * 397 * @param markup 398 * The source <code>String</code> to unescape. 399 * @return the unescaped markup or <code>null</null> if the input is <code>null</code> 400 */ 401 public static CharSequence unescapeMarkup(final String markup) 402 { 403 String unescapedMarkup = StringEscapeUtils.unescapeHtml(markup); 404 return unescapedMarkup; 405 } 406 407 /** 408 * Gets the first path component of a path using a given separator. If the separator cannot be 409 * found, the path itself is returned. 410 * <p> 411 * For example, firstPathComponent("foo.bar", '.') would return "foo" and 412 * firstPathComponent("foo", '.') would return "foo". 413 * 414 * @param path 415 * The path to parse 416 * @param separator 417 * The path separator character 418 * @return The first component in the path or path itself if no separator characters exist. 419 */ 420 public static String firstPathComponent(final String path, final char separator) 421 { 422 if (path == null) 423 { 424 return null; 425 } 426 final int index = path.indexOf(separator); 427 428 if (index == -1) 429 { 430 return path; 431 } 432 433 return path.substring(0, index); 434 } 435 436 /** 437 * Converts encoded \uxxxx to unicode chars and changes special saved chars to their 438 * original forms. 439 * 440 * @param escapedUnicodeString 441 * escaped unicode string, like '\u4F60\u597D'. 442 * 443 * @return The actual unicode. Can be used for instance with message bundles 444 */ 445 public static String fromEscapedUnicode(final String escapedUnicodeString) 446 { 447 int off = 0; 448 char[] in = escapedUnicodeString.toCharArray(); 449 int len = in.length; 450 char[] out = new char[len]; 451 char aChar; 452 int outLen = 0; 453 int end = off + len; 454 455 while (off < end) 456 { 457 aChar = in[off++]; 458 if (aChar == '\\') 459 { 460 aChar = in[off++]; 461 if (aChar == 'u') 462 { 463 // Read the xxxx 464 int value = 0; 465 for (int i = 0; i < 4; i++) 466 { 467 aChar = in[off++]; 468 switch (aChar) 469 { 470 case '0' : 471 case '1' : 472 case '2' : 473 case '3' : 474 case '4' : 475 case '5' : 476 case '6' : 477 case '7' : 478 case '8' : 479 case '9' : 480 value = (value << 4) + aChar - '0'; 481 break; 482 case 'a' : 483 case 'b' : 484 case 'c' : 485 case 'd' : 486 case 'e' : 487 case 'f' : 488 value = (value << 4) + 10 + aChar - 'a'; 489 break; 490 case 'A' : 491 case 'B' : 492 case 'C' : 493 case 'D' : 494 case 'E' : 495 case 'F' : 496 value = (value << 4) + 10 + aChar - 'A'; 497 break; 498 default : 499 throw new IllegalArgumentException("Malformed \\uxxxx encoding."); 500 } 501 } 502 out[outLen++] = (char)value; 503 } 504 else 505 { 506 if (aChar == 't') 507 { 508 aChar = '\t'; 509 } 510 else if (aChar == 'r') 511 { 512 aChar = '\r'; 513 } 514 else if (aChar == 'n') 515 { 516 aChar = '\n'; 517 } 518 else if (aChar == 'f') 519 { 520 aChar = '\f'; 521 } 522 out[outLen++] = aChar; 523 } 524 } 525 else 526 { 527 out[outLen++] = aChar; 528 } 529 } 530 return new String(out, 0, outLen); 531 } 532 533 /** 534 * Checks whether the <code>string</code> is considered empty. Empty means that the string may 535 * contain whitespace, but no visible characters. 536 * 537 * "\n\t " is considered empty, while " a" is not. 538 * 539 * @param string 540 * The string 541 * @return True if the string is null or "" 542 */ 543 public static boolean isEmpty(final CharSequence string) 544 { 545 return string == null || string.length() == 0 || 546 (string.charAt(0) <= ' ' && string.toString().trim().isEmpty()); 547 } 548 549 /** 550 * Checks whether the <code>string</code> is considered empty. Empty means that the string may 551 * contain whitespace, but no visible characters. 552 * 553 * "\n\t " is considered empty, while " a" is not. 554 * 555 * Note: This method overloads {@link #isEmpty(CharSequence)} for performance reasons. 556 * 557 * @param string 558 * The string 559 * @return True if the string is null or "" 560 */ 561 public static boolean isEmpty(final String string) 562 { 563 return string == null || string.isEmpty() || 564 (string.charAt(0) <= ' ' && string.trim().isEmpty()); 565 } 566 567 /** 568 * Checks whether two strings are equals taken care of 'null' values and treating 'null' same as 569 * trim(string).equals("") 570 * 571 * @param string1 572 * @param string2 573 * @return true, if both strings are equal 574 */ 575 public static boolean isEqual(final String string1, final String string2) 576 { 577 if ((string1 == null) && (string2 == null)) 578 { 579 return true; 580 } 581 582 if (isEmpty(string1) && isEmpty(string2)) 583 { 584 return true; 585 } 586 if ((string1 == null) || (string2 == null)) 587 { 588 return false; 589 } 590 591 return string1.equals(string2); 592 } 593 594 /** 595 * Converts the text in <code>s</code> to a corresponding boolean. On, yes, y, true and 1 are 596 * converted to <code>true</code>. Off, no, n, false and 0 (zero) are converted to 597 * <code>false</code>. An empty string is converted to <code>false</code>. Conversion is 598 * case-insensitive, and does <em>not</em> take internationalization into account. 599 * 600 * 'Ja', 'Oui', 'Igen', 'Nein', 'Nee', 'Non', 'Nem' are all illegal values. 601 * 602 * @param s 603 * the value to convert into a boolean 604 * @return Boolean the converted value of <code>s</code> 605 * @throws StringValueConversionException 606 * when the value of <code>s</code> is not recognized. 607 */ 608 public static boolean isTrue(final String s) throws StringValueConversionException 609 { 610 if (s != null) 611 { 612 if (s.equalsIgnoreCase("true")) 613 { 614 return true; 615 } 616 617 if (s.equalsIgnoreCase("false")) 618 { 619 return false; 620 } 621 622 if (s.equalsIgnoreCase("on") || s.equalsIgnoreCase("yes") || s.equalsIgnoreCase("y") || 623 s.equalsIgnoreCase("1")) 624 { 625 return true; 626 } 627 628 if (s.equalsIgnoreCase("off") || s.equalsIgnoreCase("no") || s.equalsIgnoreCase("n") || 629 s.equalsIgnoreCase("0")) 630 { 631 return false; 632 } 633 634 if (isEmpty(s)) 635 { 636 return false; 637 } 638 639 throw new StringValueConversionException("Boolean value \"" + s + "\" not recognized"); 640 } 641 642 return false; 643 } 644 645 /** 646 * Joins string fragments using the specified separator 647 * 648 * @param separator 649 * @param fragments 650 * @return combined fragments 651 */ 652 public static String join(final String separator, final List<String> fragments) 653 { 654 if (fragments == null) 655 { 656 return ""; 657 } 658 return join(separator, fragments.toArray(new String[0])); 659 } 660 661 /** 662 * Joins string fragments using the specified separator 663 * 664 * @param separator 665 * @param fragments 666 * @return combined fragments 667 */ 668 public static String join(final String separator, final String... fragments) 669 { 670 if ((fragments == null) || (fragments.length < 1)) 671 { 672 // no elements 673 return ""; 674 } 675 else if (fragments.length < 2) 676 { 677 // single element 678 return fragments[0]; 679 } 680 else 681 { 682 // two or more elements 683 AppendingStringBuffer buff = new AppendingStringBuffer(128); 684 if (fragments[0] != null) 685 { 686 buff.append(fragments[0]); 687 } 688 boolean separatorNotEmpty = !Strings.isEmpty(separator); 689 for (int i = 1; i < fragments.length; i++) 690 { 691 String fragment = fragments[i]; 692 String previousFragment = fragments[i - 1]; 693 if (previousFragment != null || fragment != null) 694 { 695 boolean lhsClosed = previousFragment.endsWith(separator); 696 boolean rhsClosed = fragment.startsWith(separator); 697 if (separatorNotEmpty && lhsClosed && rhsClosed) 698 { 699 buff.append(fragment.substring(1)); 700 } 701 else if (!lhsClosed && !rhsClosed) 702 { 703 if (!Strings.isEmpty(fragment)) 704 { 705 buff.append(separator); 706 } 707 buff.append(fragment); 708 } 709 else 710 { 711 buff.append(fragment); 712 } 713 } 714 } 715 return buff.toString(); 716 } 717 } 718 719 /** 720 * Gets the last path component of a path using a given separator. If the separator cannot be 721 * found, the path itself is returned. 722 * <p> 723 * For example, lastPathComponent("foo.bar", '.') would return "bar" and 724 * lastPathComponent("foo", '.') would return "foo". 725 * 726 * @param path 727 * The path to parse 728 * @param separator 729 * The path separator character 730 * @return The last component in the path or path itself if no separator characters exist. 731 */ 732 public static String lastPathComponent(final String path, final char separator) 733 { 734 if (path == null) 735 { 736 return null; 737 } 738 739 final int index = path.lastIndexOf(separator); 740 741 if (index == -1) 742 { 743 return path; 744 } 745 746 return path.substring(index + 1); 747 } 748 749 /** 750 * Replace all occurrences of one string replaceWith another string. 751 * 752 * @param s 753 * The string to process 754 * @param searchFor 755 * The value to search for 756 * @param replaceWith 757 * The value to searchFor replaceWith 758 * @return The resulting string with searchFor replaced with replaceWith 759 */ 760 public static CharSequence replaceAll(final CharSequence s, final CharSequence searchFor, 761 CharSequence replaceWith) 762 { 763 if (s == null) 764 { 765 return null; 766 } 767 768 // If searchFor is null or the empty string, then there is nothing to 769 // replace, so returning s is the only option here. 770 if ((searchFor == null) || searchFor.length() == 0) 771 { 772 return s; 773 } 774 775 // If replaceWith is null, then the searchFor should be replaced with 776 // nothing, which can be seen as the empty string. 777 if (replaceWith == null) 778 { 779 replaceWith = ""; 780 } 781 782 String searchString = searchFor.toString(); 783 // Look for first occurrence of searchFor 784 int matchIndex = search(s, searchString, 0); 785 if (matchIndex == -1) 786 { 787 // No replace operation needs to happen 788 return s; 789 } 790 else 791 { 792 return s.toString().replace(searchString, replaceWith); 793 } 794 } 795 796 /** 797 * Replace HTML numbers like &#20540; by the appropriate character. 798 * 799 * @param str 800 * The text to be evaluated 801 * @return The text with "numbers" replaced 802 */ 803 public static String replaceHtmlEscapeNumber(String str) 804 { 805 if (str == null) 806 { 807 return null; 808 } 809 Matcher matcher = HTML_NUMBER_REGEX.matcher(str); 810 while (matcher.find()) 811 { 812 int pos = matcher.start(); 813 int end = matcher.end(); 814 int number = Integer.parseInt(str.substring(pos + 2, end - 1)); 815 char ch = (char)number; 816 str = str.substring(0, pos) + ch + str.substring(end); 817 matcher = HTML_NUMBER_REGEX.matcher(str); 818 } 819 820 return str; 821 } 822 823 /** 824 * Simpler, faster version of String.split() for splitting on a simple character. 825 * 826 * @param s 827 * The string to split 828 * @param c 829 * The character to split on 830 * @return The array of strings 831 */ 832 public static String[] split(final String s, final char c) 833 { 834 if (s == null || s.isEmpty()) 835 { 836 return NO_STRINGS; 837 } 838 839 int pos = s.indexOf(c); 840 if (pos == -1) 841 { 842 return new String[] { s }; 843 } 844 845 int next = s.indexOf(c, pos + 1); 846 if (next == -1) 847 { 848 return new String[] { s.substring(0, pos), s.substring(pos + 1) }; 849 } 850 851 final List<String> strings = new ArrayList<>(); 852 strings.add(s.substring(0, pos)); 853 strings.add(s.substring(pos + 1, next)); 854 while (true) 855 { 856 pos = next + 1; 857 next = s.indexOf(c, pos); 858 if (next == -1) 859 { 860 strings.add(s.substring(pos)); 861 break; 862 } 863 else 864 { 865 strings.add(s.substring(pos, next)); 866 } 867 } 868 final String[] result = new String[strings.size()]; 869 strings.toArray(result); 870 return result; 871 } 872 873 /** 874 * Strips the ending from the string <code>s</code>. 875 * 876 * @param s 877 * The string to strip 878 * @param ending 879 * The ending to strip off 880 * @return The stripped string or the original string if the ending did not exist 881 */ 882 public static String stripEnding(final String s, final String ending) 883 { 884 if (s == null) 885 { 886 return null; 887 } 888 889 // Stripping a null or empty string from the end returns the 890 // original string. 891 if (ending == null || ending.isEmpty()) 892 { 893 return s; 894 } 895 final int endingLength = ending.length(); 896 final int sLength = s.length(); 897 898 // When the length of the ending string is larger 899 // than the original string, the original string is returned. 900 if (endingLength > sLength) 901 { 902 return s; 903 } 904 final int index = s.lastIndexOf(ending); 905 final int endpos = sLength - endingLength; 906 907 if (index == endpos) 908 { 909 return s.substring(0, endpos); 910 } 911 912 return s; 913 } 914 915 /** 916 * Strip any jsessionid and possibly other redundant info that might be in our way. 917 * 918 * @param url 919 * The url to strip 920 * @return The stripped url 921 */ 922 public static String stripJSessionId(final String url) 923 { 924 if (Strings.isEmpty(url)) 925 { 926 return url; 927 } 928 929 // http://.../abc;jsessionid=...?param=... 930 int ixSemiColon = url.indexOf(SESSION_ID_PARAM); 931 if (ixSemiColon == -1) 932 { 933 return url; 934 } 935 936 int ixQuestionMark = url.indexOf('?'); 937 if (ixQuestionMark == -1) 938 { 939 // no query paramaters; cut off at ";" 940 // http://.../abc;jsession=... 941 return url.substring(0, ixSemiColon); 942 } 943 944 if (ixQuestionMark <= ixSemiColon) 945 { 946 // ? is before ; - no jsessionid in the url 947 return url; 948 } 949 950 return url.substring(0, ixSemiColon) + url.substring(ixQuestionMark); 951 } 952 953 /** 954 * Converts the string s to a Boolean. See <code>isTrue</code> for valid values of s. 955 * 956 * @param s 957 * The string to convert. 958 * @return Boolean <code>TRUE</code> when <code>isTrue(s)</code>. 959 * @throws StringValueConversionException 960 * when s is not a valid value 961 * @see #isTrue(String) 962 */ 963 public static Boolean toBoolean(final String s) throws StringValueConversionException 964 { 965 return isTrue(s); 966 } 967 968 /** 969 * Converts the 1 character string s to a character. 970 * 971 * @param s 972 * The 1 character string to convert to a char. 973 * @return Character value to convert 974 * @throws StringValueConversionException 975 * when the string is longer or shorter than 1 character, or <code>null</code>. 976 */ 977 public static char toChar(final String s) throws StringValueConversionException 978 { 979 if (s != null) 980 { 981 if (s.length() == 1) 982 { 983 return s.charAt(0); 984 } 985 else 986 { 987 throw new StringValueConversionException("Expected single character, not \"" + s + 988 "\""); 989 } 990 } 991 992 throw new StringValueConversionException("Character value was null"); 993 } 994 995 /** 996 * Converts unicodes to encoded \uxxxx. 997 * 998 * @param unicodeString 999 * The unicode string 1000 * @return The escaped unicode string, like '\u4F60\u597D'. 1001 */ 1002 public static String toEscapedUnicode(final String unicodeString) 1003 { 1004 if (unicodeString == null || unicodeString.isEmpty()) 1005 { 1006 return unicodeString; 1007 } 1008 int len = unicodeString.length(); 1009 int bufLen = len * 2; 1010 StringBuilder outBuffer = new StringBuilder(bufLen); 1011 for (int x = 0; x < len; x++) 1012 { 1013 char aChar = unicodeString.charAt(x); 1014 if (Character.getType(aChar) == Character.UNASSIGNED) 1015 { 1016 continue; 1017 } 1018 // Handle common case first, selecting largest block that 1019 // avoids the specials below 1020 if ((aChar > 61) && (aChar < 127)) 1021 { 1022 if (aChar == '\\') 1023 { 1024 outBuffer.append('\\'); 1025 outBuffer.append('\\'); 1026 continue; 1027 } 1028 outBuffer.append(aChar); 1029 continue; 1030 } 1031 switch (aChar) 1032 { 1033 case ' ' : 1034 if (x == 0) 1035 { 1036 outBuffer.append('\\'); 1037 } 1038 outBuffer.append(' '); 1039 break; 1040 case '\t' : 1041 outBuffer.append('\\'); 1042 outBuffer.append('t'); 1043 break; 1044 case '\n' : 1045 outBuffer.append('\\'); 1046 outBuffer.append('n'); 1047 break; 1048 case '\r' : 1049 outBuffer.append('\\'); 1050 outBuffer.append('r'); 1051 break; 1052 case '\f' : 1053 outBuffer.append('\\'); 1054 outBuffer.append('f'); 1055 break; 1056 case '=' : // Fall through 1057 case ':' : // Fall through 1058 case '#' : // Fall through 1059 case '!' : 1060 outBuffer.append('\\'); 1061 outBuffer.append(aChar); 1062 break; 1063 default : 1064 if ((aChar < 0x0020) || (aChar > 0x007e)) 1065 { 1066 outBuffer.append('\\'); 1067 outBuffer.append('u'); 1068 outBuffer.append(toHex((aChar >> 12) & 0xF)); 1069 outBuffer.append(toHex((aChar >> 8) & 0xF)); 1070 outBuffer.append(toHex((aChar >> 4) & 0xF)); 1071 outBuffer.append(toHex(aChar & 0xF)); 1072 } 1073 else 1074 { 1075 outBuffer.append(aChar); 1076 } 1077 } 1078 } 1079 return outBuffer.toString(); 1080 } 1081 1082 /** 1083 * Converts a String to multiline HTML markup by replacing newlines with line break entities 1084 * (<br/>) and multiple occurrences of newline with paragraph break entities (<p>). 1085 * 1086 * @param s 1087 * String to transform 1088 * @return String with all single occurrences of newline replaced with <br/> and all 1089 * multiple occurrences of newline replaced with <p>. 1090 */ 1091 public static CharSequence toMultilineMarkup(final CharSequence s) 1092 { 1093 if (s == null) 1094 { 1095 return null; 1096 } 1097 1098 final int len = s.length(); 1099 1100 // allocate a buffer that is 10% larger than the original string to account for markup 1101 final AppendingStringBuffer buffer = new AppendingStringBuffer((int) (len * 1.1) + 16); 1102 int newlineCount = 0; 1103 1104 buffer.append("<p>"); 1105 for (int i = 0; i < len; i++) 1106 { 1107 final char c = s.charAt(i); 1108 1109 switch (c) 1110 { 1111 case '\n' : 1112 newlineCount++; 1113 break; 1114 1115 case '\r' : 1116 break; 1117 1118 default : 1119 if (newlineCount == 1) 1120 { 1121 buffer.append("<br/>"); 1122 } 1123 else if (newlineCount > 1) 1124 { 1125 buffer.append("</p><p>"); 1126 } 1127 1128 buffer.append(c); 1129 newlineCount = 0; 1130 break; 1131 } 1132 } 1133 if (newlineCount == 1) 1134 { 1135 buffer.append("<br/>"); 1136 } 1137 else if (newlineCount > 1) 1138 { 1139 buffer.append("</p><p>"); 1140 } 1141 buffer.append("</p>"); 1142 return buffer; 1143 } 1144 1145 /** 1146 * Converts the given object to a string. Does special conversion for {@link Throwable 1147 * throwables} and String arrays of length 1 (in which case it just returns to string in that 1148 * array, as this is a common thing to have in the Servlet API). 1149 * 1150 * @param object 1151 * The object 1152 * @return The string 1153 */ 1154 public static String toString(final Object object) 1155 { 1156 if (object == null) 1157 { 1158 return null; 1159 } 1160 1161 if (object instanceof Throwable) 1162 { 1163 return toString((Throwable)object); 1164 } 1165 1166 if (object instanceof String) 1167 { 1168 return (String)object; 1169 } 1170 1171 if ((object instanceof String[]) && (((String[])object).length == 1)) 1172 { 1173 return ((String[])object)[0]; 1174 } 1175 1176 return object.toString(); 1177 } 1178 1179 1180 /** 1181 * Converts a Throwable to a string. 1182 * 1183 * @param throwable 1184 * The throwable 1185 * @return The string 1186 */ 1187 public static String toString(final Throwable throwable) 1188 { 1189 if (throwable != null) 1190 { 1191 List<Throwable> al = new ArrayList<>(); 1192 Throwable cause = throwable; 1193 al.add(cause); 1194 while ((cause.getCause() != null) && (cause != cause.getCause())) 1195 { 1196 cause = cause.getCause(); 1197 al.add(cause); 1198 } 1199 1200 AppendingStringBuffer sb = new AppendingStringBuffer(256); 1201 // first print the last cause 1202 int length = al.size() - 1; 1203 cause = al.get(length); 1204 if (throwable instanceof RuntimeException) 1205 { 1206 sb.append("Message: "); 1207 sb.append(throwable.getMessage()); 1208 sb.append("\n\n"); 1209 } 1210 sb.append("Root cause:\n\n"); 1211 outputThrowable(cause, sb, false); 1212 1213 if (length > 0) 1214 { 1215 sb.append("\n\nComplete stack:\n\n"); 1216 for (int i = 0; i < length; i++) 1217 { 1218 outputThrowable(al.get(i), sb, true); 1219 sb.append('\n'); 1220 } 1221 } 1222 return sb.toString(); 1223 } 1224 else 1225 { 1226 return "<Null Throwable>"; 1227 } 1228 } 1229 1230 private static void append(final AppendingStringBuffer buffer, final CharSequence s, 1231 final int from, final int to) 1232 { 1233 if (s instanceof AppendingStringBuffer) 1234 { 1235 AppendingStringBuffer asb = (AppendingStringBuffer)s; 1236 buffer.append(asb.getValue(), from, to - from); 1237 } 1238 else 1239 { 1240 buffer.append(s.subSequence(from, to)); 1241 } 1242 } 1243 1244 /** 1245 * Outputs the throwable and its stacktrace to the stringbuffer. If stopAtWicketSerlvet is true 1246 * then the output will stop when the org.apache.wicket servlet is reached. sun.reflect. 1247 * packages are filtered out. 1248 * 1249 * @param cause 1250 * @param sb 1251 * @param stopAtWicketServlet 1252 */ 1253 private static void outputThrowable(final Throwable cause, final AppendingStringBuffer sb, 1254 final boolean stopAtWicketServlet) 1255 { 1256 sb.append(cause); 1257 sb.append("\n"); 1258 StackTraceElement[] trace = cause.getStackTrace(); 1259 for (int i = 0; i < trace.length; i++) 1260 { 1261 String traceString = trace[i].toString(); 1262 if (!(traceString.startsWith("sun.reflect.") && (i > 1))) 1263 { 1264 sb.append(" at "); 1265 sb.append(traceString); 1266 sb.append("\n"); 1267 if (stopAtWicketServlet && 1268 (traceString.startsWith("org.apache.wicket.protocol.http.WicketServlet") || traceString.startsWith("org.apache.wicket.protocol.http.WicketFilter"))) 1269 { 1270 return; 1271 } 1272 } 1273 } 1274 } 1275 1276 private static int search(final CharSequence s, final String searchString, final int pos) 1277 { 1278 if (s instanceof String) 1279 { 1280 return ((String)s).indexOf(searchString, pos); 1281 } 1282 else if (s instanceof StringBuffer) 1283 { 1284 return ((StringBuffer)s).indexOf(searchString, pos); 1285 } 1286 else if (s instanceof StringBuilder) 1287 { 1288 return ((StringBuilder)s).indexOf(searchString, pos); 1289 } 1290 else if (s instanceof AppendingStringBuffer) 1291 { 1292 return ((AppendingStringBuffer)s).indexOf(searchString, pos); 1293 } 1294 else 1295 { 1296 return s.toString().indexOf(searchString, pos); 1297 } 1298 } 1299 1300 /** 1301 * Convert a nibble to a hex character 1302 * 1303 * @param nibble 1304 * the nibble to convert. 1305 * @return hex character 1306 */ 1307 private static char toHex(final int nibble) 1308 { 1309 return HEX_DIGIT[(nibble & 0xF)]; 1310 } 1311 1312 /** 1313 * Calculates the length of string in bytes, uses specified <code>charset</code> if provided. 1314 * 1315 * @param string 1316 * @param charset 1317 * (optional) character set to use when converting string to bytes 1318 * @return length of string in bytes 1319 */ 1320 public static int lengthInBytes(final String string, final Charset charset) 1321 { 1322 Args.notNull(string, "string"); 1323 if (charset != null) 1324 { 1325 try 1326 { 1327 return string.getBytes(charset.name()).length; 1328 } 1329 catch (UnsupportedEncodingException e) 1330 { 1331 throw new RuntimeException( 1332 "StringResourceStream created with unsupported charset: " + charset.name()); 1333 } 1334 } 1335 else 1336 { 1337 return string.getBytes().length; 1338 } 1339 } 1340 1341 /** 1342 * Extended {@link String#startsWith(String)} with support for case sensitivity 1343 * 1344 * @param str 1345 * @param prefix 1346 * @param caseSensitive 1347 * @return <code>true</code> if <code>str</code> starts with <code>prefix</code> 1348 */ 1349 public static boolean startsWith(final String str, final String prefix, 1350 final boolean caseSensitive) 1351 { 1352 if (caseSensitive) 1353 { 1354 return str.startsWith(prefix); 1355 } 1356 else 1357 { 1358 return str.toLowerCase(Locale.ROOT).startsWith(prefix.toLowerCase(Locale.ROOT)); 1359 } 1360 } 1361 1362 /** 1363 * returns the zero-based index of a character within a char sequence. this method mainly exists 1364 * as an faster alternative for <code>sequence.toString().indexOf(ch)</code>. 1365 * 1366 * @param sequence 1367 * character sequence 1368 * @param ch 1369 * character to search for 1370 * @return index of character within character sequence or <code>-1</code> if not found 1371 */ 1372 public static int indexOf(final CharSequence sequence, final char ch) 1373 { 1374 if (sequence != null) 1375 { 1376 for (int i = 0; i < sequence.length(); i++) 1377 { 1378 if (sequence.charAt(i) == ch) 1379 { 1380 return i; 1381 } 1382 } 1383 } 1384 1385 return -1; 1386 } 1387 1388 /** 1389 * <p> 1390 * Find the Levenshtein distance between two Strings. 1391 * </p> 1392 * 1393 * <p> 1394 * This is the number of changes needed to change one String into another, where each change is 1395 * a single character modification (deletion, insertion or substitution). 1396 * </p> 1397 * 1398 * <p> 1399 * The previous implementation of the Levenshtein distance algorithm was from <a 1400 * href="http://www.merriampark.com/ld.htm">http://www.merriampark.com/ld.htm</a> 1401 * </p> 1402 * 1403 * <p> 1404 * Chas Emerick has written an implementation in Java, which avoids an OutOfMemoryError which 1405 * can occur when my Java implementation is used with very large strings.<br> 1406 * This implementation of the Levenshtein distance algorithm is from <a 1407 * href="http://www.merriampark.com/ldjava.htm">http://www.merriampark.com/ldjava.htm</a> 1408 * </p> 1409 * 1410 * <pre> 1411 * Strings.getLevenshteinDistance(null, *) = IllegalArgumentException 1412 * Strings.getLevenshteinDistance(*, null) = IllegalArgumentException 1413 * Strings.getLevenshteinDistance("","") = 0 1414 * Strings.getLevenshteinDistance("","a") = 1 1415 * Strings.getLevenshteinDistance("aaapppp", "") = 7 1416 * Strings.getLevenshteinDistance("frog", "fog") = 1 1417 * Strings.getLevenshteinDistance("fly", "ant") = 3 1418 * Strings.getLevenshteinDistance("elephant", "hippo") = 7 1419 * Strings.getLevenshteinDistance("hippo", "elephant") = 7 1420 * Strings.getLevenshteinDistance("hippo", "zzzzzzzz") = 8 1421 * Strings.getLevenshteinDistance("hello", "hallo") = 1 1422 * </pre> 1423 * 1424 * Copied from Apache commons-lang StringUtils 3.0 1425 * 1426 * @param s 1427 * the first String, must not be null 1428 * @param t 1429 * the second String, must not be null 1430 * @return result distance 1431 * @throws IllegalArgumentException 1432 * if either String input {@code null} 1433 */ 1434 public static int getLevenshteinDistance(CharSequence s, CharSequence t) 1435 { 1436 if (s == null || t == null) 1437 { 1438 throw new IllegalArgumentException("Strings must not be null"); 1439 } 1440 1441 /* 1442 * The difference between this impl. and the previous is that, rather than creating and 1443 * retaining a matrix of size s.length()+1 by t.length()+1, we maintain two 1444 * single-dimensional arrays of length s.length()+1. The first, d, is the 'current working' 1445 * distance array that maintains the newest distance cost counts as we iterate through the 1446 * characters of String s. Each time we increment the index of String t we are comparing, d 1447 * is copied to p, the second int[]. Doing so allows us to retain the previous cost counts 1448 * as required by the algorithm (taking the minimum of the cost count to the left, up one, 1449 * and diagonally up and to the left of the current cost count being calculated). (Note that 1450 * the arrays aren't really copied anymore, just switched...this is clearly much better than 1451 * cloning an array or doing a System.arraycopy() each time through the outer loop.) 1452 * 1453 * Effectively, the difference between the two implementations is this one does not cause an 1454 * out of memory condition when calculating the LD over two very large strings. 1455 */ 1456 1457 int n = s.length(); // length of s 1458 int m = t.length(); // length of t 1459 1460 if (n == 0) 1461 { 1462 return m; 1463 } 1464 else if (m == 0) 1465 { 1466 return n; 1467 } 1468 1469 if (n > m) 1470 { 1471 // swap the input strings to consume less memory 1472 CharSequence tmp = s; 1473 s = t; 1474 t = tmp; 1475 n = m; 1476 m = t.length(); 1477 } 1478 1479 int p[] = new int[n + 1]; // 'previous' cost array, horizontally 1480 int d[] = new int[n + 1]; // cost array, horizontally 1481 int _d[]; // placeholder to assist in swapping p and d 1482 1483 // indexes into strings s and t 1484 int i; // iterates through s 1485 int j; // iterates through t 1486 1487 char t_j; // jth character of t 1488 1489 int cost; // cost 1490 1491 for (i = 0; i <= n; i++) 1492 { 1493 p[i] = i; 1494 } 1495 1496 for (j = 1; j <= m; j++) 1497 { 1498 t_j = t.charAt(j - 1); 1499 d[0] = j; 1500 1501 for (i = 1; i <= n; i++) 1502 { 1503 cost = s.charAt(i - 1) == t_j ? 0 : 1; 1504 // minimum of cell to the left+1, to the top+1, diagonally left and up +cost 1505 d[i] = Math.min(Math.min(d[i - 1] + 1, p[i] + 1), p[i - 1] + cost); 1506 } 1507 1508 // copy current distance counts to 'previous row' distance counts 1509 _d = p; 1510 p = d; 1511 d = _d; 1512 } 1513 1514 // our last action in the above loop was to switch d and p, so p now 1515 // actually has the most recent cost counts 1516 return p[n]; 1517 } 1518 1519 /** 1520 * convert byte array to hex string 1521 * 1522 * @param bytes 1523 * bytes to convert to hexadecimal representation 1524 * 1525 * @return hex string 1526 */ 1527 public static String toHexString(byte[] bytes) 1528 { 1529 Args.notNull(bytes, "bytes"); 1530 1531 final StringBuilder hex = new StringBuilder(bytes.length << 1); 1532 1533 for (final byte b : bytes) 1534 { 1535 hex.append(toHex(b >> 4)); 1536 hex.append(toHex(b)); 1537 } 1538 return hex.toString(); 1539 } 1540 1541 1542 /** 1543 * Return this value as en enum value. 1544 * 1545 * @param value 1546 * the value to convert to an enum value 1547 * @param enumClass 1548 * the enum type 1549 * @return an enum value 1550 */ 1551 public static <T extends Enum<T>> T toEnum(final CharSequence value, final Class<T> enumClass) 1552 { 1553 Args.notNull(enumClass, "enumClass"); 1554 Args.notNull(value, "value"); 1555 1556 try 1557 { 1558 return Enum.valueOf(enumClass, value.toString()); 1559 } 1560 catch (Exception e) 1561 { 1562 throw new StringValueConversionException( 1563 String.format("Cannot convert '%s' to enum constant of type '%s'.", value, enumClass), e); 1564 } 1565 } 1566 1567 /** 1568 * Returns the original string if this one is not empty (i.e. {@link #isEmpty(CharSequence)} returns false), 1569 * otherwise the default one is returned. The default string might be itself an empty one. 1570 * 1571 * @param originalString 1572 * the original sting value 1573 * @param defaultValue 1574 * the default string to return if the original is empty 1575 * @return the original string value if not empty, the default one otherwise 1576 */ 1577 public static String defaultIfEmpty(String originalString, String defaultValue) 1578 { 1579 return isEmpty(originalString) ? defaultValue : originalString; 1580 } 1581}