001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.wicket.util.string;
018
019import java.io.UnsupportedEncodingException;
020import java.nio.charset.Charset;
021import java.util.ArrayList;
022import java.util.List;
023import java.util.Locale;
024import java.util.regex.Matcher;
025import java.util.regex.Pattern;
026
027import org.apache.wicket.util.lang.Args;
028
029/**
030 * A variety of static String utility methods.
031 * <p>
032 * The escapeMarkup() and toMultilineMarkup() methods are useful for turning normal Java Strings
033 * into HTML strings.
034 * <p>
 * The lastPathComponent() and firstPathComponent() methods chop a String into path components
 * using a separator character; if the separator cannot be found the original String is returned.
 * afterFirstPathComponent() and beforeLastPathComponent() return an empty string in that case.
038 * <p>
039 * Similarly, the beforeLast(), beforeFirst(), afterFirst() and afterLast() methods return sections
040 * before and after a separator character. But if the separator cannot be found, an empty string is
041 * returned.
042 * <p>
043 * Some other miscellaneous methods will strip a given ending off a String if it can be found
044 * (stripEnding()), replace all occurrences of one String with another (replaceAll), do type
045 * conversions (toBoolean(), toChar(), toString()), check a String for emptiness (isEmpty()),
046 * convert a Throwable to a String (toString(Throwable)) or capitalize a String (capitalize()).
047 * 
048 * @author Jonathan Locke
049 */
050public final class Strings
051{
052        /**
053         * The line separator for the current platform.
054         *
055         * @deprecated Use {@link System#lineSeparator()}
056         */
057        @Deprecated
058        public static final String LINE_SEPARATOR = System.lineSeparator();
059
060        /** A table of hex digits */
061        private static final char[] HEX_DIGIT = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
062                        'A', 'B', 'C', 'D', 'E', 'F' };
063
064        private static final Pattern HTML_NUMBER_REGEX = Pattern.compile("&#\\d+;");
065        
066        private static final String[] NO_STRINGS = new String[0];
067
068        /**
069         * The name of the parameter used to keep the session id.
070         * The Servlet specification mandates <em>jsessionid</em> but the web containers
071         * provide ways to set a custom one, e.g. <em>sid</em>.
072         * Since Wicket doesn't have access to the web container internals the name should be set explicitly.
073         */
074        public static final String SESSION_ID_PARAM_NAME = System.getProperty("wicket.jsessionid.name", "jsessionid");
075
076        /**
077         * Constructs something like <em>;jsessionid=</em>. This is what {@linkplain Strings#stripJSessionId(String)}
078         * actually uses.
079         */
080        private static final String SESSION_ID_PARAM = ';' + SESSION_ID_PARAM_NAME + '=';
081
082        /**
083         * Private constructor prevents construction.
084         */
085        private Strings()
086        {
087        }
088
089        /**
090         * Returns everything after the first occurrence of the given character in s.
091         * 
092         * @param s
093         *            The string
094         * @param c
095         *            The character
096         * @return Everything after the first occurrence of the given character in s. If the character
097         *         cannot be found, an empty string is returned.
098         */
099        public static String afterFirst(final String s, final char c)
100        {
101                if (s == null)
102                {
103                        return null;
104                }
105                final int index = s.indexOf(c);
106
107                if (index == -1)
108                {
109                        return "";
110                }
111
112                return s.substring(index + 1);
113        }
114
115        /**
116         * Gets everything after the first path component of a path using a given separator. If the
117         * separator cannot be found, an empty String is returned.
118         * <p>
119         * For example, afterFirstPathComponent("foo:bar:baz", ':') would return "bar:baz" and
120         * afterFirstPathComponent("foo", ':') would return "".
121         * 
122         * @param path
123         *            The path to parse
124         * @param separator
125         *            The path separator character
126         * @return Everything after the first component in the path
127         */
128        public static String afterFirstPathComponent(final String path, final char separator)
129        {
130                return afterFirst(path, separator);
131        }
132
133        /**
134         * Returns everything after the last occurrence of the given character in s.
135         * 
136         * @param s
137         *            The string
138         * @param c
139         *            The character
140         * @return Everything after the last occurrence of the given character in s. If the character
141         *         cannot be found, an empty string is returned.
142         */
143        public static String afterLast(final String s, final char c)
144        {
145                if (s == null)
146                {
147                        return null;
148                }
149                final int index = s.lastIndexOf(c);
150
151                if (index == -1)
152                {
153                        return "";
154                }
155
156                return s.substring(index + 1);
157        }
158
159        /**
160         * Returns everything before the first occurrence of the given character in s.
161         * 
162         * @param s
163         *            The string
164         * @param c
165         *            The character
166         * @return Everything before the first occurrence of the given character in s. If the character
167         *         cannot be found, an empty string is returned.
168         */
169        public static String beforeFirst(final String s, final char c)
170        {
171                if (s == null)
172                {
173                        return null;
174                }
175                final int index = s.indexOf(c);
176
177                if (index == -1)
178                {
179                        return "";
180                }
181
182                return s.substring(0, index);
183        }
184
185        /**
186         * Returns everything before the last occurrence of the given character in s.
187         * 
188         * @param s
189         *            The string
190         * @param c
191         *            The character
192         * @return Everything before the last occurrence of the given character in s. If the character
193         *         cannot be found, an empty string is returned.
194         */
195        public static String beforeLast(final String s, final char c)
196        {
197                if (s == null)
198                {
199                        return null;
200                }
201                final int index = s.lastIndexOf(c);
202
203                if (index == -1)
204                {
205                        return "";
206                }
207
208                return s.substring(0, index);
209        }
210
211        /**
212         * Gets everything before the last path component of a path using a given separator. If the
213         * separator cannot be found, the path itself is returned.
214         * <p>
215         * For example, beforeLastPathComponent("foo.bar.baz", '.') would return "foo.bar" and
216         * beforeLastPathComponent("foo", '.') would return "".
217         * 
218         * @param path
219         *            The path to parse
220         * @param separator
221         *            The path separator character
222         * @return Everything before the last component in the path
223         */
224        public static String beforeLastPathComponent(final String path, final char separator)
225        {
226                return beforeLast(path, separator);
227        }
228
229        /**
230         * Capitalizes a string.
231         * 
232         * @param s
233         *            The string
234         * @return The capitalized string
235         */
236        public static String capitalize(final String s)
237        {
238                if (s == null)
239                {
240                        return null;
241                }
242                final char[] chars = s.toCharArray();
243
244                if (chars.length > 0)
245                {
246                        chars[0] = Character.toUpperCase(chars[0]);
247                }
248
249                return new String(chars);
250        }
251
252        /**
253         * Converts a Java String to an HTML markup string, but does not convert normal spaces to
254         * non-breaking space entities (&lt;nbsp&gt;).
255         * 
256         * @param s
257         *            The characters to escape
258         * @see Strings#escapeMarkup(CharSequence, boolean)
259         * @return The escaped string
260         */
261        public static CharSequence escapeMarkup(final CharSequence s)
262        {
263                return escapeMarkup(s, false);
264        }
265
266        /**
267         * Converts a Java String to an HTML markup String by replacing illegal characters with HTML
268         * entities where appropriate. Spaces are converted to non-breaking spaces (&lt;nbsp&gt;) if
269         * escapeSpaces is true, tabs are converted to four non-breaking spaces, less than signs are
270         * converted to &amp;lt; entities and greater than signs to &amp;gt; entities.
271         * 
272         * @param s
273         *            The characters to escape
274         * @param escapeSpaces
275         *            True to replace ' ' with nonbreaking space
276         * @return The escaped string
277         */
278        public static CharSequence escapeMarkup(final CharSequence s, final boolean escapeSpaces)
279        {
280                return escapeMarkup(s, escapeSpaces, false);
281        }
282
283        /**
284         * Converts a Java String to an HTML markup String by replacing illegal characters with HTML
285         * entities where appropriate. Spaces are converted to non-breaking spaces (&lt;nbsp&gt;) if
286         * escapeSpaces is true, tabs are converted to four non-breaking spaces, less than signs are
287         * converted to &amp;lt; entities and greater than signs to &amp;gt; entities.
288         * 
289         * @param s
290         *            The characters to escape
291         * @param escapeSpaces
292         *            True to replace ' ' with nonbreaking space
293         * @param convertToHtmlUnicodeEscapes
294         *            True to convert non-7 bit characters to unicode HTML (&amp;#...)
295         * @return The escaped string
296         */
297        public static CharSequence escapeMarkup(final CharSequence s, final boolean escapeSpaces,
298                final boolean convertToHtmlUnicodeEscapes)
299        {
300                if (s == null)
301                {
302                        return null;
303                }
304
305                final int len = s.length();
306                if (len == 0)
307                {
308                        return s;
309                }
310
311                final AppendingStringBuffer buffer = new AppendingStringBuffer((int)(len * 1.1));
312
313                for (int i = 0; i < len; i++)
314                {
315                        final char c = s.charAt(i);
316
317                        if (Character.getType(c) == Character.UNASSIGNED)
318                        {
319                                continue;
320                        }
321                        switch (c)
322                        {
323                                case '\t' :
324                                        if (escapeSpaces)
325                                        {
326                                                // Assumption is four space tabs (sorry, but that's
327                                                // just how it is!)
328                                                buffer.append("&nbsp;&nbsp;&nbsp;&nbsp;");
329                                        }
330                                        else
331                                        {
332                                                buffer.append(c);
333                                        }
334                                        break;
335
336                                case ' ' :
337                                        if (escapeSpaces)
338                                        {
339                                                buffer.append("&nbsp;");
340                                        }
341                                        else
342                                        {
343                                                buffer.append(c);
344                                        }
345                                        break;
346
347                                case '<' :
348                                        buffer.append("&lt;");
349                                        break;
350
351                                case '>' :
352                                        buffer.append("&gt;");
353                                        break;
354
355                                case '&' :
356
357                                        buffer.append("&amp;");
358                                        break;
359
360                                case '"' :
361                                        buffer.append("&quot;");
362                                        break;
363
364                                case '\'' :
365                                        buffer.append("&#039;");
366                                        break;
367
368                                default :
369
370                                        int ci = 0xffff & c;
371
372                                        if (
373                                        // if this is non-printable and not whitespace (TAB, LF, CR)
374                                        ((ci < 32) && (ci != 9) && (ci != 10) && (ci != 13)) ||
375                                        // or non-ASCII (XXX: why 160+ ?!) and need to UNICODE escape it
376                                                (convertToHtmlUnicodeEscapes && (ci > 159)))
377                                        {
378                                                buffer.append("&#");
379                                                buffer.append(Integer.toString(ci));
380                                                buffer.append(';');
381                                        }
382                                        else
383                                        {
384                                                // ASCII or whitespace
385                                                buffer.append(c);
386                                        }
387                                        break;
388                        }
389                }
390
391                return buffer;
392        }
393
394        /**
395         * Unescapes the escaped entities in the <code>markup</code> passed.
396         * 
397         * @param markup
398         *            The source <code>String</code> to unescape.
399         * @return the unescaped markup or <code>null</null> if the input is <code>null</code>
400         */
401        public static CharSequence unescapeMarkup(final String markup)
402        {
403                String unescapedMarkup = StringEscapeUtils.unescapeHtml(markup);
404                return unescapedMarkup;
405        }
406
407        /**
408         * Gets the first path component of a path using a given separator. If the separator cannot be
409         * found, the path itself is returned.
410         * <p>
411         * For example, firstPathComponent("foo.bar", '.') would return "foo" and
412         * firstPathComponent("foo", '.') would return "foo".
413         * 
414         * @param path
415         *            The path to parse
416         * @param separator
417         *            The path separator character
418         * @return The first component in the path or path itself if no separator characters exist.
419         */
420        public static String firstPathComponent(final String path, final char separator)
421        {
422                if (path == null)
423                {
424                        return null;
425                }
426                final int index = path.indexOf(separator);
427
428                if (index == -1)
429                {
430                        return path;
431                }
432
433                return path.substring(0, index);
434        }
435
436        /**
437         * Converts encoded &#92;uxxxx to unicode chars and changes special saved chars to their
438         * original forms.
439         * 
440         * @param escapedUnicodeString
441         *            escaped unicode string, like '\u4F60\u597D'.
442         * 
443         * @return The actual unicode. Can be used for instance with message bundles
444         */
445        public static String fromEscapedUnicode(final String escapedUnicodeString)
446        {
447                int off = 0;
448                char[] in = escapedUnicodeString.toCharArray();
449                int len = in.length;
450                char[] out = new char[len];
451                char aChar;
452                int outLen = 0;
453                int end = off + len;
454
455                while (off < end)
456                {
457                        aChar = in[off++];
458                        if (aChar == '\\')
459                        {
460                                aChar = in[off++];
461                                if (aChar == 'u')
462                                {
463                                        // Read the xxxx
464                                        int value = 0;
465                                        for (int i = 0; i < 4; i++)
466                                        {
467                                                aChar = in[off++];
468                                                switch (aChar)
469                                                {
470                                                        case '0' :
471                                                        case '1' :
472                                                        case '2' :
473                                                        case '3' :
474                                                        case '4' :
475                                                        case '5' :
476                                                        case '6' :
477                                                        case '7' :
478                                                        case '8' :
479                                                        case '9' :
480                                                                value = (value << 4) + aChar - '0';
481                                                                break;
482                                                        case 'a' :
483                                                        case 'b' :
484                                                        case 'c' :
485                                                        case 'd' :
486                                                        case 'e' :
487                                                        case 'f' :
488                                                                value = (value << 4) + 10 + aChar - 'a';
489                                                                break;
490                                                        case 'A' :
491                                                        case 'B' :
492                                                        case 'C' :
493                                                        case 'D' :
494                                                        case 'E' :
495                                                        case 'F' :
496                                                                value = (value << 4) + 10 + aChar - 'A';
497                                                                break;
498                                                        default :
499                                                                throw new IllegalArgumentException("Malformed \\uxxxx encoding.");
500                                                }
501                                        }
502                                        out[outLen++] = (char)value;
503                                }
504                                else
505                                {
506                                        if (aChar == 't')
507                                        {
508                                                aChar = '\t';
509                                        }
510                                        else if (aChar == 'r')
511                                        {
512                                                aChar = '\r';
513                                        }
514                                        else if (aChar == 'n')
515                                        {
516                                                aChar = '\n';
517                                        }
518                                        else if (aChar == 'f')
519                                        {
520                                                aChar = '\f';
521                                        }
522                                        out[outLen++] = aChar;
523                                }
524                        }
525                        else
526                        {
527                                out[outLen++] = aChar;
528                        }
529                }
530                return new String(out, 0, outLen);
531        }
532
533        /**
534         * Checks whether the <code>string</code> is considered empty. Empty means that the string may
535         * contain whitespace, but no visible characters.
536         * 
537         * "\n\t " is considered empty, while " a" is not.
538         * 
539         * @param string
540         *            The string
541         * @return True if the string is null or ""
542         */
543        public static boolean isEmpty(final CharSequence string)
544        {
545                return string == null || string.length() == 0 ||
546                        (string.charAt(0) <= ' ' && string.toString().trim().isEmpty());
547        }
548
549        /**
550         * Checks whether the <code>string</code> is considered empty. Empty means that the string may
551         * contain whitespace, but no visible characters.
552         *
553         * "\n\t " is considered empty, while " a" is not.
554         * 
555         * Note: This method overloads {@link #isEmpty(CharSequence)} for performance reasons.
556         *
557         * @param string
558         *            The string
559         * @return True if the string is null or ""
560         */
561        public static boolean isEmpty(final String string)
562        {
563                return string == null || string.isEmpty() ||
564                        (string.charAt(0) <= ' ' && string.trim().isEmpty());
565        }
566
567        /**
568         * Checks whether two strings are equals taken care of 'null' values and treating 'null' same as
569         * trim(string).equals("")
570         * 
571         * @param string1
572         * @param string2
573         * @return true, if both strings are equal
574         */
575        public static boolean isEqual(final String string1, final String string2)
576        {
577                if ((string1 == null) && (string2 == null))
578                {
579                        return true;
580                }
581
582                if (isEmpty(string1) && isEmpty(string2))
583                {
584                        return true;
585                }
586                if ((string1 == null) || (string2 == null))
587                {
588                        return false;
589                }
590
591                return string1.equals(string2);
592        }
593
594        /**
595         * Converts the text in <code>s</code> to a corresponding boolean. On, yes, y, true and 1 are
596         * converted to <code>true</code>. Off, no, n, false and 0 (zero) are converted to
597         * <code>false</code>. An empty string is converted to <code>false</code>. Conversion is
598         * case-insensitive, and does <em>not</em> take internationalization into account.
599         * 
600         * 'Ja', 'Oui', 'Igen', 'Nein', 'Nee', 'Non', 'Nem' are all illegal values.
601         * 
602         * @param s
603         *            the value to convert into a boolean
604         * @return Boolean the converted value of <code>s</code>
605         * @throws StringValueConversionException
606         *             when the value of <code>s</code> is not recognized.
607         */
608        public static boolean isTrue(final String s) throws StringValueConversionException
609        {
610                if (s != null)
611                {
612                        if (s.equalsIgnoreCase("true"))
613                        {
614                                return true;
615                        }
616
617                        if (s.equalsIgnoreCase("false"))
618                        {
619                                return false;
620                        }
621
622                        if (s.equalsIgnoreCase("on") || s.equalsIgnoreCase("yes") || s.equalsIgnoreCase("y") ||
623                                s.equalsIgnoreCase("1"))
624                        {
625                                return true;
626                        }
627
628                        if (s.equalsIgnoreCase("off") || s.equalsIgnoreCase("no") || s.equalsIgnoreCase("n") ||
629                                s.equalsIgnoreCase("0"))
630                        {
631                                return false;
632                        }
633
634                        if (isEmpty(s))
635                        {
636                                return false;
637                        }
638
639                        throw new StringValueConversionException("Boolean value \"" + s + "\" not recognized");
640                }
641
642                return false;
643        }
644
645        /**
646         * Joins string fragments using the specified separator
647         * 
648         * @param separator
649         * @param fragments
650         * @return combined fragments
651     */
652        public static String join(final String separator, final List<String> fragments)
653        {
654                if (fragments == null)
655                {
656                        return "";
657                }
658                return join(separator, fragments.toArray(new String[0]));
659        }
660
661        /**
662         * Joins string fragments using the specified separator
663         * 
664         * @param separator
665         * @param fragments
666         * @return combined fragments
667         */
668        public static String join(final String separator, final String... fragments)
669        {
670                if ((fragments == null) || (fragments.length < 1))
671                {
672                        // no elements
673                        return "";
674                }
675                else if (fragments.length < 2)
676                {
677                        // single element
678                        return fragments[0];
679                }
680                else
681                {
682                        // two or more elements
683                        AppendingStringBuffer buff = new AppendingStringBuffer(128);
684                        if (fragments[0] != null)
685                        {
686                                buff.append(fragments[0]);
687                        }
688                        boolean separatorNotEmpty = !Strings.isEmpty(separator);
689                        for (int i = 1; i < fragments.length; i++)
690                        {
691                                String fragment = fragments[i];
692                                String previousFragment = fragments[i - 1];
693                                if (previousFragment != null || fragment != null)
694                                {
695                                        boolean lhsClosed = previousFragment.endsWith(separator);
696                                        boolean rhsClosed = fragment.startsWith(separator);
697                                        if (separatorNotEmpty && lhsClosed && rhsClosed)
698                                        {
699                                                buff.append(fragment.substring(1));
700                                        }
701                                        else if (!lhsClosed && !rhsClosed)
702                                        {
703                                                if (!Strings.isEmpty(fragment))
704                                                {
705                                                        buff.append(separator);
706                                                }
707                                                buff.append(fragment);
708                                        }
709                                        else
710                                        {
711                                                buff.append(fragment);
712                                        }
713                                }
714                        }
715                        return buff.toString();
716                }
717        }
718
719        /**
720         * Gets the last path component of a path using a given separator. If the separator cannot be
721         * found, the path itself is returned.
722         * <p>
723         * For example, lastPathComponent("foo.bar", '.') would return "bar" and
724         * lastPathComponent("foo", '.') would return "foo".
725         * 
726         * @param path
727         *            The path to parse
728         * @param separator
729         *            The path separator character
730         * @return The last component in the path or path itself if no separator characters exist.
731         */
732        public static String lastPathComponent(final String path, final char separator)
733        {
734                if (path == null)
735                {
736                        return null;
737                }
738
739                final int index = path.lastIndexOf(separator);
740
741                if (index == -1)
742                {
743                        return path;
744                }
745
746                return path.substring(index + 1);
747        }
748
749        /**
750         * Replace all occurrences of one string replaceWith another string.
751         * 
752         * @param s
753         *            The string to process
754         * @param searchFor
755         *            The value to search for
756         * @param replaceWith
757         *            The value to searchFor replaceWith
758         * @return The resulting string with searchFor replaced with replaceWith
759         */
760        public static CharSequence replaceAll(final CharSequence s, final CharSequence searchFor,
761                CharSequence replaceWith)
762        {
763                if (s == null)
764                {
765                        return null;
766                }
767
768                // If searchFor is null or the empty string, then there is nothing to
769                // replace, so returning s is the only option here.
770                if ((searchFor == null) || searchFor.length() == 0)
771                {
772                        return s;
773                }
774
775                // If replaceWith is null, then the searchFor should be replaced with
776                // nothing, which can be seen as the empty string.
777                if (replaceWith == null)
778                {
779                        replaceWith = "";
780                }
781
782                String searchString = searchFor.toString();
783                // Look for first occurrence of searchFor
784                int matchIndex = search(s, searchString, 0);
785                if (matchIndex == -1)
786                {
787                        // No replace operation needs to happen
788                        return s;
789                }
790                else
791                {
792                        return s.toString().replace(searchString, replaceWith);
793                }
794        }
795
796        /**
797         * Replace HTML numbers like &amp;#20540; by the appropriate character.
798         * 
799         * @param str
800         *            The text to be evaluated
801         * @return The text with "numbers" replaced
802         */
803        public static String replaceHtmlEscapeNumber(String str)
804        {
805                if (str == null)
806                {
807                        return null;
808                }
809                Matcher matcher = HTML_NUMBER_REGEX.matcher(str);
810                while (matcher.find())
811                {
812                        int pos = matcher.start();
813                        int end = matcher.end();
814                        int number = Integer.parseInt(str.substring(pos + 2, end - 1));
815                        char ch = (char)number;
816                        str = str.substring(0, pos) + ch + str.substring(end);
817                        matcher = HTML_NUMBER_REGEX.matcher(str);
818                }
819
820                return str;
821        }
822
823        /**
824         * Simpler, faster version of String.split() for splitting on a simple character.
825         * 
826         * @param s
827         *            The string to split
828         * @param c
829         *            The character to split on
830         * @return The array of strings
831         */
832        public static String[] split(final String s, final char c)
833        {
834                if (s == null || s.isEmpty())
835                {
836                        return NO_STRINGS;
837                }
838
839                int pos = s.indexOf(c);
840                if (pos == -1)
841                {
842                        return new String[] { s };
843                }
844
845                int next = s.indexOf(c, pos + 1);
846                if (next == -1)
847                {
848                        return new String[] { s.substring(0, pos), s.substring(pos + 1) };
849                }
850
851                final List<String> strings = new ArrayList<>();
852                strings.add(s.substring(0, pos));
853                strings.add(s.substring(pos + 1, next));
854                while (true)
855                {
856                        pos = next + 1;
857                        next = s.indexOf(c, pos);
858                        if (next == -1)
859                        {
860                                strings.add(s.substring(pos));
861                                break;
862                        }
863                        else
864                        {
865                                strings.add(s.substring(pos, next));
866                        }
867                }
868                final String[] result = new String[strings.size()];
869                strings.toArray(result);
870                return result;
871        }
872
873        /**
874         * Strips the ending from the string <code>s</code>.
875         * 
876         * @param s
877         *            The string to strip
878         * @param ending
879         *            The ending to strip off
880         * @return The stripped string or the original string if the ending did not exist
881         */
882        public static String stripEnding(final String s, final String ending)
883        {
884                if (s == null)
885                {
886                        return null;
887                }
888
889                // Stripping a null or empty string from the end returns the
890                // original string.
891                if (ending == null || ending.isEmpty())
892                {
893                        return s;
894                }
895                final int endingLength = ending.length();
896                final int sLength = s.length();
897
898                // When the length of the ending string is larger
899                // than the original string, the original string is returned.
900                if (endingLength > sLength)
901                {
902                        return s;
903                }
904                final int index = s.lastIndexOf(ending);
905                final int endpos = sLength - endingLength;
906
907                if (index == endpos)
908                {
909                        return s.substring(0, endpos);
910                }
911
912                return s;
913        }
914
915        /**
916         * Strip any jsessionid and possibly other redundant info that might be in our way.
917         * 
918         * @param url
919         *            The url to strip
920         * @return The stripped url
921         */
922        public static String stripJSessionId(final String url)
923        {
924                if (Strings.isEmpty(url))
925                {
926                        return url;
927                }
928
929                // http://.../abc;jsessionid=...?param=...
930                int ixSemiColon = url.indexOf(SESSION_ID_PARAM);
931                if (ixSemiColon == -1)
932                {
933                        return url;
934                }
935
936                int ixQuestionMark = url.indexOf('?');
937                if (ixQuestionMark == -1)
938                {
939                        // no query paramaters; cut off at ";"
940                        // http://.../abc;jsession=...
941                        return url.substring(0, ixSemiColon);
942                }
943
944                if (ixQuestionMark <= ixSemiColon)
945                {
946                        // ? is before ; - no jsessionid in the url
947                        return url;
948                }
949
950                return url.substring(0, ixSemiColon) + url.substring(ixQuestionMark);
951        }
952
953        /**
954         * Converts the string s to a Boolean. See <code>isTrue</code> for valid values of s.
955         * 
956         * @param s
957         *            The string to convert.
958         * @return Boolean <code>TRUE</code> when <code>isTrue(s)</code>.
959         * @throws StringValueConversionException
960         *             when s is not a valid value
961         * @see #isTrue(String)
962         */
963        public static Boolean toBoolean(final String s) throws StringValueConversionException
964        {
965                return isTrue(s);
966        }
967
968        /**
969         * Converts the 1 character string s to a character.
970         * 
971         * @param s
972         *            The 1 character string to convert to a char.
973         * @return Character value to convert
974         * @throws StringValueConversionException
975         *             when the string is longer or shorter than 1 character, or <code>null</code>.
976         */
977        public static char toChar(final String s) throws StringValueConversionException
978        {
979                if (s != null)
980                {
981                        if (s.length() == 1)
982                        {
983                                return s.charAt(0);
984                        }
985                        else
986                        {
987                                throw new StringValueConversionException("Expected single character, not \"" + s +
988                                        "\"");
989                        }
990                }
991
992                throw new StringValueConversionException("Character value was null");
993        }
994
995        /**
996         * Converts unicodes to encoded &#92;uxxxx.
997         * 
998         * @param unicodeString
999         *            The unicode string
1000         * @return The escaped unicode string, like '\u4F60\u597D'.
1001         */
1002        public static String toEscapedUnicode(final String unicodeString)
1003        {
1004                if (unicodeString == null || unicodeString.isEmpty())
1005                {
1006                        return unicodeString;
1007                }
1008                int len = unicodeString.length();
1009                int bufLen = len * 2;
1010                StringBuilder outBuffer = new StringBuilder(bufLen);
1011                for (int x = 0; x < len; x++)
1012                {
1013                        char aChar = unicodeString.charAt(x);
1014                        if (Character.getType(aChar) == Character.UNASSIGNED)
1015                        {
1016                                continue;
1017                        }
1018                        // Handle common case first, selecting largest block that
1019                        // avoids the specials below
1020                        if ((aChar > 61) && (aChar < 127))
1021                        {
1022                                if (aChar == '\\')
1023                                {
1024                                        outBuffer.append('\\');
1025                                        outBuffer.append('\\');
1026                                        continue;
1027                                }
1028                                outBuffer.append(aChar);
1029                                continue;
1030                        }
1031                        switch (aChar)
1032                        {
1033                                case ' ' :
1034                                        if (x == 0)
1035                                        {
1036                                                outBuffer.append('\\');
1037                                        }
1038                                        outBuffer.append(' ');
1039                                        break;
1040                                case '\t' :
1041                                        outBuffer.append('\\');
1042                                        outBuffer.append('t');
1043                                        break;
1044                                case '\n' :
1045                                        outBuffer.append('\\');
1046                                        outBuffer.append('n');
1047                                        break;
1048                                case '\r' :
1049                                        outBuffer.append('\\');
1050                                        outBuffer.append('r');
1051                                        break;
1052                                case '\f' :
1053                                        outBuffer.append('\\');
1054                                        outBuffer.append('f');
1055                                        break;
1056                                case '=' : // Fall through
1057                                case ':' : // Fall through
1058                                case '#' : // Fall through
1059                                case '!' :
1060                                        outBuffer.append('\\');
1061                                        outBuffer.append(aChar);
1062                                        break;
1063                                default :
1064                                        if ((aChar < 0x0020) || (aChar > 0x007e))
1065                                        {
1066                                                outBuffer.append('\\');
1067                                                outBuffer.append('u');
1068                                                outBuffer.append(toHex((aChar >> 12) & 0xF));
1069                                                outBuffer.append(toHex((aChar >> 8) & 0xF));
1070                                                outBuffer.append(toHex((aChar >> 4) & 0xF));
1071                                                outBuffer.append(toHex(aChar & 0xF));
1072                                        }
1073                                        else
1074                                        {
1075                                                outBuffer.append(aChar);
1076                                        }
1077                        }
1078                }
1079                return outBuffer.toString();
1080        }
1081
1082        /**
1083         * Converts a String to multiline HTML markup by replacing newlines with line break entities
1084         * (&lt;br/&gt;) and multiple occurrences of newline with paragraph break entities (&lt;p&gt;).
1085         * 
1086         * @param s
1087         *            String to transform
1088         * @return String with all single occurrences of newline replaced with &lt;br/&gt; and all
1089         *         multiple occurrences of newline replaced with &lt;p&gt;.
1090         */
1091        public static CharSequence toMultilineMarkup(final CharSequence s)
1092        {
1093                if (s == null)
1094                {
1095                        return null;
1096                }
1097                
1098                final int len = s.length();
1099                
1100                // allocate a buffer that is 10% larger than the original string to account for markup
1101                final AppendingStringBuffer buffer = new AppendingStringBuffer((int) (len * 1.1) + 16);
1102                int newlineCount = 0;
1103
1104                buffer.append("<p>");
1105                for (int i = 0; i < len; i++)
1106                {
1107                        final char c = s.charAt(i);
1108
1109                        switch (c)
1110                        {
1111                                case '\n' :
1112                                        newlineCount++;
1113                                        break;
1114
1115                                case '\r' :
1116                                        break;
1117
1118                                default :
1119                                        if (newlineCount == 1)
1120                                        {
1121                                                buffer.append("<br/>");
1122                                        }
1123                                        else if (newlineCount > 1)
1124                                        {
1125                                                buffer.append("</p><p>");
1126                                        }
1127
1128                                        buffer.append(c);
1129                                        newlineCount = 0;
1130                                        break;
1131                        }
1132                }
1133                if (newlineCount == 1)
1134                {
1135                        buffer.append("<br/>");
1136                }
1137                else if (newlineCount > 1)
1138                {
1139                        buffer.append("</p><p>");
1140                }
1141                buffer.append("</p>");
1142                return buffer;
1143        }
1144
1145        /**
1146         * Converts the given object to a string. Does special conversion for {@link Throwable
1147         * throwables} and String arrays of length 1 (in which case it just returns to string in that
1148         * array, as this is a common thing to have in the Servlet API).
1149         * 
1150         * @param object
1151         *            The object
1152         * @return The string
1153         */
1154        public static String toString(final Object object)
1155        {
1156                if (object == null)
1157                {
1158                        return null;
1159                }
1160
1161                if (object instanceof Throwable)
1162                {
1163                        return toString((Throwable)object);
1164                }
1165
1166                if (object instanceof String)
1167                {
1168                        return (String)object;
1169                }
1170
1171                if ((object instanceof String[]) && (((String[])object).length == 1))
1172                {
1173                        return ((String[])object)[0];
1174                }
1175
1176                return object.toString();
1177        }
1178
1179
1180        /**
1181         * Converts a Throwable to a string.
1182         * 
1183         * @param throwable
1184         *            The throwable
1185         * @return The string
1186         */
1187        public static String toString(final Throwable throwable)
1188        {
1189                if (throwable != null)
1190                {
1191                        List<Throwable> al = new ArrayList<>();
1192                        Throwable cause = throwable;
1193                        al.add(cause);
1194                        while ((cause.getCause() != null) && (cause != cause.getCause()))
1195                        {
1196                                cause = cause.getCause();
1197                                al.add(cause);
1198                        }
1199
1200                        AppendingStringBuffer sb = new AppendingStringBuffer(256);
1201                        // first print the last cause
1202                        int length = al.size() - 1;
1203                        cause = al.get(length);
1204                        if (throwable instanceof RuntimeException)
1205                        {
1206                                sb.append("Message: ");
1207                                sb.append(throwable.getMessage());
1208                                sb.append("\n\n");
1209                        }
1210                        sb.append("Root cause:\n\n");
1211                        outputThrowable(cause, sb, false);
1212
1213                        if (length > 0)
1214                        {
1215                                sb.append("\n\nComplete stack:\n\n");
1216                                for (int i = 0; i < length; i++)
1217                                {
1218                                        outputThrowable(al.get(i), sb, true);
1219                                        sb.append('\n');
1220                                }
1221                        }
1222                        return sb.toString();
1223                }
1224                else
1225                {
1226                        return "<Null Throwable>";
1227                }
1228        }
1229
1230        private static void append(final AppendingStringBuffer buffer, final CharSequence s,
1231                final int from, final int to)
1232        {
1233                if (s instanceof AppendingStringBuffer)
1234                {
1235                        AppendingStringBuffer asb = (AppendingStringBuffer)s;
1236                        buffer.append(asb.getValue(), from, to - from);
1237                }
1238                else
1239                {
1240                        buffer.append(s.subSequence(from, to));
1241                }
1242        }
1243
1244        /**
1245         * Outputs the throwable and its stacktrace to the stringbuffer. If stopAtWicketSerlvet is true
1246         * then the output will stop when the org.apache.wicket servlet is reached. sun.reflect.
1247         * packages are filtered out.
1248         * 
1249         * @param cause
1250         * @param sb
1251         * @param stopAtWicketServlet
1252         */
1253        private static void outputThrowable(final Throwable cause, final AppendingStringBuffer sb,
1254                final boolean stopAtWicketServlet)
1255        {
1256                sb.append(cause);
1257                sb.append("\n");
1258                StackTraceElement[] trace = cause.getStackTrace();
1259                for (int i = 0; i < trace.length; i++)
1260                {
1261                        String traceString = trace[i].toString();
1262                        if (!(traceString.startsWith("sun.reflect.") && (i > 1)))
1263                        {
1264                                sb.append("     at ");
1265                                sb.append(traceString);
1266                                sb.append("\n");
1267                                if (stopAtWicketServlet &&
1268                                        (traceString.startsWith("org.apache.wicket.protocol.http.WicketServlet") || traceString.startsWith("org.apache.wicket.protocol.http.WicketFilter")))
1269                                {
1270                                        return;
1271                                }
1272                        }
1273                }
1274        }
1275
1276        private static int search(final CharSequence s, final String searchString, final int pos)
1277        {
1278                if (s instanceof String)
1279                {
1280                        return ((String)s).indexOf(searchString, pos);
1281                }
1282                else if (s instanceof StringBuffer)
1283                {
1284                        return ((StringBuffer)s).indexOf(searchString, pos);
1285                }
1286                else if (s instanceof StringBuilder)
1287                {
1288                        return ((StringBuilder)s).indexOf(searchString, pos);
1289                }
1290                else if (s instanceof AppendingStringBuffer)
1291                {
1292                        return ((AppendingStringBuffer)s).indexOf(searchString, pos);
1293                }
1294                else
1295                {
1296                        return s.toString().indexOf(searchString, pos);
1297                }
1298        }
1299
1300        /**
1301         * Convert a nibble to a hex character
1302         * 
1303         * @param nibble
1304         *            the nibble to convert.
1305         * @return hex character
1306         */
1307        private static char toHex(final int nibble)
1308        {
1309                return HEX_DIGIT[(nibble & 0xF)];
1310        }
1311
1312        /**
1313         * Calculates the length of string in bytes, uses specified <code>charset</code> if provided.
1314         * 
1315         * @param string
1316         * @param charset
1317         *            (optional) character set to use when converting string to bytes
1318         * @return length of string in bytes
1319         */
1320        public static int lengthInBytes(final String string, final Charset charset)
1321        {
1322                Args.notNull(string, "string");
1323                if (charset != null)
1324                {
1325                        try
1326                        {
1327                                return string.getBytes(charset.name()).length;
1328                        }
1329                        catch (UnsupportedEncodingException e)
1330                        {
1331                                throw new RuntimeException(
1332                                        "StringResourceStream created with unsupported charset: " + charset.name());
1333                        }
1334                }
1335                else
1336                {
1337                        return string.getBytes().length;
1338                }
1339        }
1340
1341        /**
1342         * Extended {@link String#startsWith(String)} with support for case sensitivity
1343         * 
1344         * @param str
1345         * @param prefix
1346         * @param caseSensitive
1347         * @return <code>true</code> if <code>str</code> starts with <code>prefix</code>
1348         */
1349        public static boolean startsWith(final String str, final String prefix,
1350                final boolean caseSensitive)
1351        {
1352                if (caseSensitive)
1353                {
1354                        return str.startsWith(prefix);
1355                }
1356                else
1357                {
1358                        return str.toLowerCase(Locale.ROOT).startsWith(prefix.toLowerCase(Locale.ROOT));
1359                }
1360        }
1361
1362        /**
1363         * returns the zero-based index of a character within a char sequence. this method mainly exists
1364         * as an faster alternative for <code>sequence.toString().indexOf(ch)</code>.
1365         * 
1366         * @param sequence
1367         *            character sequence
1368         * @param ch
1369         *            character to search for
1370         * @return index of character within character sequence or <code>-1</code> if not found
1371         */
1372        public static int indexOf(final CharSequence sequence, final char ch)
1373        {
1374                if (sequence != null)
1375                {
1376                        for (int i = 0; i < sequence.length(); i++)
1377                        {
1378                                if (sequence.charAt(i) == ch)
1379                                {
1380                                        return i;
1381                                }
1382                        }
1383                }
1384
1385                return -1;
1386        }
1387
1388        /**
1389         * <p>
1390         * Find the Levenshtein distance between two Strings.
1391         * </p>
1392         * 
1393         * <p>
1394         * This is the number of changes needed to change one String into another, where each change is
1395         * a single character modification (deletion, insertion or substitution).
1396         * </p>
1397         * 
1398         * <p>
1399         * The previous implementation of the Levenshtein distance algorithm was from <a
1400         * href="http://www.merriampark.com/ld.htm">http://www.merriampark.com/ld.htm</a>
1401         * </p>
1402         * 
1403         * <p>
1404         * Chas Emerick has written an implementation in Java, which avoids an OutOfMemoryError which
1405         * can occur when my Java implementation is used with very large strings.<br>
1406         * This implementation of the Levenshtein distance algorithm is from <a
1407         * href="http://www.merriampark.com/ldjava.htm">http://www.merriampark.com/ldjava.htm</a>
1408         * </p>
1409         * 
1410         * <pre>
1411         * Strings.getLevenshteinDistance(null, *)             = IllegalArgumentException
1412         * Strings.getLevenshteinDistance(*, null)             = IllegalArgumentException
1413         * Strings.getLevenshteinDistance("","")               = 0
1414         * Strings.getLevenshteinDistance("","a")              = 1
1415         * Strings.getLevenshteinDistance("aaapppp", "")       = 7
1416         * Strings.getLevenshteinDistance("frog", "fog")       = 1
1417         * Strings.getLevenshteinDistance("fly", "ant")        = 3
1418         * Strings.getLevenshteinDistance("elephant", "hippo") = 7
1419         * Strings.getLevenshteinDistance("hippo", "elephant") = 7
1420         * Strings.getLevenshteinDistance("hippo", "zzzzzzzz") = 8
1421         * Strings.getLevenshteinDistance("hello", "hallo")    = 1
1422         * </pre>
1423         * 
1424         * Copied from Apache commons-lang StringUtils 3.0
1425         * 
1426         * @param s
1427         *            the first String, must not be null
1428         * @param t
1429         *            the second String, must not be null
1430         * @return result distance
1431         * @throws IllegalArgumentException
1432         *             if either String input {@code null}
1433         */
1434        public static int getLevenshteinDistance(CharSequence s, CharSequence t)
1435        {
1436                if (s == null || t == null)
1437                {
1438                        throw new IllegalArgumentException("Strings must not be null");
1439                }
1440
1441                /*
1442                 * The difference between this impl. and the previous is that, rather than creating and
1443                 * retaining a matrix of size s.length()+1 by t.length()+1, we maintain two
1444                 * single-dimensional arrays of length s.length()+1. The first, d, is the 'current working'
1445                 * distance array that maintains the newest distance cost counts as we iterate through the
1446                 * characters of String s. Each time we increment the index of String t we are comparing, d
1447                 * is copied to p, the second int[]. Doing so allows us to retain the previous cost counts
1448                 * as required by the algorithm (taking the minimum of the cost count to the left, up one,
1449                 * and diagonally up and to the left of the current cost count being calculated). (Note that
1450                 * the arrays aren't really copied anymore, just switched...this is clearly much better than
1451                 * cloning an array or doing a System.arraycopy() each time through the outer loop.)
1452                 * 
1453                 * Effectively, the difference between the two implementations is this one does not cause an
1454                 * out of memory condition when calculating the LD over two very large strings.
1455                 */
1456
1457                int n = s.length(); // length of s
1458                int m = t.length(); // length of t
1459
1460                if (n == 0)
1461                {
1462                        return m;
1463                }
1464                else if (m == 0)
1465                {
1466                        return n;
1467                }
1468
1469                if (n > m)
1470                {
1471                        // swap the input strings to consume less memory
1472                        CharSequence tmp = s;
1473                        s = t;
1474                        t = tmp;
1475                        n = m;
1476                        m = t.length();
1477                }
1478
1479                int p[] = new int[n + 1]; // 'previous' cost array, horizontally
1480                int d[] = new int[n + 1]; // cost array, horizontally
1481                int _d[]; // placeholder to assist in swapping p and d
1482
1483                // indexes into strings s and t
1484                int i; // iterates through s
1485                int j; // iterates through t
1486
1487                char t_j; // jth character of t
1488
1489                int cost; // cost
1490
1491                for (i = 0; i <= n; i++)
1492                {
1493                        p[i] = i;
1494                }
1495
1496                for (j = 1; j <= m; j++)
1497                {
1498                        t_j = t.charAt(j - 1);
1499                        d[0] = j;
1500
1501                        for (i = 1; i <= n; i++)
1502                        {
1503                                cost = s.charAt(i - 1) == t_j ? 0 : 1;
1504                                // minimum of cell to the left+1, to the top+1, diagonally left and up +cost
1505                                d[i] = Math.min(Math.min(d[i - 1] + 1, p[i] + 1), p[i - 1] + cost);
1506                        }
1507
1508                        // copy current distance counts to 'previous row' distance counts
1509                        _d = p;
1510                        p = d;
1511                        d = _d;
1512                }
1513
1514                // our last action in the above loop was to switch d and p, so p now
1515                // actually has the most recent cost counts
1516                return p[n];
1517        }
1518
1519        /**
1520         * convert byte array to hex string
1521         * 
1522         * @param bytes
1523         *          bytes to convert to hexadecimal representation
1524         *
1525         * @return hex string 
1526         */
1527        public static String toHexString(byte[] bytes)
1528        {
1529                Args.notNull(bytes, "bytes");
1530
1531                final StringBuilder hex = new StringBuilder(bytes.length << 1);
1532
1533                for (final byte b : bytes)
1534                {
1535                        hex.append(toHex(b >> 4));
1536                        hex.append(toHex(b));
1537                }
1538                return hex.toString();
1539        }
1540
1541
1542        /**
1543         * Return this value as en enum value.
1544         *
1545         * @param value
1546         *            the value to convert to an enum value
1547         * @param enumClass
1548         *            the enum type
1549         * @return an enum value
1550         */
1551        public static <T extends Enum<T>> T toEnum(final CharSequence value, final Class<T> enumClass)
1552        {
1553                Args.notNull(enumClass, "enumClass");
1554                Args.notNull(value, "value");
1555
1556                try
1557                {
1558                        return Enum.valueOf(enumClass, value.toString());
1559                }
1560                catch (Exception e)
1561                {
1562                        throw new StringValueConversionException(
1563                                        String.format("Cannot convert '%s' to enum constant of type '%s'.", value, enumClass), e);
1564                }
1565        }
1566
1567        /**
1568         * Returns the original string if this one is not empty (i.e. {@link #isEmpty(CharSequence)} returns false), 
1569         * otherwise the default one is returned. The default string might be itself an empty one.
1570         * 
1571         * @param originalString
1572         *                              the original sting value
1573         * @param defaultValue
1574         *                              the default string to return if the original is empty
1575         * @return      the original string value if not empty, the default one otherwise
1576         */
1577        public static String defaultIfEmpty(String originalString, String defaultValue)
1578        {
1579                return isEmpty(originalString) ? defaultValue : originalString;         
1580        }
1581}