001    /*--------------------------------------------------------------------------+
002    $Id: LineSplitter.java 26268 2010-02-18 10:44:30Z juergens $
003    |                                                                          |
004    | Copyright 2005-2010 Technische Universitaet Muenchen                     |
005    |                                                                          |
006    | Licensed under the Apache License, Version 2.0 (the "License");          |
007    | you may not use this file except in compliance with the License.         |
008    | You may obtain a copy of the License at                                  |
009    |                                                                          |
010    |    http://www.apache.org/licenses/LICENSE-2.0                            |
011    |                                                                          |
012    | Unless required by applicable law or agreed to in writing, software      |
013    | distributed under the License is distributed on an "AS IS" BASIS,        |
014    | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
015    | See the License for the specific language governing permissions and      |
016    | limitations under the License.                                           |
017    +--------------------------------------------------------------------------*/
018    package edu.tum.cs.commons.string;
019    
/**
 * Splits a string into its individual lines, handing them out one at a time.
 * <p>
 * The line separators <code>\n</code>, <code>\r</code> and <code>\r\n</code>
 * are recognized; the returned lines never contain the separator. A separator
 * at the very end of the content does not produce an additional empty line.
 * <p>
 * <b>Note:</b> According to measurements by the original author, this is the
 * fastest way to split a string into lines. It was reported to be about nine
 * times faster than the regex-based split with:
 * 
 * <pre>
 * Pattern pattern = Pattern.compile(&quot;\r\n|\r|\n&quot;);
 * pattern.split(content);
 * </pre>
 * 
 * @author Florian Deissenboeck
 * @author $Author: juergens $
 * 
 * @version $Revision: 26268 $
 * @levd.rating GREEN Hash: F99C8B9E8F156988EBFA29796D5D1AEF
 */
public class LineSplitter {

    /**
     * Characters of the content being split, or <code>null</code> if no
     * content was set or all lines have already been handed out.
     */
    private char[] characters;

    /** Index of the first character of the most recently returned line. */
    private int startIndex;

    /**
     * Number of characters consumed by the most recently returned line,
     * including its line separator. Added to {@link #startIndex} on the next
     * call of {@link #getNextLine()}.
     */
    private int length;

    /**
     * Sets the string to split and restarts the splitting at its beginning.
     * 
     * @param content
     *            the string to split. If this is <code>null</code> or the
     *            empty string, {@link #getNextLine()} will return
     *            <code>null</code>.
     */
    public void setContent(String content) {
        characters = (content == null) ? null : content.toCharArray();
        startIndex = 0;
        length = 0;
    }

    /**
     * Returns the next line of the content without its line separator.
     * 
     * @return the next line, or <code>null</code> if no content was set or
     *         all lines were already returned. The call that detects the end
     *         of the content also drops the reference to the internal
     *         character array, so that it becomes eligible for garbage
     *         collection.
     */
    public String getNextLine() {
        if (characters == null) {
            return null;
        }

        // advance past the previously returned line and its separator
        startIndex += length;

        if (startIndex >= characters.length) {
            // everything consumed: release the array for garbage collection
            characters = null;
            return null;
        }

        // find the end of the line's text (exclusive)
        int lineEnd = startIndex;
        while (lineEnd < characters.length && !isLineBreak(characters[lineEnd])) {
            lineEnd++;
        }

        int textLength = lineEnd - startIndex;

        // remember how much to skip next time; the separator may be 0 (end
        // of content), 1 (\n or \r) or 2 (\r\n) characters long
        length = textLength + separatorLengthAt(lineEnd);

        return new String(characters, startIndex, textLength);
    }

    /** Checks whether the character is one of the line break characters. */
    private static boolean isLineBreak(char c) {
        return c == '\n' || c == '\r';
    }

    /**
     * Determines the length of the line separator starting at the given
     * index of {@link #characters}. Returns 0 if the index is at the end of
     * the content, 2 for <code>\r\n</code> and 1 otherwise. The index is
     * expected to be either the content length or the position of a line
     * break character.
     */
    private int separatorLengthAt(int index) {
        if (index >= characters.length) {
            return 0;
        }
        boolean carriageReturn = characters[index] == '\r';
        boolean followedByLineFeed = index + 1 < characters.length
                && characters[index + 1] == '\n';
        return (carriageReturn && followedByLineFeed) ? 2 : 1;
    }
}