001 /*--------------------------------------------------------------------------+ 002 $Id: LineSplitter.java 26268 2010-02-18 10:44:30Z juergens $ 003 | | 004 | Copyright 2005-2010 Technische Universitaet Muenchen | 005 | | 006 | Licensed under the Apache License, Version 2.0 (the "License"); | 007 | you may not use this file except in compliance with the License. | 008 | You may obtain a copy of the License at | 009 | | 010 | http://www.apache.org/licenses/LICENSE-2.0 | 011 | | 012 | Unless required by applicable law or agreed to in writing, software | 013 | distributed under the License is distributed on an "AS IS" BASIS, | 014 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 015 | See the License for the specific language governing permissions and | 016 | limitations under the License. | 017 +--------------------------------------------------------------------------*/ 018 package edu.tum.cs.commons.string; 019 020 /** 021 * This class is used to split a string in lines. 022 * <p> 023 * <b>Note:</b> According to tests I performed this is the fastest method to 024 * split a string. It is about nine times faster than the regex-bases split 025 * with: 026 * 027 * <pre> 028 * Pattern pattern = Pattern.compile("\r\n|\r|\n"); 029 * pattern.split(content); 030 * </pre> 031 * 032 * @author Florian Deissenboeck 033 * @author $Author: juergens $ 034 * 035 * @version $Revision: 26268 $ 036 * @levd.rating GREEN Hash: F99C8B9E8F156988EBFA29796D5D1AEF 037 */ 038 public class LineSplitter { 039 040 /** Character array. */ 041 private char[] characters; 042 043 /** Starting index. */ 044 private int startIndex; 045 046 /** Length of the line to be returned from {@link #getNextLine()}. */ 047 private int length; 048 049 /** 050 * Set the string to split. 051 * 052 * @param content 053 * the string to split. if string is <code>null</code> or the 054 * empty string, {@link #getNextLine()} will return 055 * <code>null</code> 056 * 057 */ 058 public void setContent(String content) { 059 if (content == null) { 060 characters = null; 061 } else { 062 characters = content.toCharArray(); 063 } 064 startIndex = 0; 065 length = 0; 066 } 067 068 /** 069 * Obtain next identified line. 070 * 071 * @return <code>null</code> if all lines were returned. On returning the 072 * last line all references to the input string are deleted. So it 073 * is free for garbage collection. 074 */ 075 public String getNextLine() { 076 077 if (characters == null) { 078 return null; 079 } 080 081 startIndex = startIndex + length; 082 083 if (startIndex >= characters.length) { 084 085 // delete reference to array to allow garbage collection 086 characters = null; 087 return null; 088 } 089 090 // length to skip may vary due to the length of the line separator (\r, 091 // \n or \r\n) 092 int skip = 0; 093 094 int i = startIndex; 095 096 while (skip == 0 && i < characters.length) { 097 char c = characters[i]; 098 099 i++; 100 101 // Skip newlines. 102 if (c == '\n') { 103 skip = 1; 104 } 105 106 // Skip newlines. 107 if (c == '\r') { 108 skip = 1; 109 if (i < characters.length) { 110 if (characters[i] == '\n') { 111 skip = 2; 112 i++; 113 } 114 } 115 } 116 117 } 118 119 length = i - startIndex; 120 121 return new String(characters, startIndex, length - skip); 122 } 123 }