/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.io;

import java.io.Serializable;
import java.util.Arrays;
import java.util.Locale;

/**
 * Byte Order Mark (BOM) representation - see {@link org.apache.commons.io.input.BOMInputStream}.
 * <p>
 * The charset name and the bytes are fixed at construction time: the byte array passed to the
 * constructor is copied, and {@link #getBytes()} hands out a fresh copy on every call.
 * </p>
 * <p>
 * Equality and hash code are based on the BOM's bytes only; the charset name is not compared.
 * </p>
 *
 * @see org.apache.commons.io.input.BOMInputStream
 * @see <a href="http://en.wikipedia.org/wiki/Byte_order_mark">Wikipedia: Byte Order Mark</a>
 * @see <a href="http://www.w3.org/TR/2006/REC-xml-20060816/#sec-guessing">W3C: Autodetection of Character Encodings
 * (Non-Normative)</a>
 * @since 2.0
 */
public class ByteOrderMark implements Serializable {

    private static final long serialVersionUID = 1L;

    /** UTF-8 BOM. */
    public static final ByteOrderMark UTF_8 = new ByteOrderMark("UTF-8", 0xEF, 0xBB, 0xBF);

    /** UTF-16BE BOM (Big-Endian). */
    public static final ByteOrderMark UTF_16BE = new ByteOrderMark("UTF-16BE", 0xFE, 0xFF);

    /** UTF-16LE BOM (Little-Endian). */
    public static final ByteOrderMark UTF_16LE = new ByteOrderMark("UTF-16LE", 0xFF, 0xFE);

    /**
     * UTF-32BE BOM (Big-Endian).
     *
     * @since 2.2
     */
    public static final ByteOrderMark UTF_32BE = new ByteOrderMark("UTF-32BE", 0x00, 0x00, 0xFE, 0xFF);

    /**
     * UTF-32LE BOM (Little-Endian).
     *
     * @since 2.2
     */
    public static final ByteOrderMark UTF_32LE = new ByteOrderMark("UTF-32LE", 0xFF, 0xFE, 0x00, 0x00);

    /**
     * Unicode BOM character; external form depends on the encoding.
     *
     * @see <a href="http://unicode.org/faq/utf_bom.html#BOM">Byte Order Mark (BOM) FAQ</a>
     * @since 2.5
     */
    public static final char UTF_BOM = '\uFEFF';

    /** Name of the charset this BOM stands for; never null or empty. */
    private final String charsetName;

    /** The BOM's bytes, one per {@code int} element; never null or empty, never exposed directly. */
    private final int[] bytes;

    /**
     * Constructs a new BOM.
     *
     * @param charsetName The name of the charset the BOM represents
     * @param bytes The BOM's bytes
     * @throws IllegalArgumentException if the charsetName is null or zero length, or if the bytes are null or zero
     *         length
     */
    public ByteOrderMark(final String charsetName, final int... bytes) {
        if (charsetName == null || charsetName.isEmpty()) {
            throw new IllegalArgumentException("No charsetName specified");
        }
        if (bytes == null || bytes.length == 0) {
            throw new IllegalArgumentException("No bytes specified");
        }
        this.charsetName = charsetName;
        // Defensive copy: later changes to the caller's array must not affect this instance.
        this.bytes = bytes.clone();
    }

    /**
     * Gets the name of the {@link java.nio.charset.Charset} the BOM represents.
     *
     * @return the character set name
     */
    public String getCharsetName() {
        return charsetName;
    }

    /**
     * Gets the length of the BOM's bytes.
     *
     * @return the length of the BOM's bytes
     */
    public int length() {
        return bytes.length;
    }

    /**
     * Gets the byte at the specified position.
     *
     * @param pos The position
     * @return The specified byte, as the {@code int} value that was passed to the constructor
     * @throws ArrayIndexOutOfBoundsException if {@code pos} is negative or not less than {@link #length()}
     */
    public int get(final int pos) {
        return bytes[pos];
    }

    /**
     * Gets a copy of the BOM's bytes.
     * <p>
     * Each stored {@code int} is narrowed with a {@code (byte)} cast, so a value such as {@code 0xEF} comes back as
     * the negative {@code byte} with the same bit pattern.
     * </p>
     *
     * @return a copy of the BOM's bytes
     */
    public byte[] getBytes() {
        final byte[] copy = IOUtils.byteArray(bytes.length);
        for (int i = 0; i < bytes.length; i++) {
            copy[i] = (byte) bytes[i];
        }
        return copy;
    }

    /**
     * Indicates if this BOM's bytes equals another.
     * <p>
     * Only the bytes are compared; the charset name does not take part in equality.
     * </p>
     *
     * @param obj The object to compare to
     * @return true if the bom's bytes are equal, otherwise
     * false
     */
    @Override
    public boolean equals(final Object obj) {
        if (this == obj) {
            return true;
        }
        if (!(obj instanceof ByteOrderMark)) {
            return false;
        }
        // Compare the stored arrays directly so that equals() and hashCode() are derived from the same state.
        return Arrays.equals(bytes, ((ByteOrderMark) obj).bytes);
    }

    /**
     * Computes the hashcode for this BOM.
     * <p>
     * The hash is derived from the bytes only, which keeps it consistent with {@link #equals(Object)}: any two
     * instances that compare equal (including instances of subclasses) produce the same hash code.
     * </p>
     *
     * @return the hashcode for this BOM.
     * @see java.lang.Object#hashCode()
     */
    @Override
    public int hashCode() {
        return Arrays.hashCode(bytes);
    }

    /**
     * Converts this instance to a String representation of the BOM.
     * <p>
     * The format is the simple class name followed by the charset name and the bytes as upper-case hexadecimal
     * values, for example {@code ByteOrderMark[UTF-8: 0xEF,0xBB,0xBF]}.
     * </p>
     *
     * @return the String representation of this BOM
     */
    @Override
    public String toString() {
        final StringBuilder builder = new StringBuilder();
        builder.append(getClass().getSimpleName());
        builder.append('[');
        builder.append(charsetName);
        builder.append(": ");
        for (int i = 0; i < bytes.length; i++) {
            if (i > 0) {
                builder.append(",");
            }
            builder.append("0x");
            // Mask to the low 8 bits so only the byte value is printed.
            builder.append(Integer.toHexString(0xFF & bytes[i]).toUpperCase(Locale.ROOT));
        }
        builder.append(']');
        return builder.toString();
    }

}