001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements. See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership. The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License. You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied. See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019 package org.apache.shiro.codec;
020
021 /**
022 * Provides <a href="http://en.wikipedia.org/wiki/Base64">Base 64</a> encoding and decoding as defined by
023 * <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045</a>.
024 * <p/>
025 * This class implements section <cite>6.8. Base64 Content-Transfer-Encoding</cite> from RFC 2045 <cite>Multipurpose
026 * Internet Mail Extensions (MIME) Part One: Format of Internet Message Bodies</cite> by Freed and Borenstein.
027 * <p/>
028 * This class was borrowed from Apache Commons Codec SVN repository (rev. 618419) with modifications
029 * to enable Base64 conversion without a full dependency on Commons Codec. We didn't want to reinvent the wheel of
030 * great work they've done, but also didn't want to force every Shiro user to depend on the commons-codec.jar
031 * <p/>
032 * As per the Apache 2.0 license, the original copyright notice and all author and copyright information have
033 * remained intact.
034 *
035 * @author Apache Software Foundation
036 * @author Les Hazlewood
037 * @see <a href="http://en.wikipedia.org/wiki/Base64">Wikipedia: Base 64</a>
038 * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045</a>
039 * @since 0.9
040 */
041 public class Base64 {
042
043 /**
044 * Chunk size per RFC 2045 section 6.8.
045 * <p/>
046 * The character limit does not count the trailing CRLF, but counts all other characters, including any
047 * equal signs.
048 *
049 * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 6.8</a>
050 */
051 static final int CHUNK_SIZE = 76;
052
053 /**
054 * Chunk separator per RFC 2045 section 2.1.
055 *
056 * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 2.1</a>
057 */
058 static final byte[] CHUNK_SEPARATOR = "\r\n".getBytes();
059
060 /**
061 * The base length.
062 */
063 private static final int BASELENGTH = 255;
064
065 /**
066 * Lookup length.
067 */
068 private static final int LOOKUPLENGTH = 64;
069
070 /**
071 * Used to calculate the number of bits in a byte.
072 */
073 private static final int EIGHTBIT = 8;
074
075 /**
076 * Used when encoding something which has fewer than 24 bits.
077 */
078 private static final int SIXTEENBIT = 16;
079
080 /**
081 * Used to determine how many bits data contains.
082 */
083 private static final int TWENTYFOURBITGROUP = 24;
084
085 /**
086 * Used to get the number of Quadruples.
087 */
088 private static final int FOURBYTE = 4;
089
090 /**
091 * Used to test the sign of a byte.
092 */
093 private static final int SIGN = -128;
094
095 /**
096 * Byte used to pad output.
097 */
098 private static final byte PAD = (byte) '=';
099
100 /**
101 * Contains the Base64 values <code>0</code> through <code>63</code> accessed by using character encodings as
102 * indices.
103 * <p/>
104 * <p>For example, <code>base64Alphabet['+']</code> returns <code>62</code>.</p>
105 * <p/>
106 * <p>The value of undefined encodings is <code>-1</code>.</p>
107 */
108 private static final byte[] base64Alphabet = new byte[BASELENGTH];
109
110 /**
111 * <p>Contains the Base64 encodings <code>A</code> through <code>Z</code>, followed by <code>a</code> through
112 * <code>z</code>, followed by <code>0</code> through <code>9</code>, followed by <code>+</code>, and
113 * <code>/</code>.</p>
114 * <p/>
115 * <p>This array is accessed by using character values as indices.</p>
116 * <p/>
117 * <p>For example, <code>lookUpBase64Alphabet[62] </code> returns <code>'+'</code>.</p>
118 */
119 private static final byte[] lookUpBase64Alphabet = new byte[LOOKUPLENGTH];
120
121 // Populating the lookup and character arrays
122
123 static {
124 for (int i = 0; i < BASELENGTH; i++) {
125 base64Alphabet[i] = (byte) -1;
126 }
127 for (int i = 'Z'; i >= 'A'; i--) {
128 base64Alphabet[i] = (byte) (i - 'A');
129 }
130 for (int i = 'z'; i >= 'a'; i--) {
131 base64Alphabet[i] = (byte) (i - 'a' + 26);
132 }
133 for (int i = '9'; i >= '0'; i--) {
134 base64Alphabet[i] = (byte) (i - '0' + 52);
135 }
136
137 base64Alphabet['+'] = 62;
138 base64Alphabet['/'] = 63;
139
140 for (int i = 0; i <= 25; i++) {
141 lookUpBase64Alphabet[i] = (byte) ('A' + i);
142 }
143
144 for (int i = 26, j = 0; i <= 51; i++, j++) {
145 lookUpBase64Alphabet[i] = (byte) ('a' + j);
146 }
147
148 for (int i = 52, j = 0; i <= 61; i++, j++) {
149 lookUpBase64Alphabet[i] = (byte) ('0' + j);
150 }
151
152 lookUpBase64Alphabet[62] = (byte) '+';
153 lookUpBase64Alphabet[63] = (byte) '/';
154 }
155
156 /**
157 * Returns whether or not the <code>octect</code> is in the base 64 alphabet.
158 *
159 * @param octect The value to test
160 * @return <code>true</code> if the value is defined in the the base 64 alphabet, <code>false</code> otherwise.
161 */
162 private static boolean isBase64(byte octect) {
163 if (octect == PAD) {
164 return true;
165 } else //noinspection RedundantIfStatement
166 if (octect < 0 || base64Alphabet[octect] == -1) {
167 return false;
168 } else {
169 return true;
170 }
171 }
172
173 /**
174 * Tests a given byte array to see if it contains only valid characters within the Base64 alphabet.
175 *
176 * @param arrayOctect byte array to test
177 * @return <code>true</code> if all bytes are valid characters in the Base64 alphabet or if the byte array is
178 * empty; false, otherwise
179 */
180 public static boolean isBase64(byte[] arrayOctect) {
181
182 arrayOctect = discardWhitespace(arrayOctect);
183
184 int length = arrayOctect.length;
185 if (length == 0) {
186 // shouldn't a 0 length array be valid base64 data?
187 // return false;
188 return true;
189 }
190 for (int i = 0; i < length; i++) {
191 if (!isBase64(arrayOctect[i])) {
192 return false;
193 }
194 }
195 return true;
196 }
197
198 /**
199 * Discards any whitespace from a base-64 encoded block.
200 *
201 * @param data The base-64 encoded data to discard the whitespace from.
202 * @return The data, less whitespace (see RFC 2045).
203 */
204 static byte[] discardWhitespace(byte[] data) {
205 byte groomedData[] = new byte[data.length];
206 int bytesCopied = 0;
207
208 for (byte aByte : data) {
209 switch (aByte) {
210 case (byte) ' ':
211 case (byte) '\n':
212 case (byte) '\r':
213 case (byte) '\t':
214 break;
215 default:
216 groomedData[bytesCopied++] = aByte;
217 }
218 }
219
220 byte packedData[] = new byte[bytesCopied];
221
222 System.arraycopy(groomedData, 0, packedData, 0, bytesCopied);
223
224 return packedData;
225 }
226
227 /**
228 * Base64 encodes the specified byte array and then encodes it as a String using Shiro's preferred character
229 * encoding (UTF-8).
230 *
231 * @param bytes the byte array to Base64 encode.
232 * @return a UTF-8 encoded String of the resulting Base64 encoded byte array.
233 */
234 public static String encodeToString(byte[] bytes) {
235 byte[] encoded = encode(bytes);
236 return CodecSupport.toString(encoded);
237 }
238
239 /**
240 * Encodes binary data using the base64 algorithm and chunks the encoded output into 76 character blocks
241 *
242 * @param binaryData binary data to encodeToChars
243 * @return Base64 characters chunked in 76 character blocks
244 */
245 public static byte[] encodeChunked(byte[] binaryData) {
246 return encode(binaryData, true);
247 }
248
249 /**
250 * Encodes a byte[] containing binary data, into a byte[] containing characters in the Base64 alphabet.
251 *
252 * @param pArray a byte array containing binary data
253 * @return A byte array containing only Base64 character data
254 */
255 public static byte[] encode(byte[] pArray) {
256 return encode(pArray, false);
257 }
258
259 /**
260 * Encodes binary data using the base64 algorithm, optionally chunking the output into 76 character blocks.
261 *
262 * @param binaryData Array containing binary data to encodeToChars.
263 * @param isChunked if <code>true</code> this encoder will chunk the base64 output into 76 character blocks
264 * @return Base64-encoded data.
265 * @throws IllegalArgumentException Thrown when the input array needs an output array bigger than {@link Integer#MAX_VALUE}
266 */
267 public static byte[] encode(byte[] binaryData, boolean isChunked) {
268 long binaryDataLength = binaryData.length;
269 long lengthDataBits = binaryDataLength * EIGHTBIT;
270 long fewerThan24bits = lengthDataBits % TWENTYFOURBITGROUP;
271 long tripletCount = lengthDataBits / TWENTYFOURBITGROUP;
272 long encodedDataLengthLong;
273 int chunckCount = 0;
274
275 if (fewerThan24bits != 0) {
276 // data not divisible by 24 bit
277 encodedDataLengthLong = (tripletCount + 1) * 4;
278 } else {
279 // 16 or 8 bit
280 encodedDataLengthLong = tripletCount * 4;
281 }
282
283 // If the output is to be "chunked" into 76 character sections,
284 // for compliance with RFC 2045 MIME, then it is important to
285 // allow for extra length to account for the separator(s)
286 if (isChunked) {
287
288 chunckCount = (CHUNK_SEPARATOR.length == 0 ? 0 : (int) Math
289 .ceil((float) encodedDataLengthLong / CHUNK_SIZE));
290 encodedDataLengthLong += chunckCount * CHUNK_SEPARATOR.length;
291 }
292
293 if (encodedDataLengthLong > Integer.MAX_VALUE) {
294 throw new IllegalArgumentException(
295 "Input array too big, output array would be bigger than Integer.MAX_VALUE=" + Integer.MAX_VALUE);
296 }
297 int encodedDataLength = (int) encodedDataLengthLong;
298 byte encodedData[] = new byte[encodedDataLength];
299
300 byte k, l, b1, b2, b3;
301
302 int encodedIndex = 0;
303 int dataIndex;
304 int i;
305 int nextSeparatorIndex = CHUNK_SIZE;
306 int chunksSoFar = 0;
307
308 // log.debug("number of triplets = " + numberTriplets);
309 for (i = 0; i < tripletCount; i++) {
310 dataIndex = i * 3;
311 b1 = binaryData[dataIndex];
312 b2 = binaryData[dataIndex + 1];
313 b3 = binaryData[dataIndex + 2];
314
315 // log.debug("b1= " + b1 +", b2= " + b2 + ", b3= " + b3);
316
317 l = (byte) (b2 & 0x0f);
318 k = (byte) (b1 & 0x03);
319
320 byte val1 = ((b1 & SIGN) == 0) ? (byte) (b1 >> 2) : (byte) ((b1) >> 2 ^ 0xc0);
321 byte val2 = ((b2 & SIGN) == 0) ? (byte) (b2 >> 4) : (byte) ((b2) >> 4 ^ 0xf0);
322 byte val3 = ((b3 & SIGN) == 0) ? (byte) (b3 >> 6) : (byte) ((b3) >> 6 ^ 0xfc);
323
324 encodedData[encodedIndex] = lookUpBase64Alphabet[val1];
325 // log.debug( "val2 = " + val2 );
326 // log.debug( "k4 = " + (k<<4) );
327 // log.debug( "vak = " + (val2 | (k<<4)) );
328 encodedData[encodedIndex + 1] = lookUpBase64Alphabet[val2 | (k << 4)];
329 encodedData[encodedIndex + 2] = lookUpBase64Alphabet[(l << 2) | val3];
330 encodedData[encodedIndex + 3] = lookUpBase64Alphabet[b3 & 0x3f];
331
332 encodedIndex += 4;
333
334 // If we are chunking, let's put a chunk separator down.
335 if (isChunked) {
336 // this assumes that CHUNK_SIZE % 4 == 0
337 if (encodedIndex == nextSeparatorIndex) {
338 System.arraycopy(CHUNK_SEPARATOR, 0, encodedData, encodedIndex, CHUNK_SEPARATOR.length);
339 chunksSoFar++;
340 nextSeparatorIndex = (CHUNK_SIZE * (chunksSoFar + 1)) + (chunksSoFar * CHUNK_SEPARATOR.length);
341 encodedIndex += CHUNK_SEPARATOR.length;
342 }
343 }
344 }
345
346 // form integral number of 6-bit groups
347 dataIndex = i * 3;
348
349 if (fewerThan24bits == EIGHTBIT) {
350 b1 = binaryData[dataIndex];
351 k = (byte) (b1 & 0x03);
352 // log.debug("b1=" + b1);
353 // log.debug("b1<<2 = " + (b1>>2) );
354 byte val1 = ((b1 & SIGN) == 0) ? (byte) (b1 >> 2) : (byte) ((b1) >> 2 ^ 0xc0);
355 encodedData[encodedIndex] = lookUpBase64Alphabet[val1];
356 encodedData[encodedIndex + 1] = lookUpBase64Alphabet[k << 4];
357 encodedData[encodedIndex + 2] = PAD;
358 encodedData[encodedIndex + 3] = PAD;
359 } else if (fewerThan24bits == SIXTEENBIT) {
360
361 b1 = binaryData[dataIndex];
362 b2 = binaryData[dataIndex + 1];
363 l = (byte) (b2 & 0x0f);
364 k = (byte) (b1 & 0x03);
365
366 byte val1 = ((b1 & SIGN) == 0) ? (byte) (b1 >> 2) : (byte) ((b1) >> 2 ^ 0xc0);
367 byte val2 = ((b2 & SIGN) == 0) ? (byte) (b2 >> 4) : (byte) ((b2) >> 4 ^ 0xf0);
368
369 encodedData[encodedIndex] = lookUpBase64Alphabet[val1];
370 encodedData[encodedIndex + 1] = lookUpBase64Alphabet[val2 | (k << 4)];
371 encodedData[encodedIndex + 2] = lookUpBase64Alphabet[l << 2];
372 encodedData[encodedIndex + 3] = PAD;
373 }
374
375 if (isChunked) {
376 // we also add a separator to the end of the final chunk.
377 if (chunksSoFar < chunckCount) {
378 System.arraycopy(CHUNK_SEPARATOR, 0, encodedData, encodedDataLength - CHUNK_SEPARATOR.length,
379 CHUNK_SEPARATOR.length);
380 }
381 }
382
383 return encodedData;
384 }
385
386 /**
387 * Converts the specified UTF-8 Base64 encoded String and decodes it to a resultant UTF-8 encoded string.
388 *
389 * @param base64Encoded a UTF-8 Base64 encoded String
390 * @return the decoded String, UTF-8 encoded.
391 */
392 public static String decodeToString(String base64Encoded) {
393 byte[] encodedBytes = CodecSupport.toBytes(base64Encoded);
394 return decodeToString(encodedBytes);
395 }
396
397 /**
398 * Decodes the specified Base64 encoded byte array and returns the decoded result as a UTF-8 encoded.
399 *
400 * @param base64Encoded a Base64 encoded byte array
401 * @return the decoded String, UTF-8 encoded.
402 */
403 public static String decodeToString(byte[] base64Encoded) {
404 byte[] decoded = decode(base64Encoded);
405 return CodecSupport.toString(decoded);
406 }
407
408 /**
409 * Converts the specified UTF-8 Base64 encoded String and decodes it to a raw Base64 decoded byte array.
410 *
411 * @param base64Encoded a UTF-8 Base64 encoded String
412 * @return the raw Base64 decoded byte array.
413 */
414 public static byte[] decode(String base64Encoded) {
415 byte[] bytes = CodecSupport.toBytes(base64Encoded);
416 return decode(bytes);
417 }
418
419 /**
420 * Decodes Base64 data into octects
421 *
422 * @param base64Data Byte array containing Base64 data
423 * @return Array containing decoded data.
424 */
425 public static byte[] decode(byte[] base64Data) {
426 // RFC 2045 requires that we discard ALL non-Base64 characters
427 base64Data = discardNonBase64(base64Data);
428
429 // handle the edge case, so we don't have to worry about it later
430 if (base64Data.length == 0) {
431 return new byte[0];
432 }
433
434 int numberQuadruple = base64Data.length / FOURBYTE;
435 byte decodedData[];
436 byte b1, b2, b3, b4, marker0, marker1;
437
438 // Throw away anything not in base64Data
439
440 int encodedIndex = 0;
441 int dataIndex;
442 {
443 // this sizes the output array properly - rlw
444 int lastData = base64Data.length;
445 // ignore the '=' padding
446 while (base64Data[lastData - 1] == PAD) {
447 if (--lastData == 0) {
448 return new byte[0];
449 }
450 }
451 decodedData = new byte[lastData - numberQuadruple];
452 }
453
454 for (int i = 0; i < numberQuadruple; i++) {
455 dataIndex = i * 4;
456 marker0 = base64Data[dataIndex + 2];
457 marker1 = base64Data[dataIndex + 3];
458
459 b1 = base64Alphabet[base64Data[dataIndex]];
460 b2 = base64Alphabet[base64Data[dataIndex + 1]];
461
462 if (marker0 != PAD && marker1 != PAD) {
463 // No PAD e.g 3cQl
464 b3 = base64Alphabet[marker0];
465 b4 = base64Alphabet[marker1];
466
467 decodedData[encodedIndex] = (byte) (b1 << 2 | b2 >> 4);
468 decodedData[encodedIndex + 1] = (byte) (((b2 & 0xf) << 4) | ((b3 >> 2) & 0xf));
469 decodedData[encodedIndex + 2] = (byte) (b3 << 6 | b4);
470 } else if (marker0 == PAD) {
471 // Two PAD e.g. 3c[Pad][Pad]
472 decodedData[encodedIndex] = (byte) (b1 << 2 | b2 >> 4);
473 } else {
474 // One PAD e.g. 3cQ[Pad]
475 b3 = base64Alphabet[marker0];
476 decodedData[encodedIndex] = (byte) (b1 << 2 | b2 >> 4);
477 decodedData[encodedIndex + 1] = (byte) (((b2 & 0xf) << 4) | ((b3 >> 2) & 0xf));
478 }
479 encodedIndex += 3;
480 }
481 return decodedData;
482 }
483
484 /**
485 * Discards any characters outside of the base64 alphabet, per the requirements on page 25 of RFC 2045 - "Any
486 * characters outside of the base64 alphabet are to be ignored in base64 encoded data."
487 *
488 * @param data The base-64 encoded data to groom
489 * @return The data, less non-base64 characters (see RFC 2045).
490 */
491 static byte[] discardNonBase64(byte[] data) {
492 byte groomedData[] = new byte[data.length];
493 int bytesCopied = 0;
494
495 for (byte aByte : data) {
496 if (isBase64(aByte)) {
497 groomedData[bytesCopied++] = aByte;
498 }
499 }
500
501 byte packedData[] = new byte[bytesCopied];
502
503 System.arraycopy(groomedData, 0, packedData, 0, bytesCopied);
504
505 return packedData;
506 }
507
508 }