001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one
003     * or more contributor license agreements.  See the NOTICE file
004     * distributed with this work for additional information
005     * regarding copyright ownership.  The ASF licenses this file
006     * to you under the Apache License, Version 2.0 (the
007     * "License"); you may not use this file except in compliance
008     * with the License.  You may obtain a copy of the License at
009     *
010     *     http://www.apache.org/licenses/LICENSE-2.0
011     *
012     * Unless required by applicable law or agreed to in writing,
013     * software distributed under the License is distributed on an
014     * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015     * KIND, either express or implied.  See the License for the
016     * specific language governing permissions and limitations
017     * under the License.
018     */
019    package org.apache.shiro.codec;
020    
/**
 * Provides <a href="http://en.wikipedia.org/wiki/Base64">Base 64</a> encoding and decoding as defined by
 * <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045</a>.
 * <p>
 * This class implements section <cite>6.8. Base64 Content-Transfer-Encoding</cite> from RFC 2045 <cite>Multipurpose
 * Internet Mail Extensions (MIME) Part One: Format of Internet Message Bodies</cite> by Freed and Borenstein.
 * <p>
 * This class was borrowed from Apache Commons Codec SVN repository (rev. 618419) with modifications
 * to enable Base64 conversion without a full dependency on Commons Codec.  We didn't want to reinvent the wheel of
 * great work they've done, but also didn't want to force every Shiro user to depend on the commons-codec.jar
 * <p>
 * As per the Apache 2.0 license, the original copyright notice and all author and copyright information have
 * remained intact.
 * <p>
 * All String conversions performed by this class use UTF-8.
 *
 * @author Apache Software Foundation
 * @author Les Hazlewood
 * @see <a href="http://en.wikipedia.org/wiki/Base64">Wikipedia: Base 64</a>
 * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045</a>
 * @since 0.9
 */
public class Base64 {

    /**
     * Chunk size per RFC 2045 section 6.8.
     * <p>
     * The character limit does not count the trailing CRLF, but counts all other characters, including any
     * equal signs.
     *
     * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 6.8</a>
     */
    static final int CHUNK_SIZE = 76;

    /**
     * Chunk separator (CR LF) per RFC 2045 section 2.1.
     * <p>
     * Spelled out as literal bytes so that the value does not depend on the platform default charset.
     *
     * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 2.1</a>
     */
    static final byte[] CHUNK_SEPARATOR = {'\r', '\n'};

    /**
     * Character set used for every String to byte[] conversion in this class.
     */
    private static final java.nio.charset.Charset UTF_8 = java.nio.charset.StandardCharsets.UTF_8;

    /**
     * The 64 symbols of the Base64 alphabet, ordered by the 6-bit value each one represents.
     */
    private static final String ALPHABET = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

    /**
     * Size of the decoding table: one slot for every non-negative byte value.
     */
    private static final int DECODE_TABLE_SIZE = 128;

    /**
     * Number of input bytes that make up one encoding quantum.
     */
    private static final int BYTES_PER_QUANTUM = 3;

    /**
     * Number of Base64 symbols that make up one encoding quantum.
     */
    private static final int SYMBOLS_PER_QUANTUM = 4;

    /**
     * Byte used to pad output.
     */
    private static final byte PAD = (byte) '=';

    /**
     * Contains the Base64 values <code>0</code> through <code>63</code> accessed by using character encodings as
     * indices.
     * <p>
     * For example, <code>base64Alphabet['+']</code> returns <code>62</code>.
     * <p>
     * The value of undefined encodings (including the pad character) is <code>-1</code>.
     */
    private static final byte[] base64Alphabet = new byte[DECODE_TABLE_SIZE];

    /**
     * Contains the Base64 encodings <code>A</code> through <code>Z</code>, followed by <code>a</code> through
     * <code>z</code>, followed by <code>0</code> through <code>9</code>, followed by <code>+</code>, and
     * <code>/</code>.
     * <p>
     * This array is accessed by using 6-bit values as indices.
     * <p>
     * For example, <code>lookUpBase64Alphabet[62]</code> returns <code>'+'</code>.
     */
    private static final byte[] lookUpBase64Alphabet = new byte[ALPHABET.length()];

    // Populate both lookup tables from the single alphabet definition so they can never disagree.
    static {
        for (int i = 0; i < base64Alphabet.length; i++) {
            base64Alphabet[i] = (byte) -1;
        }
        for (int value = 0; value < ALPHABET.length(); value++) {
            char symbol = ALPHABET.charAt(value);
            lookUpBase64Alphabet[value] = (byte) symbol;
            base64Alphabet[symbol] = (byte) value;
        }
    }

    /**
     * Returns whether or not the <code>octect</code> is in the base 64 alphabet. The pad character
     * (<code>=</code>) counts as part of the alphabet.
     *
     * @param octect The value to test
     * @return <code>true</code> if the value is defined in the base 64 alphabet, <code>false</code> otherwise.
     */
    private static boolean isBase64(byte octect) {
        // Negative bytes are outside the table and can never be Base64 symbols.
        return octect == PAD || (octect >= 0 && base64Alphabet[octect] != -1);
    }

    /**
     * Returns whether or not the byte is one of the whitespace characters that are skipped when validating
     * Base64 data: space, line feed, carriage return or tab.
     *
     * @param aByte The value to test
     * @return <code>true</code> if the value is whitespace, <code>false</code> otherwise.
     */
    private static boolean isWhitespace(byte aByte) {
        return aByte == (byte) ' ' || aByte == (byte) '\n' || aByte == (byte) '\r' || aByte == (byte) '\t';
    }

    /**
     * Tests a given byte array to see if it contains only valid characters within the Base64 alphabet.
     * Whitespace (space, CR, LF, tab) is ignored.
     *
     * @param arrayOctect byte array to test
     * @return <code>true</code> if all bytes are valid characters in the Base64 alphabet or if the byte array is
     *         empty; false, otherwise
     */
    public static boolean isBase64(byte[] arrayOctect) {
        // An empty (or whitespace-only) array is considered valid Base64 data.
        for (byte octect : arrayOctect) {
            if (!isWhitespace(octect) && !isBase64(octect)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Discards any whitespace from a base-64 encoded block.
     *
     * @param data The base-64 encoded data to discard the whitespace from.
     * @return A new array holding the data, less whitespace (see RFC 2045).
     */
    static byte[] discardWhitespace(byte[] data) {
        byte[] groomedData = new byte[data.length];
        int bytesCopied = 0;

        for (byte aByte : data) {
            if (!isWhitespace(aByte)) {
                groomedData[bytesCopied++] = aByte;
            }
        }

        byte[] packedData = new byte[bytesCopied];
        System.arraycopy(groomedData, 0, packedData, 0, bytesCopied);
        return packedData;
    }

    /**
     * Base64 encodes the specified byte array and then encodes it as a String using UTF-8.
     *
     * @param bytes the byte array to Base64 encode.
     * @return a String of the resulting Base64 encoded byte array.
     */
    public static String encodeToString(byte[] bytes) {
        return new String(encode(bytes), UTF_8);
    }

    /**
     * Encodes binary data using the base64 algorithm and chunks the encoded output into 76 character blocks
     *
     * @param binaryData binary data to encode
     * @return Base64 characters chunked in 76 character blocks, every block (including the last) followed by CRLF
     */
    public static byte[] encodeChunked(byte[] binaryData) {
        return encode(binaryData, true);
    }

    /**
     * Encodes a byte[] containing binary data, into a byte[] containing characters in the Base64 alphabet.
     *
     * @param pArray a byte array containing binary data
     * @return A byte array containing only Base64 character data
     */
    public static byte[] encode(byte[] pArray) {
        return encode(pArray, false);
    }

    /**
     * Encodes binary data using the base64 algorithm, optionally chunking the output into 76 character blocks.
     * <p>
     * When chunking, every line of output - including the final, possibly shorter one - is terminated by
     * {@link #CHUNK_SEPARATOR}. Empty input always produces empty output.
     *
     * @param binaryData Array containing binary data to encode.
     * @param isChunked  if <code>true</code> this encoder will chunk the base64 output into 76 character blocks
     * @return Base64-encoded data.
     * @throws IllegalArgumentException Thrown when the input array needs an output array bigger than {@link Integer#MAX_VALUE}
     */
    public static byte[] encode(byte[] binaryData, boolean isChunked) {
        final int tripletCount = binaryData.length / BYTES_PER_QUANTUM;
        final int trailingBytes = binaryData.length % BYTES_PER_QUANTUM;

        // Sizes are computed in long arithmetic: the encoded form is ~4/3 of the input and may exceed an int.
        long encodedLength = (long) SYMBOLS_PER_QUANTUM * (tripletCount + (trailingBytes == 0 ? 0 : 1));
        if (isChunked) {
            // Exact integer ceiling; floating point would lose precision for very large outputs.
            long chunkCount = (encodedLength + CHUNK_SIZE - 1) / CHUNK_SIZE;
            encodedLength += chunkCount * CHUNK_SEPARATOR.length;
        }
        if (encodedLength > Integer.MAX_VALUE) {
            throw new IllegalArgumentException(
                    "Input array too big, output array would be bigger than Integer.MAX_VALUE=" + Integer.MAX_VALUE);
        }

        final byte[] encodedData = new byte[(int) encodedLength];
        int in = 0;
        int out = 0;
        // Number of symbols written on the current output line (only tracked when chunking).
        int lineLength = 0;

        for (int i = 0; i < tripletCount; i++) {
            // Mask to 0..255 so that negative bytes shift like unsigned values.
            int b1 = binaryData[in++] & 0xff;
            int b2 = binaryData[in++] & 0xff;
            int b3 = binaryData[in++] & 0xff;

            encodedData[out++] = lookUpBase64Alphabet[b1 >>> 2];
            encodedData[out++] = lookUpBase64Alphabet[((b1 & 0x03) << 4) | (b2 >>> 4)];
            encodedData[out++] = lookUpBase64Alphabet[((b2 & 0x0f) << 2) | (b3 >>> 6)];
            encodedData[out++] = lookUpBase64Alphabet[b3 & 0x3f];

            if (isChunked) {
                // CHUNK_SIZE is a multiple of 4, so a line can only fill up on a quantum boundary.
                lineLength += SYMBOLS_PER_QUANTUM;
                if (lineLength == CHUNK_SIZE) {
                    System.arraycopy(CHUNK_SEPARATOR, 0, encodedData, out, CHUNK_SEPARATOR.length);
                    out += CHUNK_SEPARATOR.length;
                    lineLength = 0;
                }
            }
        }

        // Encode the final one or two bytes, padding the quantum out to four symbols.
        if (trailingBytes == 1) {
            int b1 = binaryData[in] & 0xff;
            encodedData[out++] = lookUpBase64Alphabet[b1 >>> 2];
            encodedData[out++] = lookUpBase64Alphabet[(b1 & 0x03) << 4];
            encodedData[out++] = PAD;
            encodedData[out++] = PAD;
            lineLength += SYMBOLS_PER_QUANTUM;
        } else if (trailingBytes == 2) {
            int b1 = binaryData[in] & 0xff;
            int b2 = binaryData[in + 1] & 0xff;
            encodedData[out++] = lookUpBase64Alphabet[b1 >>> 2];
            encodedData[out++] = lookUpBase64Alphabet[((b1 & 0x03) << 4) | (b2 >>> 4)];
            encodedData[out++] = lookUpBase64Alphabet[(b2 & 0x0f) << 2];
            encodedData[out++] = PAD;
            lineLength += SYMBOLS_PER_QUANTUM;
        }

        // The final line also gets a separator, unless it was already terminated inside the loop.
        if (isChunked && lineLength > 0) {
            System.arraycopy(CHUNK_SEPARATOR, 0, encodedData, out, CHUNK_SEPARATOR.length);
        }

        return encodedData;
    }

    /**
     * Converts the specified UTF-8 Base64 encoded String and decodes it to a resultant UTF-8 encoded string.
     *
     * @param base64Encoded a UTF-8 Base64 encoded String
     * @return the decoded String, UTF-8 encoded.
     */
    public static String decodeToString(String base64Encoded) {
        return decodeToString(base64Encoded.getBytes(UTF_8));
    }

    /**
     * Decodes the specified Base64 encoded byte array and returns the decoded result as a UTF-8 encoded String.
     *
     * @param base64Encoded a Base64 encoded byte array
     * @return the decoded String, UTF-8 encoded.
     */
    public static String decodeToString(byte[] base64Encoded) {
        return new String(decode(base64Encoded), UTF_8);
    }

    /**
     * Converts the specified UTF-8 Base64 encoded String and decodes it to a raw Base64 decoded byte array.
     *
     * @param base64Encoded a UTF-8 Base64 encoded String
     * @return the raw Base64 decoded byte array.
     */
    public static byte[] decode(String base64Encoded) {
        return decode(base64Encoded.getBytes(UTF_8));
    }

    /**
     * Decodes Base64 data into octets.
     * <p>
     * Characters outside the Base64 alphabet are discarded first, as required by RFC 2045. Decoding is lenient
     * and never fails on malformed input:
     * <ul>
     * <li>a pad character ends the current quantum, so concatenated encodings such as <code>QUI=QUI=</code>
     * decode to the concatenation of their payloads;</li>
     * <li>a final quantum with missing padding is decoded as if it had been padded;</li>
     * <li>a quantum consisting of a single symbol carries fewer than 8 bits and is dropped.</li>
     * </ul>
     *
     * @param base64Data Byte array containing Base64 data
     * @return Array containing decoded data; empty if the input holds no decodable data.
     */
    public static byte[] decode(byte[] base64Data) {
        // RFC 2045 requires that we discard ALL non-Base64 characters
        base64Data = discardNonBase64(base64Data);

        if (base64Data.length == 0) {
            return new byte[0];
        }

        // Every 4 symbols yield at most 3 bytes, so this is an upper bound on the decoded size.
        byte[] buffer = new byte[(int) (base64Data.length * 3L / SYMBOLS_PER_QUANTUM)];
        int written = 0;
        // Accumulates the 6-bit values of the current quantum, most significant symbol first.
        int bits = 0;
        int symbolsInQuantum = 0;

        for (byte symbol : base64Data) {
            if (symbol == PAD) {
                written = flushPartialQuantum(bits, symbolsInQuantum, buffer, written);
                bits = 0;
                symbolsInQuantum = 0;
                continue;
            }
            // After discardNonBase64 every non-pad byte is a valid, non-negative table index.
            bits = (bits << 6) | base64Alphabet[symbol];
            if (++symbolsInQuantum == SYMBOLS_PER_QUANTUM) {
                buffer[written++] = (byte) (bits >> 16);
                buffer[written++] = (byte) (bits >> 8);
                buffer[written++] = (byte) bits;
                bits = 0;
                symbolsInQuantum = 0;
            }
        }
        // Input that ends without padding still carries a usable partial quantum.
        written = flushPartialQuantum(bits, symbolsInQuantum, buffer, written);

        if (written == buffer.length) {
            return buffer;
        }
        byte[] decodedData = new byte[written];
        System.arraycopy(buffer, 0, decodedData, 0, written);
        return decodedData;
    }

    /**
     * Writes the whole bytes contained in an incomplete quantum: one byte for two symbols, two bytes for three
     * symbols, nothing otherwise.
     *
     * @param bits    the accumulated 6-bit values of the quantum
     * @param symbols how many symbols have been accumulated (0 to 3)
     * @param target  the array to write to
     * @param offset  the index of the first byte to write
     * @return the offset following the last byte written
     */
    private static int flushPartialQuantum(int bits, int symbols, byte[] target, int offset) {
        if (symbols == 2) {
            // 12 bits collected: the top 8 form one byte.
            target[offset++] = (byte) (bits >> 4);
        } else if (symbols == 3) {
            // 18 bits collected: the top 16 form two bytes.
            target[offset++] = (byte) (bits >> 10);
            target[offset++] = (byte) (bits >> 2);
        }
        return offset;
    }

    /**
     * Discards any characters outside of the base64 alphabet, per the requirements on page 25 of RFC 2045 - "Any
     * characters outside of the base64 alphabet are to be ignored in base64 encoded data."
     * The pad character is kept.
     *
     * @param data The base-64 encoded data to groom
     * @return A new array holding the data, less non-base64 characters (see RFC 2045).
     */
    static byte[] discardNonBase64(byte[] data) {
        byte[] groomedData = new byte[data.length];
        int bytesCopied = 0;

        for (byte aByte : data) {
            if (isBase64(aByte)) {
                groomedData[bytesCopied++] = aByte;
            }
        }

        byte[] packedData = new byte[bytesCopied];
        System.arraycopy(groomedData, 0, packedData, 0, bytesCopied);
        return packedData;
    }

}