集册 Java实例教程 用UTF写入字符串的一部分

用UTF写入字符串的一部分

欢马劈雪     最近更新时间:2020-01-02 10:19:05

547
将采用UTF-8编码的字符串的一部分写入数据流。
/** 来自 时代Java - nowjava.com **/

/*

 Written in 2013 by Peter O.

 Any copyright is dedicated to the Public Domain.

 http://creativecommons.org/publicdomain/zero/1.0/

 If you like this, you should donate to Peter O.

 at: http://upokecenter.dreamhosters.com/articles/donate-now-2/

 */

//package com.nowjava;

import java.io.*;


public class Main {

    /**
     * Capacity, in bytes, of the temporary buffer that WriteUtf8 fills with
     * encoded output; the buffer is flushed to the stream whenever the next
     * encoded sequence would not fit.
     */
    private static final int StreamedStringBufferLength = 4096;


    /**
     * Encodes a slice of a string as UTF-8 and sends it to an output stream,
     * leaving line breaks exactly as they appear in the string.
     *
     * <p>Convenience overload: forwards to the six-argument {@code WriteUtf8}
     * with {@code lenientLineBreaks} set to {@code false}.
     *
     * @param str the string that contains the text to write
     * @param offset zero-based index (in {@code char} units) of the first
     *     element of the slice
     * @param length number of {@code char} units in the slice
     * @param stream the output stream that receives the encoded bytes
     * @param replace {@code true} to substitute the replacement character
     *     (U+FFFD) for each unpaired surrogate; {@code false} to stop
     *     processing at the first unpaired surrogate
     * @return 0 if the whole slice was written, or -1 if the slice contains
     *     an unpaired surrogate and {@code replace} is {@code false}
     * @throws NullPointerException if {@code str} or {@code stream} is null
     * @throws IllegalArgumentException if {@code offset} or {@code length} is
     *     negative, if either is greater than the string's length, or if
     *     {@code offset} plus {@code length} runs past the end of the string
     * @throws java.io.IOException if an I/O error occurs
     */
    public static int WriteUtf8(String str, int offset, int length,
            OutputStream stream, boolean replace) throws IOException {
        // This overload never rewrites CR / LF sequences.
        final boolean lenientLineBreaks = false;
        return WriteUtf8(str, offset, length, stream, replace,
                lenientLineBreaks);
    }


    /**

     * Writes a portion of a string in UTF-8 encoding to a data stream.

     * @param str A string to write.

     * @param offset The zero-based index where the string portion to write begins.

     * @param length The length of the string portion to write.

     * @param stream A writable data stream.

     * @param replace If true, replaces unpaired surrogate code points with the

     * replacement character (U+FFFD). If false, stops processing when an

     * unpaired surrogate code point is seen.

     * @param lenientLineBreaks If true, replaces carriage return (CR) not followed

     * by line feed (LF) and LF not preceded by CR with CR-LF pairs.

     * @return 0 if the entire string portion was written; or -1 if the string

     * portion contains an unpaired surrogate code point and {@code replace}

     * is false.

     * @throws NullPointerException The parameter {@code str} is null or {@code

     * stream} is null.

     * @throws IllegalArgumentException The parameter {@code offset} is less than 0,

     * {@code length} is less than 0, or {@code offset} plus {@code length}

     * is greater than the string's length.

     * @throws java.io.IOException An I/O error occurred.

     */ // nowjava.com 提供

    public static int WriteUtf8(String str, int offset, int length,

            OutputStream stream, boolean replace, boolean lenientLineBreaks)

            throws java.io.IOException {

        if (stream == null) {

            throw new NullPointerException("stream");

        }

        if (str == null) {

            throw new NullPointerException("str");

        }

        if (offset < 0) {

            throw new IllegalArgumentException("offset (" + offset

                    + ") is less than " + "0");

        }

        if (offset > str.length()) {

            throw new IllegalArgumentException("offset (" + offset

                    + ") is more than " + str.length());

        }

        if (length < 0) {

            throw new IllegalArgumentException("length (" + length

                    + ") is less than " + "0");

        }

        if (length > str.length()) {

            throw new IllegalArgumentException("length (" + length

                    + ") is more than " + str.length());

        }

        if (str.length() - offset < length) {

            throw new IllegalArgumentException(

                    "str.length() minus offset (" + (str.length() - offset)

                            + ") is less than " + length);

        }

        byte[] bytes;

        int retval = 0;

        bytes = new byte[StreamedStringBufferLength];

        int byteIndex = 0;

        int endIndex = offset + length;

        for (int index = offset; index < endIndex; ++index) {

            int c = str.charAt(index);

            if (c <= 0x7f) {

                if (lenientLineBreaks) {

                    if (c == 0x0d

                            && (index + 1 >= endIndex || str

                                    .charAt(index + 1) != 0x0a)) {

                        // bare CR, convert to CRLF

                        if (byteIndex + 2 > StreamedStringBufferLength) {

                            // Write bytes retrieved so far

                            stream.write(bytes, 0, byteIndex);

                            byteIndex = 0;

                        }

                        bytes[byteIndex++] = 0x0d;

                        bytes[byteIndex++] = 0x0a;

                        continue;

                    }

                    if (c == 0x0a) {

                        // bare LF, convert to CRLF

                        if (byteIndex + 2 > StreamedStringBufferLength) {

                            // Write bytes retrieved so far

                            stream.write(bytes, 0, byteIndex);

                            byteIndex = 0;

                        }

                        bytes[byteIndex++] = 0x0d;

                        bytes[byteIndex++] = 0x0a;

                        continue;

                    }

                }

                if (byteIndex >= StreamedStringBufferLength) {

                    // Write bytes retrieved so far

                    stream.write(bytes, 0, byteIndex);

                    byteIndex = 0;

                }

                bytes[byteIndex++] = (byte) c;

            } else if (c <= 0x7ff) {

                if (byteIndex + 2 > StreamedStringBufferLength) {

                    // Write bytes retrieved so far

                    stream.write(bytes, 0, byteIndex);

                    byteIndex = 0;

                }

                bytes[byteIndex++] = (byte) (0xc0 | ((c >> 6) & 0x1f));

                bytes[byteIndex++] = (byte) (0x80 | (c & 0x3f));

            } else {

                if ((c & 0xfc00) == 0xd800 && index + 1 < endIndex

                        && str.charAt(index + 1) >= 0xdc00

                        && str.charAt(index + 1) <= 0xdfff) {

                    // Get the Unicode code point for the surrogate pair

                    c = 0x10000 + ((c - 0xd800) << 10)

                            + (str.charAt(index + 1) - 0xdc00);

                    ++index;

                } else if ((c & 0xf800) == 0xd800) {

                    // unpaired surrogate

                    if (!replace) {

                        retval = -1;

                        break; // write bytes read so far

                    }

                    c = 0xfffd;

                }

                if (c <= 0xffff) {

                    if (byteIndex + 3 > StreamedStringBufferLength) {

                        // Write bytes retrieved so far

                        stream.write(bytes, 0, byteIndex);

                        byteIndex = 0;

                    }

                    bytes[byteIndex++] = (byte) (0xe0 | ((c >> 12) & 0x0f));

                    bytes[byteIndex++] = (byte) (0x80 | ((c >> 6) & 0x3f));

                    bytes[byteIndex++] = (byte) (0x80 | (c & 0x3f));

                } else {

                    if (byteIndex + 4 > StreamedStringBufferLength) {

                        // Write bytes retrieved so far

                        stream.write(byt
展开阅读全文