集册 Java实例教程 获取一系列核苷酸的氨基酸翻译。

获取一系列核苷酸的氨基酸翻译。

欢马劈雪     最近更新时间:2020-01-02 10:19:05

422
获取一系列核苷酸的氨基酸翻译。

/*

 **  DNAUtils

 **  (c) Copyright 1997, Neomorphic Software, Inc.

 **  All Rights Reserved

 **

 **  CONFIDENTIAL

 **  DO NOT DISTRIBUTE

 **

 **  File: DNAUtils.java

 **

 */

import java.util.List;/* from N o w J a v a . c o m*/

import java.util.ArrayList;


public class Main{

    /**
     * Lookup table indexed by the {@code frametype} argument of the
     * {@code translate} methods; the selected entry is used as the index of
     * the first base that is translated.
     *
     * NOTE(review): entries 5-7 are written as -0, -1, -2. {@code -0} is just
     * 0 for an int, and a negative entry would be used directly as a start
     * index into the sequence. Presumably reverse-strand frames are meant to
     * be handled by the caller (translate the reverse complement, then
     * reverse the result) -- confirm before relying on these entries.
     */
    public static final int[] FRAME_MAPPING = { 0, 0, 0, 1, 2, -0, -1, -2 };

    /** {@code codetype} value selecting one-letter amino acid codes. */
    public static final int ONE_LETTER_CODE = 100;

    /** {@code codetype} value selecting three-letter amino acid codes. */
    public static final int THREE_LETTER_CODE = 101;

    /**
     * Genetic code in 1-character amino acid codes. Initialised to
     * {@code aa1Default}, which is declared outside the visible part of this
     * file (presumably the standard genetic code, table 1 -- confirm).
     */

    protected static String aa1[][][] = aa1Default;

    /**
     * Genetic code in 3-character amino acid codes. Initialised to
     * {@code aa3Default}, which is declared outside the visible part of this
     * file (presumably the standard genetic code, table 1 -- confirm).
     */

    protected static String aa3[][][] = aa3Default;

    /**

     * gets a translation into amino acids of a string of nucleotides.

     *

     * @param s represents the string of nucleotides.

     * @param frametype FRAME_ONE, FRAME_TWO, or FRAME_THREE.

     *                  For reverse strand frames, 

     *                  translate the reverse complement.

     *                  Then reverse that result.

     * @param codetype ONE_LETTER_CODE, or THREE_LETTER_CODE

     *                 indicating how many letters should encode each amino acid.

     * @return a representation of the amino acid sequence

     *         encoded by the given nucleotide sequence.

     */

    public static String translate(String s, int frametype, int codetype) {
        // Convenience overload: translate without any decoration strings,
        // i.e. no leading text and nothing before or after each amino acid.
        final String noInitialString = null;
        final String noPreString = null;
        final String noPostString = null;
        return translate(s, frametype, codetype,
                noInitialString, noPreString, noPostString);
    }

    /**

     * gets a translation into amino acids of a string of nucleotides.

     *

     * @param s represents the string of nucleotides.

     * @param frametype FRAME_ONE, FRAME_TWO, or FRAME_THREE.

     *                  For reverse strand frames, 

     *                  translate the reverse complement.

     *                  Then reverse that result.

     * @param codetype ONE_LETTER_CODE, or THREE_LETTER_CODE

     *                 indicating how many letters should encode each amino acid.

     * @param initial_string what goes at front of entire translation

     * @param pre_string what goes before every amino acid

     * @param post_string what goes after every amino acid

     * @return a representation of the amino acid sequence

     *         encoded by the given nucleotide sequence.

     */

    public static String translate(String s, int frametype, int codetype,
            String initial_string, String pre_string, String post_string) {
        // One-letter output: accepts the named constant or the literal 1.
        final boolean wantsOneLetter =
                codetype == ONE_LETTER_CODE || codetype == 1;
        if (wantsOneLetter) {
            return translate(s, frametype, getGeneticCodeOne(),
                    initial_string, pre_string, post_string);
        }

        // Three-letter output: accepts the named constant or the literal 3.
        final boolean wantsThreeLetter =
                codetype == THREE_LETTER_CODE || codetype == 3;
        if (wantsThreeLetter) {
            return translate(s, frametype, getGeneticCodeThree(),
                    initial_string, pre_string, post_string);
        }

        // Any other codetype is not translated; callers receive null.
        return null;
    }

    /**

     * gets a translation into amino acids of a string of nucleotides.

     *

     * @param s represents the string of nucleotides.

     * @param frametype FRAME_ONE, FRAME_TWO, or FRAME_THREE.

     *                  For reverse strand frames, 

     *                  translate the reverse complement.

     *                  Then reverse that result.

     * @param genetic_code the result of one of the getGeneticCode methods

     *                     of this class.

     * @param initial_string what goes at front of entire translation

     * @param pre_string what goes before every amino acid

     * @param post_string what goes after every amino acid

     * @return a representation of the amino acid sequence

     *         encoded by the given nucleotide sequence.

     * @see #getGeneticCodeOne

     * @see #getGeneticCodeThree

     */

    // currently only translates in +1, +2, +3 

    // for -1, -2, -3: translate reverse complement, then reverse result

    // initial_string is what goes at front of entire translation

    // pre_string is what goes before every amino acid

    // post_string is what goes after every amino acid

    public static String translate(String s, int frametype,

            String[][][] genetic_code, String initial_string,

            String pre_string, String post_string) {

        int frame = FRAME_MAPPING[frametype];


        int length = s.length();

        byte[] basenums = new byte[length];

        for (int i = 0; i < length; i++) {

            switch (s.charAt(i)) {

            case 'A':

            case 'a':

                basenums[i] = 0;

                break;

            case 'C':

            case 'c':

                basenums[i] = 1;

                break;

            case 'G':

            case 'g':

                basenums[i] = 2;

                break;

            case 'T':

            case 't':

                basenums[i] = 3;

                break;

            default:

                basenums[i] = 4;

                break;

            }

        }


        String residue;

        //    int residue_charsize = 3;

        int residue_charsize = genetic_code[0][0][0].length();

        if (pre_string != null) {

            residue_charsize += pre_string.length();

        }

        if (post_string != null) {

            residue_charsize += post_string.length();

        }


        StringBuffer amino_acids = new StringBuffer(length);

        // StringBuffer amino_acids = 

        //new StringBuffer(((int)(length-(int)Math.abs(frame))/3)*residue_charsize;


        if (initial_string != null)

            amino_acids.append(initial_string);


        // checking for no spaces, can build non-spaced faster by avoiding 

        //     amino_acids.append("") calls

        int extra_bases = (length - (int) Math.abs(frame)) % 3;

        int k = 0;

        if (pre_string == null && post_string == null) {

            for (int i = frame; i < length - 2; i += 3, k = i) {

                residue = genetic_code[basenums[i]][basenums[i + 1]][basenums[i + 2]];

                amino_acids.append(residue);


            }

            for (int i = 0; i < extra_bases; i++) {

                amino_acids.append(" ");

            }

        } else {

            if (pre_string == null) {

                pre_string = "";

            }

            if (post_string == null) {

                post_string = "";

            }

            for (int i = frame; i < length - 2; i += 3) {

                residue = genetic_code[basenums[i]][basenums[i + 1]][basenums[i + 2]];

                amino_acids.append(pre_string);

                amino_acids.appe
展开阅读全文