//import java.io.*;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import org.biojava.bio.seq.DNATools;
import org.biojava.bio.seq.RNATools;
import org.biojava.bio.seq.SequenceIterator;
import org.biojava.bio.seq.io.SeqIOTools;
import org.biojava.bio.symbol.IllegalSymbolException;
import org.biojava.bio.symbol.PointLocation;
import org.biojava.bio.symbol.Symbol;
import org.biojava.bio.symbol.SymbolList;
public class SearchRNAi {
/**
 * Writer for the human-readable result report, opened on "RNAioutput.txt"
 * with auto-flush enabled while the class is being initialised.
 * Stays {@code null} when the file cannot be opened; in that case the reason
 * is reported on standard error.
 */
public static PrintWriter output;
static {
    try {
        output = new PrintWriter(new FileWriter("RNAioutput.txt"), true);
    }
    catch (Exception e) {
        // Do not abort class initialisation, but do not hide the failure either:
        // a silently null writer only shows up later as a NullPointerException
        // at the first println, with no hint about the real cause.
        System.err.println("SearchRNAi: cannot open RNAioutput.txt for writing: " + e);
    }
}
/**
 * Writer for the FASTA-style listing of the selected sequences, opened on
 * "fastafile.txt" with auto-flush enabled while the class is being initialised.
 * Stays {@code null} when the file cannot be opened; in that case the reason
 * is reported on standard error.
 */
public static PrintWriter outfasta;
static {
    try {
        outfasta = new PrintWriter(new FileWriter("fastafile.txt"), true);
    }
    catch (Exception e) {
        // Do not abort class initialisation, but do not hide the failure either:
        // a silently null writer only shows up later as a NullPointerException
        // at the first println, with no hint about the real cause.
        System.err.println("SearchRNAi: cannot open fastafile.txt for writing: " + e);
    }
}
/**
 * Single reader over standard input, created on first use and shared by
 * {@link #Input()} and {@link #Inputstring()}.
 * A BufferedReader reads ahead, so creating a new one per call (as was done
 * before) loses every line the previous reader had already buffered whenever
 * the answers are piped or redirected into the program.
 */
private static BufferedReader stdinReader;

/**
 * Reads the next line from standard input through the shared reader.
 *
 * @return the line without its terminator, {@code null} at end of input,
 *         or an empty string when reading failed with an I/O error
 */
private static String readStdinLine() {
    if (stdinReader == null) {
        stdinReader = new BufferedReader(new InputStreamReader(System.in));
    }
    try {
        return stdinReader.readLine();
    }
    catch (IOException e) {
        // Same fallback as before: report the error and continue with an empty line.
        e.printStackTrace();
        return "";
    }
}

/**
 * Reads one line from standard input and parses it as a decimal integer.
 * Leading and trailing whitespace around the number is ignored.
 *
 * @return the parsed value
 * @throws NumberFormatException if the line is not a valid integer, or if
 *         standard input is already at its end
 */
static int Input() {
    String line = readStdinLine();
    if (line == null) {
        // Keep the exception type callers already get for bad input, but say what happened.
        throw new NumberFormatException("end of input reached while an integer was expected");
    }
    return Integer.parseInt(line.trim());
}

/**
 * Reads one line from standard input and returns it unchanged.
 *
 * @return the line without its terminator, {@code null} at end of input,
 *         or an empty string when reading failed with an I/O error
 */
static String Inputstring() {
    return readStdinLine();
}
public static void main(String[] args) throws Exception{
System.out.println("=============SearchRNAi V1.0===============");
System.out.println(" 欢迎使用本软件,如果有任何问题请与作者联系");
System.out.println(" Email:forrest_zhang@263.net");
// System.out.println(" 川大学分子生物学实验室版权所有 2003~2004");
System.out.println("===========================================");
System.out.println();
System.out.println("请输入目标序列长度");
int dsl=Input()+1; //dsl=dna sequcnece length
System.out.println("请输入GC含量最小值");
int min=Input();
System.out.println("请输入GC含量最大值");
int max=Input();
System.out.println("请选择siRNAs特征序列的类型");
System.out.println("1 (nn)---");
System.out.println("2 (aa)---");
System.out.println("3 (aa)g---t");
System.out.println("4 (na)g---c");
System.out.println("5 (aa)g---c");
System.out.println("6 (aa)a---c");
System.out.println("7 (aa)a---t");
System.out.println("8 (na)---");
System.out.println("9 (an)---");
System.out.println("10 (aa)---tt");
System.out.println("11 自定义");
int anchortypes=Input();
String customanchor= new String();
if (anchortypes==11){
System.out.println("请严格按格式要求输入自定义的特征序列");
System.out.println("(nn):括号内为siRNA序列前的识别序列,括号一位后为siRNA序列的第一个核苷酸");
System.out.println("然后请输入---,注意'-'为3个,最后输入siRNA序列的最后一个核苷酸");
System.out.println("例如:(an)g---nc,siRNA序列前的识别序列为an,n为任意核苷酸,第一个核苷酸为g,最后一个核苷酸为c");
customanchor = Inputstring();
// System.out.println(customanchor);
// System.out.println(customanchor.charAt(1) );
// System.out.println(customanchor.charAt(2) );
// System.out.println(customanchor.charAt(4) );
// System.out.println(customanchor.charAt(9) );
// char ca1=customanchor.charAt(1); //ca=custom anchorypes
// char ca2=customanchor.charAt(2);
// char ca3=customanchor.charAt(3);
// char ca4=customanchor.charAt(dsl);
}
System.out.println("请选择siRNAs的loop类型");
System.out.println("1 loop : ttcaagaga");
System.out.println("2 loop : tttgtgtag");
System.out.println("3 loop : atg");
System.out.println("4 loop : ccc ");
System.out.println("5 loop : ttcg");
System.out.println("6 loop : ccacc");
System.out.println("7 loop : ctcgag ");
System.out.println("9 loop : ccacacc");
System.out.println("10 自定义looop");
int looptype=Input();
String customloop=new String();
if (looptype==10){
System.out.println("请输入自定义的loop类型");
customloop=Inputstring();
}
System.out.println("请选择中止序列类型");
System.out.println("1 Pol III terminator : ttttttggaa");
System.out.println("2 Pol III terminatro Minimum: tttttt");
System.out.println("3 自定义中止序列");
int termtype=Input();
String customterm=new String();
if (termtype==3){
System.out.println("请输入自定义中止序列类型");
customterm=Inputstring();
}
// System.out.println("请输入输出siRNA文件的文件名");
// String siRNAfile = Inputstring();
// System.out.println("请输入输出fasta");
// String fastafile = Inputstring();
output.println(" =============SearchRNAi V1.0===============");
output.println(" 欢迎使用本软件,如果有任何问题请与作者联系");
output.println(" Email:forrest_zhang@263.net");
// output.println(" 川大学分子生物学实验室版权所有 2003~2004");
output.println(" ===========================================");
output.println();
if (args.length != 1)
throw new Exception();//"usage: java GCContent filename.fa"
String fileName = args[0];
// Set up sequence iterator
BufferedReader br = new BufferedReader(
new FileReader(fileName));
SequenceIterator stream = SeqIOTools.readFastaDNA(br);
SymbolList symL = null;
// get sequence from a file
try {
symL = stream.nextSequence();
}
catch (IllegalSymbolException ex) {
ex.printStackTrace();
}
int ssl=symL.length(); //ssl=sorce sequcne length
// String seqname= br.getName();
int number=1;
for (int i=1,j=ssl-dsl; i < j ;i++){
SymbolList subDNA1= symL.subList(i,i+dsl); //验证前的DNA数据
SymbolList subDNA2= symL.subList(i+2,i+dsl);
String subDNAseq1=symL.subStr(i,i+dsl); //验证前的DNA数据
String subDNAseq2=symL.subStr(i+2,i+dsl); //验证后的DNA数据
// String subDNAcase11=symL.subStr(i,i+dsl+2); //case11
int gc = 0;
for (int pos = 1; pos <= subDNA2.length(); ++pos) {
Symbol sym = subDNA2.symbolAt(pos);
if (sym == DNATools.g() || sym == DNATools.c())
++gc; //统计gc的碱基数
}
boolean gccontent = (((gc * 100.0) / subDNA2.length())<=max) && (((gc * 100.0) / subDNA2.length())>=min);
PointLocation point1 = new PointLocation(i);
PointLocation point2 = new PointLocation(i+1);
PointLocation point3 = new PointLocation(i+2);
PointLocation point4 = new PointLocation(i+dsl);
PointLocation point5 = new PointLocation(i+dsl-1);
// PointLocation point6 = new PointLocation(i+dsl+2);
String anchor=new String();
boolean stem=false;
switch (anchortypes){
case 1:
stem=true;
anchor= "(NN)-----";
break;
case 2:
if (subDNAseq1.charAt(0)=='a' && subDNAseq1.charAt(1)=='a' ){
stem=true;
anchor= "(AA)------" ;
}
else stem=false;
break;
case 3:
if (subDNAseq1.charAt(0)=='a' && subDNAseq1.charAt(1)=='a'
&& subDNAseq1.charAt(2)=='g' && subDNAseq1.charAt(dsl)=='t' ){
stem=true;
anchor= "(AA)G----T";
}
else stem=false;
break;
case 4:
if (subDNAseq1.charAt(1)=='a'
&& subDNAseq1.charAt(2)=='g' && subDNAseq1.charAt(dsl)=='c' ){
stem=true;
anchor= "(NA)G----C";
}
else stem=false;
break;
case 5:
if (subDNAseq1.charAt(0)=='a' && subDNAseq1.charAt(1)=='a'
&& subDNAseq1.charAt(2)=='g' && subDNAseq1.charAt(dsl)=='c' ){
stem=true;
anchor ="(AA)G----C";
}
else stem=false;
break;
case 6:
if (subDNAseq1.charAt(0)=='a' && subDNAseq1.charAt(1)=='a'
&& subDNAseq1.charAt(2)=='a' && subDNAseq1.charAt(dsl)=='c' ){
stem=true;
anchor= "(AA)A----C";
}
else stem=false;
break;
case 7:
if (subDNAseq1.charAt(0)=='a' && subDNAseq1.charAt(1)=='a'
&& subDNAseq1.charAt(2)=='a' && subDNAseq1.charAt(dsl)=='t' ){
stem=true;
anchor= "(AA)A----T";
}
else stem=false;
break;
case 8:
if (subDNAseq1.charAt(1)=='a'){
stem=true;
anchor= "(NA)-----";
}
else stem=false;
break;
case 9:
if (subDNAseq1.charAt(0)=='a' ){
stem=true;
anchor= "(AN)-----";
}
else stem=false;
break;
case 10:
if (subDNAseq1.charAt(0)=='a' && subDNAseq1.charAt(1)=='a'
&& subDNAseq1.charAt(dsl-1)=='t'&& subDNAseq1.charAt(dsl)=='t' ){
stem=true;
anchor= "(AA)----TT";
}
else stem=false;
break;
case 11:
if ((subDNAseq1.charAt(0)==customanchor.charAt(1)||customanchor.charAt(1)=='n')
&& (subDNAseq1.charAt(1)==customanchor.charAt(2)||customanchor.charAt(2)=='n')
&& (subDNAseq1.charAt(2)==customanchor.charAt(4)||customanchor.charAt(4)=='n')
&& (subDNAseq1.charAt(dsl-1)==customanchor.charAt(8)||customanchor.charAt(8)=='n')
&& (subDNAseq1.charAt(dsl)==customanchor.charAt(9)||customanchor.charAt(9)=='n')){
stem =true;
anchor=customanchor;
}
else stem=false;
break;
}
String loop = new String();
switch(looptype){
case 1 :loop ="ttcaagaga"; break;
case 2 :loop ="tttgtgtag"; break;
case 3: loop ="atg"; break;
case 4: loop ="ccc" ; break;
case 5: loop ="ttcg"; break;
case 6: loop ="ccacc"; break;
case 7: loop ="ctcgag" ; break;
case 8: loop ="aagctt" ; break;
case 9: loop ="ccacacc"; break;
case 10:loop=customloop; break;
}
String term=new String();
switch(termtype){
case 1 :term="ttttttggaa"; break;
case 2 :term="tttttt"; break;
case 3 :term=customterm;break;
}
if ( gccontent && stem ){
String DNAcomplement=DNATools.reverseComplement(subDNA2).seqString();
output.println("序列"+ number);
output.println("源序列: "+subDNAseq1);
output.println("源序列位置: "+i+"bp");
output.println("结果序列: "+ subDNAseq2+ loop + DNAcomplement + term );
output.println("siRNA GC含量: "+ min +"%"+" ~ "+max+"%");
output.println("siRNA序列: "+ subDNAseq2);
output.println("序列类型: "+ anchor);
output.println("siRNA长度: "+ (dsl-1));
output.println("siRNA互补序列: "+ DNAcomplement);
output.println("loop类型: "+ loop);
output.println("聚合酶中止子序列:"+ term);
output.println("最终结果序列: " + RNATools.transcribe(DNATools.createDNA(subDNAseq2+ loop
+ DNAcomplement + term)).seqString());
output.println("================================================================================");
output.println("");
outfasta.println(">seq"+number);
outfasta.println(subDNAseq2);
number++;