Changeset 1912 for molgenis4xgap_importwizard
- Timestamp:
- 04/07/09 15:40:03 (3 years ago)
- Location:
- molgenis4xgap_importwizard/handwritten/java/cistransanalysis3
- Files:
-
- 4 edited
-
Run.java (modified) (2 diffs)
-
Step3456.java (modified) (6 diffs)
-
Step78.java (modified) (8 diffs)
-
makeUniqueSortAndPrint.java (modified) (1 diff)
Legend:
- Unmodified
- Added
- Removed
-
molgenis4xgap_importwizard/handwritten/java/cistransanalysis3/Run.java
r1910 r1912 52 52 for(int index=0; index<intervalGenes.size();index++){ 53 53 Marker m = Step3456.getMarkerInfo(db, intervalGenes.get(index).getEr().getMarkerName()); 54 pw.println("Interval of " + intervalGenes.get(index).getEr().getTraitname() + " on chr " + m.getChr() + " has " + intervalGenes.get(index).getGenesFromInterval().size() + " genes (cM = " + intervalGenes.get(index).getEr().getCiLower() + " to " 55 + intervalGenes.get(index).getEr().getCiUpper() + ")"); 54 pw.println("Interval of " + intervalGenes.get(index).getEr().getTraitname() + " on chr " + m.getChr() + " has " + intervalGenes.get(index).getGenesFromInterval().size() + " genes."); 56 55 57 56 } … … 119 118 String[] pathways = new String[]{"carotenoids","flavonols", "glucosinolate", "phenylpropanoid", "tocopherol", "folate"}; 120 119 121 //String[] pathways = new String[]{" phenylpropanoid"};120 //String[] pathways = new String[]{"glucosinolate"}; 122 121 123 122 PrintWriter pw = new PrintWriter(new BufferedWriter(new FileWriter("D:/data/xgapdata/Frank_for_dbGG/cistransanalysis/results/output.txt"))); -
molgenis4xgap_importwizard/handwritten/java/cistransanalysis3/Step3456.java
r1910 r1912 35 35 { 36 36 List<EQTLResultMap> intervalGenes = new ArrayList<EQTLResultMap>(); 37 38 System.out.println("getting all markers");37 38 // System.out.println("getting all markers"); 39 39 List<Marker> allMarkers = db.find(Marker.class); 40 40 41 41 for (IntervalPeakResult er : reducedList2) 42 42 { 43 System.out.println("getting marker/gene info");43 // System.out.println("getting marker/gene info"); 44 44 Marker m = getMarkerInfo(db, er.getMarkerName()); 45 45 Gene g = getGeneInfo(db, er.getTraitname()); 46 System.out.println("\tdone");46 // System.out.println("\tdone"); 47 47 48 48 // Step 3 & 4 - get confidence intervals, and select closest markers 49 System.out.println("getting closest markers");49 // System.out.println("getting closest markers"); 50 50 Marker ciLowerClosestMarker = findMarkerClosestToCM(allMarkers, er.getCiLower(), Integer.parseInt(m 51 51 .getChr())); 52 52 Marker ciUpperClosestMarker = findMarkerClosestToCM(allMarkers, er.getCiUpper(), Integer.parseInt(m 53 53 .getChr())); 54 System.out.println("\tdone"); 54 // System.out.println("\tdone"); 55 56 if(ciLowerClosestMarker.getName().equals(ciUpperClosestMarker.getName())){ 57 String warning = "WARNING: the same markers ("+ciLowerClosestMarker.getName()+") mapped for interval positions"; 58 System.out.println(warning); 59 pw.println(warning); 60 } 55 61 56 62 // Step 5 - get the bp positions of thse markers … … 61 67 List<Gene> genesFromInterval = findGenesFromInterval(db, Integer.parseInt(m.getChr()), lowerBp, upperBp); 62 68 63 boolean isCis = checkIfCis( er, m, g);69 boolean isCis = checkIfCis(m, g, ciLowerClosestMarker.getBpStart(), ciUpperClosestMarker.getBpStart()); 64 70 65 71 if (!isCis) … … 67 73 intervalGenes.add(new EQTLResultMap(er, genesFromInterval, isCis, (((double) lowerBp) / 1000000.0), 68 74 (((double) upperBp) / 1000000.0))); 69 pw.println((isCis ? "Cis" : "Trans") + " QTL of " + er.getTraitname() + " at marker " 70 + er.getMarkerName() + " on chr " + g.getChr() + " has interval: " + lowerBp + "bp to " 71 + upperBp + "bp on chr " + Integer.parseInt(m.getChr()) + ". Closest markers: " 72 + ciLowerClosestMarker.getName() + " to " + ciUpperClosestMarker.getName()); 73 74 System.out.println((isCis ? "Cis" : "Trans") + " QTL of " + er.getTraitname() + " at marker " 75 + er.getMarkerName() + " on chr " + g.getChr() + " has interval: " + lowerBp + "bp to " 76 + upperBp + "bp on chr " + Integer.parseInt(m.getChr()) + ". Closest markers: " 77 + ciLowerClosestMarker.getName() + " to " + ciUpperClosestMarker.getName()); 78 75 76 String info = (isCis ? "Cis" : "Trans") + " QTL of " + er.getTraitname() + " (~" + meanGeneBpPos(g) 77 + "bp, chr " + g.getChr() + ") at marker " + er.getMarkerName() + " (" + m.getCM() + "cM, chr " 78 + m.getChr() + ")" + " has interval mapped to true markers: " + ciLowerClosestMarker.getName() 79 + " (" + ciLowerClosestMarker.getCM() + "cM, " + ciLowerClosestMarker.getBpStart() + "bp) and " 80 + ciUpperClosestMarker.getName() + " (" + ciUpperClosestMarker.getCM() + "cM, " 81 + ciUpperClosestMarker.getBpStart() + "bp)."; 82 83 pw.println(info); 84 System.out.println(info); 85 79 86 } 80 87 … … 94 101 } 95 102 96 private boolean checkIfCis(IntervalPeakResult er, Marker m, Gene g) 97 { 98 if (g.getChr() == m.getChr()) 99 { 100 if (m.getCM() > er.getCiLower() && m.getCM() < er.getCiUpper()) 103 private boolean checkIfCis(Marker m, Gene g, long ciLowerBp, long ciUpperBp) 104 { 105 if (g.getChr().equals(m.getChr())) 106 { 107 long geneBpPos = meanGeneBpPos(g); 108 109 if (geneBpPos > ciLowerBp && geneBpPos < ciUpperBp) 101 110 { 102 111 return true; … … 104 113 } 105 114 return false; 115 } 116 117 static long meanGeneBpPos(Gene g) 118 { 119 long geneBpPos; 120 if (g.getBpStart() < g.getBpEnd()) 121 { 122 geneBpPos = g.getBpStart() + ((long) g.getBpEnd() - (long) g.getBpStart()) / (long) 2.0; 123 } 124 else if (g.getBpStart() > g.getBpEnd()) 125 { 126 geneBpPos = g.getBpEnd() + ((long) g.getBpStart() - (long) g.getBpEnd()) / (long) 2.0; 127 } 128 else 129 { 130 geneBpPos = g.getBpStart(); 131 } 132 return geneBpPos; 133 } 134 135 static long meanGeneBpPos(BiosynthGene g) 136 { 137 long geneBpPos; 138 if (g.getBpStart() < g.getBpEnd()) 139 { 140 geneBpPos = g.getBpStart() + ((long) g.getBpEnd() - (long) g.getBpStart()) / (long) 2.0; 141 } 142 else if (g.getBpStart() > g.getBpEnd()) 143 { 144 geneBpPos = g.getBpEnd() + ((long) g.getBpStart() - (long) g.getBpEnd()) / (long) 2.0; 145 } 146 else 147 { 148 geneBpPos = g.getBpStart(); 149 } 150 return geneBpPos; 106 151 } 107 152 … … 128 173 private Marker findMarkerClosestToCM(List<Marker> allMarkers, double cM, int chr) 129 174 { 130 System.out.println("finding marker...");131 175 double smallestDiff = -1; 132 176 Marker closestMarker = null; -
molgenis4xgap_importwizard/handwritten/java/cistransanalysis3/Step78.java
r1910 r1912 17 17 18 18 import org.apache.commons.math.MathException; 19 import org.apache.commons.math.stat.descriptive.moment.Mean; 19 20 import org.apache.commons.math.stat.regression.SimpleRegression; 20 21 import org.molgenis.assets.data.DatabaseException; … … 44 45 + " for Pearson's product moment correlation coefficient\n"); 45 46 47 List<Marker> allMarkers = db.find(Marker.class); 48 46 49 Map<IntervalPeakResult, List<Correlation>> correlatedGenes = new HashMap<IntervalPeakResult, List<Correlation>>(); 47 50 … … 57 60 if (intervalGenes.get(index).getGenesFromInterval().size() > 0) 58 61 { 59 62 60 63 Marker mt = Step3456.getMarkerInfo(db, intervalGenes.get(index).getEr().getMarkerName()); 61 64 Gene gt = Step3456.getGeneInfo(db, intervalGenes.get(index).getEr().getTraitname()); 62 65 63 pw.println("Investigating all " + intervalGenes.get(index).getGenesFromInterval().size() 64 + " genes within the QTL confidence interval at chr " 65 + mt.getChr() + " of gene " 66 + intervalGenes.get(index).getEr().getTraitname() + ", located at chr " 67 + gt.getChr() + ""); 68 69 System.out.println("Investigating all " + intervalGenes.get(index).getGenesFromInterval().size() 70 + " genes within the QTL confidence interval at chr " 71 + mt.getChr() + " of gene " 72 + intervalGenes.get(index).getEr().getTraitname() + ", located at chr " 73 + gt.getChr() + ""); 66 String info = "Investigating all " + intervalGenes.get(index).getGenesFromInterval().size() 67 + " genes within the QTL confidence interval at chr " + mt.getChr() + " of gene " 68 + intervalGenes.get(index).getEr().getTraitname() + ", located at chr " + gt.getChr(); 69 70 pw.println(info); 71 System.out.println(info); 74 72 75 73 for (Gene g : intervalGenes.get(index).getGenesFromInterval()) 76 74 { 77 78 int GeneID = db.find(Gene.class, new QueryRule("name", Operator.EQUALS, g.getName())).get(0)79 .getId();80 QueryRule row = new QueryRule("row", Operator.EQUALS, GeneID);81 List<DecimalDataElement> genePVals = db.find(DecimalDataElement.class, data, row);82 83 75 for (BiosynthGene bg : biosynthGenes) 84 76 { 77 // onderzochte trait moet een pathway gene zijn 85 78 if (bg.getAgiId().equals(intervalGenes.get(index).getEr().getTraitname())) 86 79 { 87 80 // interval gene mag niet dezelfde zijn als dit gene 81 // (correlation 1.0) 88 82 if (!g.getName().equals(bg.getAgiId())) 89 83 { 84 int GeneID = db.find(Gene.class, new QueryRule("name", Operator.EQUALS, g.getName())) 85 .get(0).getId(); 86 QueryRule row = new QueryRule("row", Operator.EQUALS, GeneID); 87 List<DecimalDataElement> genePVals = db.find(DecimalDataElement.class, data, row); 88 90 89 double corr = correlate(genePVals, biosynthExpressionMap.get(bg.getAgiId())); 90 91 double mean1 = mean(genePVals); 92 double mean2 = mean(biosynthExpressionMap.get(bg.getAgiId())); 91 93 92 93 94 // if (corr > thresholdCorrStep8 || -corr > thresholdCorrStep8) 95 // { 96 97 boolean isCisCorrelation = isCisCorrelation(bg, intervalGenes.get(index), mt); 98 Correlation corrObj = new Correlation(g, bg, corr, isCisCorrelation); 99 correlatedGenesList.add(corrObj); 100 // } 101 102 94 // if (corr > thresholdCorrStep8 || -corr > 95 // thresholdCorrStep8) 96 // { 97 98 boolean isCisCorrelation = isCisCorrelation(bg, intervalGenes.get(index), mt); 99 Correlation corrObj = new Correlation(g, bg, corr, isCisCorrelation, mean1, mean2); 100 correlatedGenesList.add(corrObj); 101 // } 102 103 103 } 104 104 } … … 121 121 else 122 122 { 123 // pw.println("\tA total of " + correlatedGenesList.size() + " genes correlated with known " + pathway 124 // + " genes located elsewhere."); 125 } 126 123 // pw.println("\tA total of " + correlatedGenesList.size() + 124 // " genes correlated with known " + pathway 125 // + " genes located elsewhere."); 126 } 127 127 128 Collections.sort(correlatedGenesList, new SortCorrelationList()); 128 correlatedGenesList = correlatedGenesList.subList(0, correlatedGenesList.size() > 10 ? 10 : correlatedGenesList.size()); 129 130 for(Correlation c : correlatedGenesList){ 131 if (c.isCis()) 129 correlatedGenesList = correlatedGenesList.subList(0, correlatedGenesList.size() > 20 ? 20 130 : correlatedGenesList.size()); 131 132 for (Correlation c : correlatedGenesList) 133 { 134 // if (c.isCis()) 135 // { 136 // pw.print("\tCorr. (near): "); 137 // } 138 // else 139 // { 140 // pw.print("\tCorr. (far): "); 141 // } 142 143 Marker m1 = findMarkerClosestToBp(allMarkers, (int)Step3456.meanGeneBpPos(c.getGene()), Integer.parseInt(c.getGene().getChr())); 144 Marker m2 = findMarkerClosestToBp(allMarkers, (int)Step3456.meanGeneBpPos(c.getBiosynthGene()), c.getBiosynthGene().getChromosome()); 145 146 double lod1 = getLOD(c.getGene().getName(), m1, db); 147 double lod2 = getLOD(c.getBiosynthGene().getAgiId(), m2, db); 148 149 double avgExpr1 = c.getAvgOfGene(); 150 double avgExpr2 = c.getAvgOfBiosynthGene(); 151 152 String info = c.getGene().getName() + " (chr " + c.getGene().getChr() + ", " 153 + Step3456.meanGeneBpPos(c.getGene()) + "bp, LOD at closest marker = "+lod1+", avg. expr. = "+avgExpr1+") correlates with " + c.getBiosynthGene().getAgiId() 154 + " (chr " + c.getBiosynthGene().getChromosome() + ", ~" 155 + Step3456.meanGeneBpPos(c.getBiosynthGene()) + "bp, LOD at closest marker = "+lod2+", avg. expr. = "+avgExpr2+"), strength = " + c.getCorrelation(); 156 157 pw.println(info); 158 System.out.println(info); 159 } 160 161 correlatedGenes.put(intervalGenes.get(index).getEr(), correlatedGenesList); 162 } 163 this.setCorrelatedGenes(correlatedGenes); 164 } 165 166 private double mean(List<DecimalDataElement> genePVals) 167 { 168 double total = 0; 169 for(DecimalDataElement d : genePVals){ 170 total += d.getValue(); 171 } 172 return total / ((double)genePVals.size()); 173 } 174 175 private double getLOD(String geneName, Marker m, JDBCDatabase db) throws DatabaseException{ 176 int dataID = db.find(Data.class, new QueryRule("name", Operator.EQUALS, "Gene_lodscores")).get(0).getId(); 177 int geneID = db.find(Gene.class, new QueryRule("name", Operator.EQUALS, geneName)).get(0).getId(); 178 179 QueryRule data = new QueryRule("data", Operator.EQUALS, dataID); 180 QueryRule row = new QueryRule("row", Operator.EQUALS, geneID); 181 QueryRule col = new QueryRule("col", Operator.EQUALS, m.getId()); 182 183 return db.find(DecimalDataElement.class, data, row, col).get(0).getValue(); 184 } 185 186 private Marker findMarkerClosestToBp(List<Marker> allMarkers, int bp, int chr) 187 { 188 double smallestDiff = -1; 189 Marker closestMarker = null; 190 for (Marker tryMarker : allMarkers) 191 { 192 if (tryMarker.getBpStart() != (long) 0) 193 { 194 if (Integer.parseInt(tryMarker.getChr()) == chr) 132 195 { 133 pw.print("\tCorr. (cis): "); 196 double diff = Math.abs(tryMarker.getBpStart() - bp); 197 198 if (diff < smallestDiff && smallestDiff != -1) 199 { 200 smallestDiff = diff; 201 closestMarker = tryMarker; 202 } 203 else if (smallestDiff == -1) 204 { 205 smallestDiff = diff; 206 closestMarker = tryMarker; 207 } 134 208 } 135 else 136 { 137 pw.print("\tCorr. (trans): "); 138 } 139 pw.println(c.getGene().getName() + " (chr " + c.getGene().getChr() 140 + ") correlates with " + c.getBiosynthGene().getAgiId() + ", strength = " + c.getCorrelation()); 141 142 System.out.println(c.getGene().getName() + " (chr " + c.getGene().getChr() 143 + ") correlates with " + c.getBiosynthGene().getAgiId() + ", strength = " + c.getCorrelation()); 144 } 145 146 147 148 correlatedGenes.put(intervalGenes.get(index).getEr(), correlatedGenesList); 149 } 150 this.setCorrelatedGenes(correlatedGenes); 209 } 210 } 211 return closestMarker; 151 212 } 152 213 153 214 private boolean isCisCorrelation(BiosynthGene bg, EQTLResultMap resultMap, Marker m) 154 215 { 155 if (bg.getChromosome() == (int)Integer.parseInt(m.getChr())) 156 { 216 // System.out.println("*** isCisCorrelation?"); 217 // System.out.println("\tbg.getChromosome() = " + bg.getChromosome() + 218 // ", m.getChr() = " + m.getChr()); 219 if (bg.getChromosome() == (int) Integer.parseInt(m.getChr())) 220 { 221 // System.out.println("\tYES"); 222 // System.out.println("\tbg.getMbPos() = " + bg.getMbPos()+ 223 // ", IntervalMbStart() = " + resultMap.getIntervalMbStart() + 224 // ", IntervalMbEnd() = " + resultMap.getIntervalMbEnd()); 225 // System.out.println("\tbg > start && bg < end?"); 157 226 if (bg.getMbPos() > resultMap.getIntervalMbStart() && bg.getMbPos() < resultMap.getIntervalMbEnd()) 158 227 { 228 // System.out.println("\tYES"); 159 229 return true; 160 230 } … … 181 251 } 182 252 183 private String printGene(Gene g)184 {185 String print = "";186 print += "chr = " + g.getChr() + ", ";187 print += "bpstr = " + g.getBpStart() + ", ";188 print += "bpend = " + g.getBpEnd();189 return print;190 }191 192 private String printBiosynthGene(BiosynthGene bg)193 {194 String print = "";195 print += "abbr = " + bg.getGeneAbbr() + ", ";196 print += "prot = " + bg.getProtein() + ", ";197 print += "pathw = " + bg.getPathway() + ", ";198 print += "orien = " + bg.getOrientation() + ", ";199 print += "chr = " + bg.getChromosome() + ", ";200 print += "bpstr = " + bg.getBpStart() + ", ";201 print += "bpend = " + bg.getBpEnd() + ", ";202 print += "mbpos = " + bg.getMbPos();203 return print;204 }205 206 private BiosynthGene getBiosynthGene(Gene gene, List<BiosynthGene> biosynthGenes)207 {208 for (BiosynthGene bg : biosynthGenes)209 {210 if (bg.getAgiId().equals(gene.getName()))211 {212 return bg;213 }214 }215 return null;216 }217 218 253 private double correlate(List<DecimalDataElement> genePVals, List<DecimalDataElement> corrGenePVals) 219 254 throws MathException … … 244 279 } 245 280 246 247 class SortCorrelationList implements Comparator<Correlation>{ 248 public int compare(Correlation c1, Correlation c2) { 249 return ((Double)(Math.abs(c2.getCorrelation()))).compareTo((Double)(Math.abs(c1.getCorrelation()))); 250 } 281 class SortCorrelationList implements Comparator<Correlation> 282 { 283 public int compare(Correlation c1, Correlation c2) 284 { 285 return ((Double) (Math.abs(c2.getCorrelation()))).compareTo((Double) (Math.abs(c1.getCorrelation()))); 286 } 251 287 } 252 288 253 289 class Correlation 254 290 { 255 public Correlation(Gene gene, BiosynthGene biosynthGene, double correlation, boolean isCis )291 public Correlation(Gene gene, BiosynthGene biosynthGene, double correlation, boolean isCis, double avgOfGene, double avgOfBiosynthGene) 256 292 { 257 293 this.gene = gene; … … 259 295 this.correlation = correlation; 260 296 this.isCis = isCis; 297 this.avgOfGene = avgOfGene; 298 this.avgOfBiosynthGene = avgOfBiosynthGene; 261 299 } 262 300 … … 266 304 boolean isCis; 267 305 268 public boolean isCis(){ 306 double avgOfGene; 307 double avgOfBiosynthGene; 308 309 310 311 public double getAvgOfGene() 312 { 313 return avgOfGene; 314 } 315 316 public double getAvgOfBiosynthGene() 317 { 318 return avgOfBiosynthGene; 319 } 320 321 public boolean isCis() 322 { 269 323 return isCis; 270 324 } -
molgenis4xgap_importwizard/handwritten/java/cistransanalysis3/makeUniqueSortAndPrint.java
r1910 r1912 24 24 25 25 Collections.sort(uniqCorr, new SortSimpleCorrelation()); 26 pw.println("UnknownGene_name\tUnknownGene_chr\t UnknownGene_isPathwayGene\tPathwayGene_name\tPathwayGene_chr\tCorrelationStrength");26 pw.println("UnknownGene_name\tUnknownGene_chr\tPathwayGene_name\tPathwayGene_chr\tCorrelationStrength"); 27 27 for (SimpleCorrelation sc : uniqCorr) { 28 pw.println(sc.getGene1() + "\t" + sc.getChrOfGene1() + "\t" + sc. isPathwayGene1() + "\t" + sc.getGene2() + "\t" + sc.getChrOfGene2() + "\t" + sc.getCorr());28 pw.println(sc.getGene1() + "\t" + sc.getChrOfGene1() + "\t" + sc.getGene2() + "\t" + sc.getChrOfGene2() + "\t" + sc.getCorr()); 29 29 } 30 30 31 31 pw.println("___________\n"); 32 }33 34 private String cisOrTrans(int chrOfGene1, int chrOfGene2)35 {36 if(chrOfGene1==chrOfGene2){37 return "cis";38 }else{39 return "trans";40 }41 32 } 42 33
Note: See TracChangeset
for help on using the changeset viewer.