本文整理汇总了Java中org.apache.spark.broadcast.Broadcast类的典型用法代码示例。如果您正苦于以下问题:Java Broadcast类的具体用法?Java Broadcast怎么用?Java Broadcast使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
Broadcast类属于org.apache.spark.broadcast包,在下文中一共展示了Broadcast类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Java代码示例。
示例1: setPartitionHeaders
import org.apache.spark.broadcast.Broadcast; //导入依赖的package包/类
/**
 * Installs the broadcast SAM file header into {@link BAMHeaderOutputFormat} once per
 * partition, so that records written from any worker carry the shared header.
 *
 * @param reads  the reads whose partitions should receive the header
 * @param header the broadcast header shared across all workers
 * @return the same reads, unchanged, after the per-partition header setup
 */
public static JavaRDD<SAMRecord> setPartitionHeaders(final JavaRDD<SAMRecord> reads, final Broadcast<SAMFileHeader> header) {
    return reads.mapPartitions(partition -> {
        // Runs once per partition on the worker; records themselves are passed through untouched.
        BAMHeaderOutputFormat.setHeader(header.getValue());
        return partition;
    });
}
开发者ID:NGSeq,项目名称:ViraPipe,代码行数:11,代码来源:HDFSWriter.java
示例2: run
import org.apache.spark.broadcast.Broadcast; //导入依赖的package包/类
/**
 * Computes the topK neighbor candidates per entity from the topK value-similarity results.
 *
 * @param topKvalueCandidates the topK results per entity, acquired from value similarity
 * @param rawTriples1 the rdf triples of the first entity collection
 * @param rawTriples2 the rdf triples of the second entity collection
 * @param SEPARATOR the delimiter that separates subjects, predicates and objects in the rawTriples1 and rawTriples2 files
 * @param entityIds1 the mapping of entity urls to entity ids, as it was used in blocking
 * @param entityIds2 the mapping of entity urls to entity ids for the second collection
 * @param MIN_SUPPORT_THRESHOLD the minimum support threshold, below which, relations are discarded from top relations
 * @param K the K for topK candidate matches
 * @param N the N for topN rdf relations (and neighbors)
 * @param jsc the java spark context used to load files and broadcast variables
 * @return topK neighbor candidates per entity
 */
public JavaPairRDD<Integer, IntArrayList> run(JavaPairRDD<Integer,Int2FloatLinkedOpenHashMap> topKvalueCandidates,
        JavaRDD<String> rawTriples1,
        JavaRDD<String> rawTriples2,
        String SEPARATOR,
        JavaRDD<String> entityIds1,
        JavaRDD<String> entityIds2,
        float MIN_SUPPORT_THRESHOLD,
        int K,
        int N,
        JavaSparkContext jsc) {
    // Merge the in-neighbor maps from both collections (first collection ranked as "positive" side).
    Map<Integer, IntArrayList> inNeighborsByEntity = new HashMap<>();
    inNeighborsByEntity.putAll(new RelationsRank().run(rawTriples1, SEPARATOR, entityIds1, MIN_SUPPORT_THRESHOLD, N, true, jsc));
    inNeighborsByEntity.putAll(new RelationsRank().run(rawTriples2, SEPARATOR, entityIds2, MIN_SUPPORT_THRESHOLD, N, false, jsc));
    // Ship the combined map to the workers once, then score neighbor similarity with the SUM aggregation.
    Broadcast<Map<Integer, IntArrayList>> inNeighborsBroadcast = jsc.broadcast(inNeighborsByEntity);
    return getTopKNeighborSimsSUM(topKvalueCandidates, inNeighborsBroadcast, K);
}
开发者ID:vefthym,项目名称:MinoanER,代码行数:33,代码来源:CNPARCS.java
示例3: main
import org.apache.spark.broadcast.Broadcast; //导入依赖的package包/类
/**
 * Demonstrates creating, reading, and destroying a Spark broadcast variable through
 * the SparkSession's underlying SparkContext.
 *
 * <p>Fixes over the original example: the SparkSession is now stopped in a
 * {@code finally} block (it was previously leaked), dead commented-out code is
 * removed, and the redundant {@code unpersist()} before {@code destroy()} is dropped
 * since {@code destroy()} already removes all persisted state of the broadcast.
 */
public static void main(String[] args) {
    SparkSession sparkSession = SparkSession.builder().master("local").appName("My App")
            .config("spark.sql.warehouse.dir", "file:////C:/Users/sgulati/spark-warehouse").getOrCreate();
    try {
        // The Scala SparkContext API requires an explicit ClassTag for the element type.
        Broadcast<String> broadcastVar = sparkSession.sparkContext().broadcast("Hello Spark",
                scala.reflect.ClassTag$.MODULE$.apply(String.class));
        System.out.println(broadcastVar.getValue());
        // destroy() releases all data and metadata for the broadcast on driver and executors.
        broadcastVar.destroy();
    } finally {
        // Always release the session's resources, even if broadcast usage fails.
        sparkSession.stop();
    }
}
开发者ID:PacktPublishing,项目名称:Apache-Spark-2x-for-Java-Developers,代码行数:20,代码来源:BroadcastVariable.java
示例4: readsToWritable
import org.apache.spark.broadcast.Broadcast; //导入依赖的package包/类
/**
 * Pairs every read with a Hadoop-writable wrapper, attaching the broadcast header
 * (and lazily growing its sequence dictionary) on the way.
 *
 * @param records the reads to wrap
 * @param header  the broadcast header applied strictly to each read
 * @return pairs of (read, SAMRecordWritable) suitable for Hadoop output formats
 */
public static JavaPairRDD<SAMRecord, SAMRecordWritable> readsToWritable(JavaRDD<SAMRecord> records, Broadcast<SAMFileHeader> header) {
    return records.mapToPair(read -> {
        // The sequence dictionary is not supplied as a header file by the alignment
        // phase, so it is created and extended here, one reference sequence at a time.
        SAMFileHeader sharedHeader = header.getValue();
        if (sharedHeader.getSequenceDictionary() == null) {
            sharedHeader.setSequenceDictionary(new SAMSequenceDictionary());
        }
        if (sharedHeader.getSequenceDictionary().getSequence(read.getReferenceName()) == null) {
            sharedHeader.getSequenceDictionary().addSequence(new SAMSequenceRecord(read.getReferenceName()));
        }
        read.setHeaderStrict(sharedHeader);
        SAMRecordWritable writable = new SAMRecordWritable();
        writable.set(read);
        return new Tuple2<>(read, writable);
    });
}
开发者ID:NGSeq,项目名称:ViraPipe,代码行数:17,代码来源:HDFSWriter.java
示例5: run
import org.apache.spark.broadcast.Broadcast; //导入依赖的package包/类
/**
 * Computes the topK neighbor candidates per entity from the topK value-similarity results.
 *
 * @param topKvalueCandidates the topK results per entity, acquired from value similarity
 * @param rawTriples1 the rdf triples of the first entity collection
 * @param rawTriples2 the rdf triples of the second entity collection
 * @param SEPARATOR the delimiter that separates subjects, predicates and objects in the rawTriples1 and rawTriples2 files
 * @param entityIds1 the mapping of entity urls to entity ids, as it was used in blocking
 * @param entityIds2 the mapping of entity urls to entity ids for the second collection
 * @param MIN_SUPPORT_THRESHOLD the minimum support threshold, below which, relations are discarded from top relations
 * @param K the K for topK candidate matches
 * @param N the N for topN rdf relations (and neighbors)
 * @param jsc the java spark context used to load files and broadcast variables
 * @return topK neighbor candidates per entity
 */
public JavaPairRDD<Integer, IntArrayList> run(JavaPairRDD<Integer,Int2FloatLinkedOpenHashMap> topKvalueCandidates,
        JavaRDD<String> rawTriples1,
        JavaRDD<String> rawTriples2,
        String SEPARATOR,
        JavaRDD<String> entityIds1,
        JavaRDD<String> entityIds2,
        float MIN_SUPPORT_THRESHOLD,
        int K,
        int N,
        JavaSparkContext jsc) {
    // Merge the in-neighbor maps of both collections into a single lookup table.
    Map<Integer, IntArrayList> inNeighborsByEntity = new HashMap<>();
    inNeighborsByEntity.putAll(new RelationsRank().run(rawTriples1, SEPARATOR, entityIds1, MIN_SUPPORT_THRESHOLD, N, true, jsc));
    inNeighborsByEntity.putAll(new RelationsRank().run(rawTriples2, SEPARATOR, entityIds2, MIN_SUPPORT_THRESHOLD, N, false, jsc));
    // One broadcast serves every worker; SUM aggregation selects the candidates.
    Broadcast<Map<Integer, IntArrayList>> inNeighborsBroadcast = jsc.broadcast(inNeighborsByEntity);
    return getTopKNeighborSimsSUM(topKvalueCandidates, inNeighborsBroadcast, K);
}
开发者ID:vefthym,项目名称:MinoanER,代码行数:35,代码来源:CNPNeighborsUnnormalized.java
示例6: run2
import org.apache.spark.broadcast.Broadcast; //导入依赖的package包/类
/**
 * Computes the topK neighbor candidates per entity, keeping the similarity scores.
 *
 * @param topKvalueCandidates the topK results per entity, acquired from value similarity
 * @param rawTriples1 the rdf triples of the first entity collection
 * @param rawTriples2 the rdf triples of the second entity collection
 * @param SEPARATOR the delimiter that separates subjects, predicates and objects in the rawTriples1 and rawTriples2 files
 * @param entityIds1 the mapping of entity urls to entity ids, as it was used in blocking
 * @param entityIds2 the mapping of entity urls to entity ids for the second collection
 * @param MIN_SUPPORT_THRESHOLD the minimum support threshold, below which, relations are discarded from top relations
 * @param K the K for topK candidate matches
 * @param N the N for topN rdf relations (and neighbors)
 * @param jsc the java spark context used to load files and broadcast variables
 * @return topK neighbor candidates per entity, with their scores
 */
public JavaPairRDD<Integer, Int2FloatLinkedOpenHashMap> run2(JavaPairRDD<Integer,Int2FloatLinkedOpenHashMap> topKvalueCandidates,
        JavaRDD<String> rawTriples1,
        JavaRDD<String> rawTriples2,
        String SEPARATOR,
        JavaRDD<String> entityIds1,
        JavaRDD<String> entityIds2,
        float MIN_SUPPORT_THRESHOLD,
        int K,
        int N,
        JavaSparkContext jsc) {
    // Merge the in-neighbor maps of both collections into one lookup table.
    Map<Integer, IntArrayList> inNeighborsByEntity = new HashMap<>();
    inNeighborsByEntity.putAll(new RelationsRank().run(rawTriples1, SEPARATOR, entityIds1, MIN_SUPPORT_THRESHOLD, N, true, jsc));
    inNeighborsByEntity.putAll(new RelationsRank().run(rawTriples2, SEPARATOR, entityIds2, MIN_SUPPORT_THRESHOLD, N, false, jsc));
    // Broadcast once; the scored SUM variant preserves the per-candidate similarities.
    Broadcast<Map<Integer, IntArrayList>> inNeighborsBroadcast = jsc.broadcast(inNeighborsByEntity);
    return getTopKNeighborSimsSUMWithScores(topKvalueCandidates, inNeighborsBroadcast, K);
}
开发者ID:vefthym,项目名称:MinoanER,代码行数:33,代码来源:CNPARCS.java
示例7: run
import org.apache.spark.broadcast.Broadcast; //导入依赖的package包/类
/**
 * Computes the topK neighbor candidates per entity from the topK value-similarity results.
 *
 * @param topKvalueCandidates the topK results per entity, acquired from value similarity
 * @param rawTriples1 the rdf triples of the first entity collection
 * @param rawTriples2 the rdf triples of the second entity collection
 * @param SEPARATOR the delimiter that separates subjects, predicates and objects in the rawTriples1 and rawTriples2 files
 * @param entityIds1 the mapping of entity urls to entity ids, as it was used in blocking
 * @param entityIds2 the mapping of entity urls to entity ids for the second collection
 * @param MIN_SUPPORT_THRESHOLD the minimum support threshold, below which, relations are discarded from top relations
 * @param K the K for topK candidate matches
 * @param N the N for topN rdf relations (and neighbors)
 * @param jsc the java spark context used to load files and broadcast variables
 * @return topK neighbor candidates per entity
 */
public JavaPairRDD<Integer, IntArrayList> run(JavaPairRDD<Integer,Int2FloatLinkedOpenHashMap> topKvalueCandidates,
        JavaRDD<String> rawTriples1,
        JavaRDD<String> rawTriples2,
        String SEPARATOR,
        JavaRDD<String> entityIds1,
        JavaRDD<String> entityIds2,
        float MIN_SUPPORT_THRESHOLD,
        int K,
        int N,
        JavaSparkContext jsc) {
    // Merge the in-neighbor maps of both collections into one lookup table.
    Map<Integer, IntArrayList> inNeighborsByEntity = new HashMap<>();
    inNeighborsByEntity.putAll(new RelationsRank().run(rawTriples1, SEPARATOR, entityIds1, MIN_SUPPORT_THRESHOLD, N, true, jsc));
    inNeighborsByEntity.putAll(new RelationsRank().run(rawTriples2, SEPARATOR, entityIds2, MIN_SUPPORT_THRESHOLD, N, false, jsc));
    // Broadcast once; this class uses the plain (non-SUM) neighbor similarity variant.
    Broadcast<Map<Integer, IntArrayList>> inNeighborsBroadcast = jsc.broadcast(inNeighborsByEntity);
    return getTopKNeighborSims(topKvalueCandidates, inNeighborsBroadcast, K);
}
开发者ID:vefthym,项目名称:MinoanER,代码行数:36,代码来源:CNPNeighbors.java
示例8: readAndConvertFeatureRDD
import org.apache.spark.broadcast.Broadcast; //导入依赖的package包/类
/**
 * Re-keys a (String id, float[] features) RDD by numeric index, widens the feature
 * vectors to double[], persists the result, and erases the key type for Scala ALS.
 *
 * @param javaRDD   features keyed by string id
 * @param bIdToIndex broadcast mapping from string id to numeric index
 * @return persisted Scala RDD of (index, double[] features) with an Object key type
 */
private static RDD<Tuple2<Object,double[]>> readAndConvertFeatureRDD(
    JavaPairRDD<String,float[]> javaRDD,
    Broadcast<Map<String,Integer>> bIdToIndex) {
  RDD<Tuple2<Integer,double[]>> indexedRDD = javaRDD
      .mapToPair(entry -> new Tuple2<>(bIdToIndex.value().get(entry._1()), entry._2()))
      .mapValues(floats -> {
        // Widen float features to double, element by element.
        double[] doubles = new double[floats.length];
        for (int i = 0; i < floats.length; i++) {
          doubles[i] = floats[i];
        }
        return doubles;
      })
      .rdd();
  // Mirror the persistence level that the ALS training methods establish internally.
  indexedRDD.persist(StorageLevel.MEMORY_AND_DISK());
  // The Scala ALS API keys on Object; the double cast only erases the key type.
  @SuppressWarnings("unchecked")
  RDD<Tuple2<Object,double[]>> objKeyRDD = (RDD<Tuple2<Object,double[]>>) (RDD<?>) indexedRDD;
  return objKeyRDD;
}
开发者ID:oncewang,项目名称:oryx2,代码行数:23,代码来源:ALSUpdate.java
示例9: getRdfsLabels
import org.apache.spark.broadcast.Broadcast; //导入依赖的package包/类
/**
 * Get map of rdfs:labels for specified URIs.
 *
 * @param quads Quads to use for retrieving labels
 * @param uris  Set of URI Strings to find labels for
 * @return map of URI -> rdfs:label
 */
private Map<String, String> getRdfsLabels(JavaRDD<Quad> quads, Set<String> uris) {
    Broadcast<Set<String>> broadcastURIs = sc.broadcast(uris);
    Map<String, String> nonSerializableMap = quads.filter(quad ->
            // keep only rdfs:label statements whose subject is one of the requested URIs
            quad.getPredicate().isURI() &&
                    quad.getPredicate().getURI().equals(LABEL_URI) &&
                    quad.getSubject().isURI() &&
                    (broadcastURIs.getValue().contains(quad.getSubject().getURI()))
            // map to pair of uri, label
    ).mapToPair(quad -> new Tuple2<>(
            quad.getSubject().getURI(),
            quad.getObject().getLiteralValue().toString()
    )).collectAsMap();
    // collectAsMap() is an action, so the filter has already run everywhere;
    // release the broadcast's executor-side copies instead of leaking them.
    broadcastURIs.unpersist();
    // collectAsMap() may return a non-serializable map implementation; copy it.
    return new HashMap<>(nonSerializableMap);
}
开发者ID:Merck,项目名称:rdf2x,代码行数:24,代码来源:RdfSchemaCollector.java
示例10: find
import org.apache.spark.broadcast.Broadcast; //导入依赖的package包/类
/**
 * Checks whether the given column combination is present in the distributed
 * column-combination map.
 *
 * <p>Fixes over the original: the broadcast variable is destroyed in a single
 * {@code finally} block (the original duplicated the destroy call on both branches
 * and leaked the broadcast if the filter action threw), and the redundant
 * {@code (Object)} cast is removed.
 *
 * @param columnCombination the combination to look for
 * @return true if an equal combination exists in the map, false otherwise
 */
protected boolean find(ColumnCombinationBitset columnCombination) {
    if (this.columnCombinationMap.isEmpty()) {
        return false;
    }
    Broadcast<ColumnCombinationBitset> bCcb = Singleton.getSparkContext().broadcast(columnCombination);
    try {
        // isEmpty() is an action, so the filter completes before the broadcast is destroyed.
        return !this.columnCombinationMap
                .filter((ColumnCombinationBitset ccb) -> ccb.equals(bCcb.value()))
                .isEmpty();
    } finally {
        bCcb.destroy();
    }
}
开发者ID:mpoiitis,项目名称:DUCCspark,代码行数:20,代码来源:SimplePruningGraph.java
示例11: getNextParentColumnCombination
import org.apache.spark.broadcast.Broadcast; //导入依赖的package包/类
protected ColumnCombinationBitset getNextParentColumnCombination(ColumnCombinationBitset column) {
Broadcast<ColumnCombinationBitset> bColumn = Singleton.getSparkContext().broadcast(column);
//if minimal positives contain column return null
if(!this.minimalPositives.filter((ColumnCombinationBitset ccb) -> ccb.equals(bColumn.value())).isEmpty()){
return null;
}
List<ColumnCombinationBitset> supersets = column.getDirectSupersets(this.bitmaskForNonUniqueColumns);
JavaRDD<ColumnCombinationBitset> supersetsRdd = Singleton.getSparkContext().parallelize(supersets);
//destroy broadcast variable
bColumn.destroy();
return this.findUnprunedSet(supersetsRdd);
}
开发者ID:mpoiitis,项目名称:DUCCspark,代码行数:17,代码来源:SimpleGraphTraverser.java
示例12: getNextChildColumnCombination
import org.apache.spark.broadcast.Broadcast; //导入依赖的package包/类
/**
 * Returns the next unpruned direct subset of the given column combination, or null
 * when the combination is a single column or already a maximal negative.
 *
 * <p>Fix over the original: the broadcast variable was only destroyed on the subset
 * path, leaking executor-side copies whenever the early {@code return null} was
 * taken; the destroy now happens in a {@code finally} block right after its only
 * use (the filter action).
 *
 * @param column the current column combination
 * @return an unpruned direct subset, or null if none should be explored
 */
protected ColumnCombinationBitset getNextChildColumnCombination(ColumnCombinationBitset column) {
    // A single column has no proper subsets to descend into.
    if (column.size() == 1) {
        return null;
    }
    Broadcast<ColumnCombinationBitset> bColumn = Singleton.getSparkContext().broadcast(column);
    try {
        // if maximal negatives contain column, there is no child to explore
        if (!this.maximalNegatives.filter((ColumnCombinationBitset ccb) -> ccb.equals(bColumn.value())).isEmpty()) {
            return null;
        }
    } finally {
        // isEmpty() is an action, so the filter has completed; safe to destroy here.
        bColumn.destroy();
    }
    List<ColumnCombinationBitset> subsets = column.getDirectSubsets();
    JavaRDD<ColumnCombinationBitset> subsetsRdd = Singleton.getSparkContext().parallelize(subsets);
    return this.findUnprunedSet(subsetsRdd);
}
开发者ID:mpoiitis,项目名称:DUCCspark,代码行数:20,代码来源:SimpleGraphTraverser.java
示例13: parseFile
import org.apache.spark.broadcast.Broadcast; //导入依赖的package包/类
/**
 * Parses the comma-separated input lines into Adult records, preserving each
 * line's position in the file as the record's row index.
 *
 * @return an RDD of Adult records, one per input line
 */
public JavaRDD<Adult> parseFile(){
    // Pair every line with its position so each Adult keeps its original row index.
    JavaPairRDD<String, Long> indexedLines = this.input.zipWithIndex();
    // Column names are read-only and shared, so ship them to workers once.
    Broadcast<ArrayList<Tuple2<String,Integer>>> bColNames = Singleton.getSparkContext().broadcast(this.columnNames);
    return indexedLines.map((Tuple2<String, Long> line) -> {
        String[] tokens = line._1.split(",");
        ArrayList<String> values = new ArrayList<>(ImmutableList.copyOf(tokens));
        return new Adult(bColNames.value(), values, line._2.intValue());
    });
}
开发者ID:mpoiitis,项目名称:DUCCspark,代码行数:18,代码来源:CustomParser.java
示例14: doOperation
import org.apache.spark.broadcast.Broadcast; //导入依赖的package包/类
/**
 * Imports a JavaRDD of elements into the Accumulo store by converting each element
 * to Accumulo key/value pairs and delegating to the key/value pair import handler.
 *
 * @param operation the import operation carrying the input RDD and path options
 * @param context   the operation context
 * @param store     the target Accumulo store
 * @throws OperationException if the mandatory outputPath or failurePath option is missing
 */
public void doOperation(final ImportJavaRDDOfElements operation, final Context context, final AccumuloStore store) throws OperationException {
    // Both destination paths are mandatory for the bulk import to run.
    final String outputPath = operation.getOption(OUTPUT_PATH);
    if (outputPath == null || outputPath.isEmpty()) {
        throw new OperationException("Option outputPath must be set for this option to be run against the accumulostore");
    }
    final String failurePath = operation.getOption(FAILURE_PATH);
    if (failurePath == null || failurePath.isEmpty()) {
        throw new OperationException("Option failurePath must be set for this option to be run against the accumulostore");
    }
    // Ship the store's element converter to the workers and turn every element into key/value pairs.
    final SparkContext sparkContext = SparkContextUtil.getSparkSession(context, store.getProperties()).sparkContext();
    final Broadcast<AccumuloElementConverter> converterBroadcast =
            JavaSparkContext.fromSparkContext(sparkContext).broadcast(store.getKeyPackage().getKeyConverter());
    final JavaPairRDD<Key, Value> keyValueRdd =
            operation.getInput().flatMapToPair(new ElementConverterFunction(converterBroadcast));
    // Delegate the actual write to the key/value pair import operation.
    final ImportKeyValueJavaPairRDDToAccumulo importOp = new ImportKeyValueJavaPairRDDToAccumulo.Builder()
            .input(keyValueRdd)
            .failurePath(failurePath)
            .outputPath(outputPath)
            .build();
    store.execute(new OperationChain(importOp), context);
}
开发者ID:gchq,项目名称:Gaffer,代码行数:23,代码来源:ImportJavaRDDOfElementsHandler.java
示例15: sliceOperations
import org.apache.spark.broadcast.Broadcast; //导入依赖的package包/类
/**
 * Slices the requested index range out of every partitioned broadcast block and
 * merges the per-block slices into a single result.
 *
 * @param rl    row lower bound (inclusive)
 * @param ru    row upper bound (inclusive)
 * @param cl    column lower bound (inclusive)
 * @param cu    column upper bound (inclusive)
 * @param block output block to slice into
 * @return the merged slice across all broadcast partitions
 * @throws DMLRuntimeException if slicing fails
 */
public T sliceOperations(long rl, long ru, long cl, long cu, T block)
    throws DMLRuntimeException
{
    T result = null;
    for( Broadcast<PartitionedBlock<T>> broadcast : _pbc ) {
        T slice = broadcast.value().sliceOperations(rl, ru, cl, cu, block);
        // The first slice seeds the result; later slices are merged in (non-appending).
        if( result == null )
            result = slice;
        else
            result.merge(slice, false);
    }
    return result;
}
开发者ID:apache,项目名称:systemml,代码行数:17,代码来源:PartitionedBroadcast.java
示例16: renderOverviewImages
import org.apache.spark.broadcast.Broadcast; //导入依赖的package包/类
/**
 * Renders CATMAID overview ('small') images for each layer.
 *
 * @param sparkContext context for current run.
 * @param broadcastBoxGenerator box generator broadcast to all worker nodes.
 */
private void renderOverviewImages(final JavaSparkContext sparkContext,
                                  final Broadcast<BoxGenerator> broadcastBoxGenerator) {
    final JavaRDD<Double> zValuesRdd = sparkContext.parallelize(zValues);
    // Each task renders one z layer's overview and contributes 1 to the count below.
    final JavaRDD<Integer> renderedLayers = zValuesRdd.map((Function<Double, Integer>) z -> {
        broadcastBoxGenerator.getValue().renderOverview(z.intValue());
        return 1;
    });
    final long renderedLayerCount = renderedLayers.count();
    LOG.info(""); // empty statement adds newline to lengthy unterminated stage progress lines in log
    LOG.info("run: rendered {} overview images", renderedLayerCount);
}
开发者ID:saalfeldlab,项目名称:render,代码行数:24,代码来源:BoxClient.java
示例17: saveAsShardedHadoopFiles
import org.apache.spark.broadcast.Broadcast; //导入依赖的package包/类
/**
 * Writes reads as sharded Hadoop files in the requested SAM format.
 *
 * <p>The output formats keep the header in static state, so it must be set on the
 * driver and then copied to every worker partition before records are written.
 *
 * @param ctx             the Spark context
 * @param outputFile      destination path for the sharded output
 * @param referenceFile   reference path (used by CRAM output)
 * @param samOutputFormat the SAM format to write (BAM or CRAM)
 * @param reads           the reads to write
 * @param header          the header shared by all reads
 * @param writeHeader     whether each shard should include the header
 * @throws IOException if writing fails
 */
private static void saveAsShardedHadoopFiles(
        final JavaSparkContext ctx, final String outputFile, final String referenceFile,
        final SAMFormat samOutputFormat, final JavaRDD<SAMRecord> reads, final SAMFileHeader header,
        final boolean writeHeader) throws IOException {
    // Set the static header on the driver thread for the matching output format.
    if (samOutputFormat == SAMFormat.CRAM) {
        SparkCRAMOutputFormat.setHeader(header);
    } else {
        SparkBAMOutputFormat.setHeader(header);
    }
    final Broadcast<SAMFileHeader> broadcastHeader = ctx.broadcast(header);
    // Copy the header onto each worker partition (the output format classes are static).
    final JavaRDD<SAMRecord> readsWithHeader = setHeaderForEachPartition(reads, samOutputFormat, broadcastHeader);
    // Hadoop writing expects pairs whose value is SAMRecordWritable; the key is ignored.
    final JavaPairRDD<SAMRecord, SAMRecordWritable> writableReads = pairReadsWithSAMRecordWritables(broadcastHeader, readsWithHeader);
    writableReads.saveAsNewAPIHadoopFile(outputFile, SAMRecord.class, SAMRecordWritable.class, getOutputFormat(samOutputFormat, writeHeader), ctx.hadoopConfiguration());
}
开发者ID:broadinstitute,项目名称:gatk,代码行数:22,代码来源:ReadsSparkSink.java
示例18: saveAsShardedHadoopFiles
import org.apache.spark.broadcast.Broadcast; //导入依赖的package包/类
/**
 * Writes variants as sharded Hadoop files in VCF format.
 *
 * <p>SparkVCFOutputFormat keeps the header in static state, so it is set on the
 * driver and then copied to every worker partition before records are written.
 *
 * @param ctx         the Spark context
 * @param conf        the Hadoop configuration to write with
 * @param outputFile  destination path for the sharded output
 * @param variants    the variants to write
 * @param header      the VCF header shared by all variants
 * @param writeHeader whether each shard should include the header
 * @throws IOException if writing fails
 */
private static void saveAsShardedHadoopFiles(
        final JavaSparkContext ctx, final Configuration conf, final String outputFile, JavaRDD<VariantContext> variants,
        final VCFHeader header, final boolean writeHeader) throws IOException {
    // Set the static header on the driver thread.
    SparkVCFOutputFormat.setVCFHeader(header);
    final Broadcast<VCFHeader> broadcastHeader = ctx.broadcast(header);
    // Copy the header onto each worker partition (SparkVCFOutputFormat is static).
    final JavaRDD<VariantContext> variantsWithHeader = setHeaderForEachPartition(variants, broadcastHeader);
    // Hadoop writing expects pairs whose value is VariantContextWritable; the key is ignored.
    final JavaPairRDD<VariantContext, VariantContextWritable> writableVariants = pairVariantsWithVariantContextWritables(variantsWithHeader);
    writableVariants.saveAsNewAPIHadoopFile(outputFile, VariantContext.class, VariantContextWritable.class, getOutputFormat(writeHeader), conf);
}
开发者ID:broadinstitute,项目名称:gatk,代码行数:17,代码来源:VariantsSparkSink.java
示例19: getReadsFunction
import org.apache.spark.broadcast.Broadcast; //导入依赖的package包/类
/**
 * Builds the per-shard function that expands each shard of reads into
 * ReadWalkerContext objects carrying reference and feature context.
 *
 * @param bReferenceSource   broadcast reference source, or null when no reference is available
 * @param bFeatureManager    broadcast feature manager, or null when no features are available
 * @param sequenceDictionary dictionary used to bound interval expansion
 * @param readShardPadding   padding (in bases) applied around each shard for reference lookup
 * @return a function mapping a shard of reads to an iterator of contexts
 */
private static FlatMapFunction<Shard<GATKRead>, ReadWalkerContext> getReadsFunction(
        Broadcast<ReferenceMultiSource> bReferenceSource, Broadcast<FeatureManager> bFeatureManager,
        SAMSequenceDictionary sequenceDictionary, int readShardPadding) {
    return (FlatMapFunction<Shard<GATKRead>, ReadWalkerContext>) shard -> {
        // Pad the shard interval (clamped to the contig) so reference bases around the edges are available.
        final SimpleInterval paddedInterval = shard.getInterval().expandWithinContig(readShardPadding, sequenceDictionary);
        final ReferenceDataSource reference;
        if (bReferenceSource == null) {
            reference = null;
        } else {
            reference = new ReferenceMemorySource(bReferenceSource.getValue().getReferenceBases(paddedInterval), sequenceDictionary);
        }
        final FeatureManager features = (bFeatureManager == null) ? null : bFeatureManager.getValue();
        return StreamSupport.stream(shard.spliterator(), false)
                .map(read -> {
                    final SimpleInterval readInterval = getReadInterval(read);
                    return new ReadWalkerContext(read, new ReferenceContext(reference, readInterval), new FeatureContext(features, readInterval));
                })
                .iterator();
    };
}
开发者ID:broadinstitute,项目名称:gatk,代码行数:18,代码来源:ReadWalkerSpark.java
示例20: getVariants
import org.apache.spark.broadcast.Broadcast; //导入依赖的package包/类
/**
 * Loads variants and the corresponding reads, reference and features into a {@link JavaRDD} for the intervals specified.
 * For the current implementation the reads context will always be empty.
 *
 * If no intervals were specified, returns all the variants.
 *
 * @param ctx the Spark context used to shard variants and broadcast reference/features
 * @return all variants as a {@link JavaRDD}, bounded by intervals if specified.
 */
public JavaRDD<VariantWalkerContext> getVariants(JavaSparkContext ctx) {
SAMSequenceDictionary sequenceDictionary = getBestAvailableSequenceDictionary();
// When no intervals were requested, fall back to every interval on the reference.
List<SimpleInterval> intervals = hasIntervals() ? getIntervals() : IntervalUtils.getAllIntervalsForReference(sequenceDictionary);
// use unpadded shards (padding is only needed for reference bases)
final List<ShardBoundary> intervalShards = intervals.stream()
.flatMap(interval -> Shard.divideIntervalIntoShards(interval, variantShardSize, 0, sequenceDictionary).stream())
.collect(Collectors.toList());
// NOTE(review): this passes getIntervals() rather than the locally computed `intervals`,
// so in the no-intervals case the source is not bounded by the reference intervals —
// presumably intentional (load everything, shard later), but worth confirming.
JavaRDD<VariantContext> variants = variantsSource.getParallelVariantContexts(drivingVariantFile, getIntervals());
VariantFilter variantFilter = makeVariantFilter();
variants = variants.filter(variantFilter::test);
// Group the filtered variants into the unpadded shards computed above.
JavaRDD<Shard<VariantContext>> shardedVariants = SparkSharder.shard(ctx, variants, VariantContext.class, sequenceDictionary, intervalShards, variantShardSize, shuffle);
// Reference and features are optional; a null broadcast signals "not available" downstream.
Broadcast<ReferenceMultiSource> bReferenceSource = hasReference() ? ctx.broadcast(getReference()) : null;
Broadcast<FeatureManager> bFeatureManager = features == null ? null : ctx.broadcast(features);
return shardedVariants.flatMap(getVariantsFunction(bReferenceSource, bFeatureManager, sequenceDictionary, variantShardPadding));
}
开发者ID:broadinstitute,项目名称:gatk,代码行数:24,代码来源:VariantWalkerSpark.java
注:本文中的org.apache.spark.broadcast.Broadcast类示例整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。 |
请发表评论