public class CrossDataSet<L,R> extends DataSet<scala.Tuple2<L,R>>

A DataSet that results from a cross operation. The result of a default cross is a
tuple containing the two values from the two sides of the cartesian product. The result of the
cross can be changed by specifying a custom cross function using the apply method or by
providing a RichCrossFunction.
Example:
val left = ...
val right = ...
val crossResult = left.cross(right) {
(left, right) => new MyCrossResult(left, right)
}
Constructor and Description |
---|
CrossDataSet(CrossOperator<L,R,scala.Tuple2<L,R>> defaultCross,
DataSet<L> leftInput,
DataSet<R> rightInput) |
Modifier and Type | Method and Description |
---|---|
static AggregateDataSet<T> |
aggregate(Aggregations agg,
int field) |
static AggregateDataSet<T> |
aggregate(Aggregations agg,
String field) |
<O> DataSet<O> |
apply(CrossFunction<L,R,O> crosser,
TypeInformation<O> evidence$3,
scala.reflect.ClassTag<O> evidence$4)
Creates a new
DataSet by passing each pair of values to the given function. |
<O> DataSet<O> |
apply(scala.Function2<L,R,O> fun,
TypeInformation<O> evidence$1,
scala.reflect.ClassTag<O> evidence$2)
Creates a new
DataSet where the result for each pair of elements is the result
of the given function. |
static <F> F |
clean(F f,
boolean checkSerializable) |
static <F> boolean |
clean$default$2() |
static <O> UnfinishedCoGroupOperation<T,O> |
coGroup(DataSet<O> other,
scala.reflect.ClassTag<O> evidence$30) |
static scala.collection.Seq<T> |
collect() |
static <R> DataSet<R> |
combineGroup(scala.Function2<scala.collection.Iterator<T>,Collector<R>,scala.runtime.BoxedUnit> fun,
TypeInformation<R> evidence$26,
scala.reflect.ClassTag<R> evidence$27) |
static <R> DataSet<R> |
combineGroup(GroupCombineFunction<T,R> combiner,
TypeInformation<R> evidence$24,
scala.reflect.ClassTag<R> evidence$25) |
static long |
count() |
static <L,R> CrossDataSet<L,R> |
createCrossOperator(DataSet<L> leftInput,
DataSet<R> rightInput,
CrossOperatorBase.CrossHint crossHint)
Creates a default cross operation with Tuple2 as result.
|
static <O> CrossDataSet<T,O> |
cross(DataSet<O> other) |
static <O> CrossDataSet<T,O> |
crossWithHuge(DataSet<O> other) |
static <O> CrossDataSet<T,O> |
crossWithTiny(DataSet<O> other) |
static DataSet<T> |
distinct() |
static <K> DataSet<T> |
distinct(scala.Function1<T,K> fun,
TypeInformation<K> evidence$28) |
static DataSet<T> |
distinct(scala.collection.Seq<Object> fields) |
static DataSet<T> |
distinct(String firstField,
scala.collection.Seq<String> otherFields) |
static DataSet<T> |
filter(FilterFunction<T> filter) |
static DataSet<T> |
filter(scala.Function1<T,Object> fun) |
static DataSet<T> |
first(int n) |
static <R> DataSet<R> |
flatMap(FlatMapFunction<T,R> flatMapper,
TypeInformation<R> evidence$12,
scala.reflect.ClassTag<R> evidence$13) |
static <R> DataSet<R> |
flatMap(scala.Function1<T,scala.collection.TraversableOnce<R>> fun,
TypeInformation<R> evidence$16,
scala.reflect.ClassTag<R> evidence$17) |
static <R> DataSet<R> |
flatMap(scala.Function2<T,Collector<R>,scala.runtime.BoxedUnit> fun,
TypeInformation<R> evidence$14,
scala.reflect.ClassTag<R> evidence$15) |
static <O> UnfinishedOuterJoinOperation<T,O> |
fullOuterJoin(DataSet<O> other) |
static <O> UnfinishedOuterJoinOperation<T,O> |
fullOuterJoin(DataSet<O> other,
JoinOperatorBase.JoinHint strategy) |
static ExecutionEnvironment |
getExecutionEnvironment() |
static int |
getParallelism() |
static TypeInformation<T> |
getType() |
static <K> GroupedDataSet<T> |
groupBy(scala.Function1<T,K> fun,
TypeInformation<K> evidence$29) |
static GroupedDataSet<T> |
groupBy(scala.collection.Seq<Object> fields) |
static GroupedDataSet<T> |
groupBy(String firstField,
scala.collection.Seq<String> otherFields) |
static DataSet<T> |
iterate(int maxIterations,
scala.Function1<DataSet<T>,DataSet<T>> stepFunction) |
static <R> DataSet<T> |
iterateDelta(DataSet<R> workset,
int maxIterations,
int[] keyFields,
boolean solutionSetUnManaged,
scala.Function2<DataSet<T>,DataSet<R>,scala.Tuple2<DataSet<T>,DataSet<R>>> stepFunction,
scala.reflect.ClassTag<R> evidence$32) |
static <R> DataSet<T> |
iterateDelta(DataSet<R> workset,
int maxIterations,
int[] keyFields,
scala.Function2<DataSet<T>,DataSet<R>,scala.Tuple2<DataSet<T>,DataSet<R>>> stepFunction,
scala.reflect.ClassTag<R> evidence$31) |
static <R> DataSet<T> |
iterateDelta(DataSet<R> workset,
int maxIterations,
String[] keyFields,
boolean solutionSetUnManaged,
scala.Function2<DataSet<T>,DataSet<R>,scala.Tuple2<DataSet<T>,DataSet<R>>> stepFunction,
scala.reflect.ClassTag<R> evidence$34) |
static <R> DataSet<T> |
iterateDelta(DataSet<R> workset,
int maxIterations,
String[] keyFields,
scala.Function2<DataSet<T>,DataSet<R>,scala.Tuple2<DataSet<T>,DataSet<R>>> stepFunction,
scala.reflect.ClassTag<R> evidence$33) |
static DataSet<T> |
iterateWithTermination(int maxIterations,
scala.Function1<DataSet<T>,scala.Tuple2<DataSet<T>,DataSet<?>>> stepFunction) |
static DataSet<T> |
javaSet() |
static <O> UnfinishedJoinOperation<T,O> |
join(DataSet<O> other) |
static <O> UnfinishedJoinOperation<T,O> |
join(DataSet<O> other,
JoinOperatorBase.JoinHint strategy) |
static <O> UnfinishedJoinOperation<T,O> |
joinWithHuge(DataSet<O> other) |
static <O> UnfinishedJoinOperation<T,O> |
joinWithTiny(DataSet<O> other) |
static <O> UnfinishedOuterJoinOperation<T,O> |
leftOuterJoin(DataSet<O> other) |
static <O> UnfinishedOuterJoinOperation<T,O> |
leftOuterJoin(DataSet<O> other,
JoinOperatorBase.JoinHint strategy) |
static <R> DataSet<R> |
map(scala.Function1<T,R> fun,
TypeInformation<R> evidence$4,
scala.reflect.ClassTag<R> evidence$5) |
static <R> DataSet<R> |
map(MapFunction<T,R> mapper,
TypeInformation<R> evidence$2,
scala.reflect.ClassTag<R> evidence$3) |
static <R> DataSet<R> |
mapPartition(scala.Function1<scala.collection.Iterator<T>,scala.collection.TraversableOnce<R>> fun,
TypeInformation<R> evidence$10,
scala.reflect.ClassTag<R> evidence$11) |
static <R> DataSet<R> |
mapPartition(scala.Function2<scala.collection.Iterator<T>,Collector<R>,scala.runtime.BoxedUnit> fun,
TypeInformation<R> evidence$8,
scala.reflect.ClassTag<R> evidence$9) |
static <R> DataSet<R> |
mapPartition(MapPartitionFunction<T,R> partitionMapper,
TypeInformation<R> evidence$6,
scala.reflect.ClassTag<R> evidence$7) |
static AggregateDataSet<T> |
max(int field) |
static AggregateDataSet<T> |
max(String field) |
static DataSet<T> |
maxBy(scala.collection.Seq<Object> fields) |
static AggregateDataSet<T> |
min(int field) |
static AggregateDataSet<T> |
min(String field) |
static DataSet<T> |
minBy(scala.collection.Seq<Object> fields) |
static ResourceSpec |
minResources() |
static DataSet<T> |
name(String name) |
static DataSink<T> |
output(OutputFormat<T> outputFormat) |
static <K> DataSet<T> |
partitionByHash(scala.Function1<T,K> fun,
TypeInformation<K> evidence$35) |
static DataSet<T> |
partitionByHash(scala.collection.Seq<Object> fields) |
static DataSet<T> |
partitionByHash(String firstField,
scala.collection.Seq<String> otherFields) |
static <K> DataSet<T> |
partitionByRange(scala.Function1<T,K> fun,
TypeInformation<K> evidence$36) |
static DataSet<T> |
partitionByRange(scala.collection.Seq<Object> fields) |
static DataSet<T> |
partitionByRange(String firstField,
scala.collection.Seq<String> otherFields) |
static <K> DataSet<T> |
partitionCustom(Partitioner<K> partitioner,
scala.Function1<T,K> fun,
TypeInformation<K> evidence$39) |
static <K> DataSet<T> |
partitionCustom(Partitioner<K> partitioner,
int field,
TypeInformation<K> evidence$37) |
static <K> DataSet<T> |
partitionCustom(Partitioner<K> partitioner,
String field,
TypeInformation<K> evidence$38) |
static ResourceSpec |
preferredResources() |
static void |
print() |
static DataSink<T> |
print(String sinkIdentifier) |
static DataSink<T> |
printOnTaskManager(String prefix) |
static void |
printToErr() |
static DataSink<T> |
printToErr(String sinkIdentifier) |
static DataSet<T> |
rebalance() |
static DataSet<T> |
reduce(scala.Function2<T,T,T> fun) |
static DataSet<T> |
reduce(ReduceFunction<T> reducer) |
static <R> DataSet<R> |
reduceGroup(scala.Function1<scala.collection.Iterator<T>,R> fun,
TypeInformation<R> evidence$22,
scala.reflect.ClassTag<R> evidence$23) |
static <R> DataSet<R> |
reduceGroup(scala.Function2<scala.collection.Iterator<T>,Collector<R>,scala.runtime.BoxedUnit> fun,
TypeInformation<R> evidence$20,
scala.reflect.ClassTag<R> evidence$21) |
static <R> DataSet<R> |
reduceGroup(GroupReduceFunction<T,R> reducer,
TypeInformation<R> evidence$18,
scala.reflect.ClassTag<R> evidence$19) |
static DataSet<T> |
registerAggregator(String name,
Aggregator<?> aggregator) |
static <O> UnfinishedOuterJoinOperation<T,O> |
rightOuterJoin(DataSet<O> other) |
static <O> UnfinishedOuterJoinOperation<T,O> |
rightOuterJoin(DataSet<O> other,
JoinOperatorBase.JoinHint strategy) |
static DataSet<T> |
setParallelism(int parallelism) |
static <K> DataSet<T> |
sortPartition(scala.Function1<T,K> fun,
Order order,
TypeInformation<K> evidence$40) |
static DataSet<T> |
sortPartition(int field,
Order order) |
static DataSet<T> |
sortPartition(String field,
Order order) |
static AggregateDataSet<T> |
sum(int field) |
static AggregateDataSet<T> |
sum(String field) |
static DataSet<T> |
union(DataSet<T> other) |
static DataSet<T> |
withBroadcastSet(DataSet<?> data,
String name) |
static DataSet<T> |
withForwardedFields(scala.collection.Seq<String> forwardedFields) |
static DataSet<T> |
withForwardedFieldsFirst(scala.collection.Seq<String> forwardedFields) |
static DataSet<T> |
withForwardedFieldsSecond(scala.collection.Seq<String> forwardedFields) |
static DataSet<T> |
withParameters(Configuration parameters) |
static DataSink<T> |
write(FileOutputFormat<T> outputFormat,
String filePath,
FileSystem.WriteMode writeMode) |
static FileSystem.WriteMode |
write$default$3() |
static DataSink<T> |
writeAsCsv(String filePath,
String rowDelimiter,
String fieldDelimiter,
FileSystem.WriteMode writeMode) |
static String |
writeAsCsv$default$2() |
static String |
writeAsCsv$default$3() |
static FileSystem.WriteMode |
writeAsCsv$default$4() |
static DataSink<T> |
writeAsText(String filePath,
FileSystem.WriteMode writeMode) |
static FileSystem.WriteMode |
writeAsText$default$2() |
aggregate, aggregate, clean, coGroup, collect, combineGroup, combineGroup, count, cross, crossWithHuge, crossWithTiny, distinct, distinct, distinct, distinct, filter, filter, first, flatMap, flatMap, flatMap, fullOuterJoin, fullOuterJoin, getExecutionEnvironment, getParallelism, getType, groupBy, groupBy, groupBy, iterate, iterateDelta, iterateDelta, iterateDelta, iterateDelta, iterateWithTermination, javaSet, join, join, joinWithHuge, joinWithTiny, leftOuterJoin, leftOuterJoin, map, map, mapPartition, mapPartition, mapPartition, max, max, maxBy, min, min, minBy, minResources, name, output, partitionByHash, partitionByHash, partitionByHash, partitionByRange, partitionByRange, partitionByRange, partitionCustom, partitionCustom, partitionCustom, preferredResources, print, print, printOnTaskManager, printToErr, printToErr, rebalance, reduce, reduce, reduceGroup, reduceGroup, reduceGroup, registerAggregator, rightOuterJoin, rightOuterJoin, setParallelism, sortPartition, sortPartition, sortPartition, sum, sum, union, withBroadcastSet, withForwardedFields, withForwardedFieldsFirst, withForwardedFieldsSecond, withParameters, write, writeAsCsv, writeAsText
public static <L,R> CrossDataSet<L,R> createCrossOperator(DataSet<L> leftInput, DataSet<R> rightInput, CrossOperatorBase.CrossHint crossHint)

Parameters:
leftInput - (undocumented)
rightInput - (undocumented)
crossHint - (undocumented)

public static TypeInformation<T> getType()
public static ExecutionEnvironment getExecutionEnvironment()
public static DataSet<T> javaSet()
public static <F> F clean(F f, boolean checkSerializable)
public static DataSet<T> setParallelism(int parallelism)
public static int getParallelism()
public static ResourceSpec minResources()
public static ResourceSpec preferredResources()
public static DataSet<T> registerAggregator(String name, Aggregator<?> aggregator)
public static DataSet<T> withForwardedFields(scala.collection.Seq<String> forwardedFields)
public static DataSet<T> withForwardedFieldsFirst(scala.collection.Seq<String> forwardedFields)
public static DataSet<T> withForwardedFieldsSecond(scala.collection.Seq<String> forwardedFields)
public static DataSet<T> withParameters(Configuration parameters)
public static <R> DataSet<R> map(MapFunction<T,R> mapper, TypeInformation<R> evidence$2, scala.reflect.ClassTag<R> evidence$3)
public static <R> DataSet<R> map(scala.Function1<T,R> fun, TypeInformation<R> evidence$4, scala.reflect.ClassTag<R> evidence$5)
public static <R> DataSet<R> mapPartition(MapPartitionFunction<T,R> partitionMapper, TypeInformation<R> evidence$6, scala.reflect.ClassTag<R> evidence$7)
public static <R> DataSet<R> mapPartition(scala.Function2<scala.collection.Iterator<T>,Collector<R>,scala.runtime.BoxedUnit> fun, TypeInformation<R> evidence$8, scala.reflect.ClassTag<R> evidence$9)
public static <R> DataSet<R> mapPartition(scala.Function1<scala.collection.Iterator<T>,scala.collection.TraversableOnce<R>> fun, TypeInformation<R> evidence$10, scala.reflect.ClassTag<R> evidence$11)
public static <R> DataSet<R> flatMap(FlatMapFunction<T,R> flatMapper, TypeInformation<R> evidence$12, scala.reflect.ClassTag<R> evidence$13)
public static <R> DataSet<R> flatMap(scala.Function2<T,Collector<R>,scala.runtime.BoxedUnit> fun, TypeInformation<R> evidence$14, scala.reflect.ClassTag<R> evidence$15)
public static <R> DataSet<R> flatMap(scala.Function1<T,scala.collection.TraversableOnce<R>> fun, TypeInformation<R> evidence$16, scala.reflect.ClassTag<R> evidence$17)
public static DataSet<T> filter(FilterFunction<T> filter)
public static AggregateDataSet<T> aggregate(Aggregations agg, int field)
public static AggregateDataSet<T> aggregate(Aggregations agg, String field)
public static AggregateDataSet<T> sum(int field)
public static AggregateDataSet<T> max(int field)
public static AggregateDataSet<T> min(int field)
public static AggregateDataSet<T> sum(String field)
public static AggregateDataSet<T> max(String field)
public static AggregateDataSet<T> min(String field)
public static DataSet<T> reduce(ReduceFunction<T> reducer)
public static DataSet<T> reduce(scala.Function2<T,T,T> fun)
public static <R> DataSet<R> reduceGroup(GroupReduceFunction<T,R> reducer, TypeInformation<R> evidence$18, scala.reflect.ClassTag<R> evidence$19)
public static <R> DataSet<R> reduceGroup(scala.Function2<scala.collection.Iterator<T>,Collector<R>,scala.runtime.BoxedUnit> fun, TypeInformation<R> evidence$20, scala.reflect.ClassTag<R> evidence$21)
public static <R> DataSet<R> reduceGroup(scala.Function1<scala.collection.Iterator<T>,R> fun, TypeInformation<R> evidence$22, scala.reflect.ClassTag<R> evidence$23)
public static <R> DataSet<R> combineGroup(GroupCombineFunction<T,R> combiner, TypeInformation<R> evidence$24, scala.reflect.ClassTag<R> evidence$25)
public static <R> DataSet<R> combineGroup(scala.Function2<scala.collection.Iterator<T>,Collector<R>,scala.runtime.BoxedUnit> fun, TypeInformation<R> evidence$26, scala.reflect.ClassTag<R> evidence$27)
public static DataSet<T> first(int n)
public static <K> DataSet<T> distinct(scala.Function1<T,K> fun, TypeInformation<K> evidence$28)
public static DataSet<T> distinct()
public static DataSet<T> distinct(String firstField, scala.collection.Seq<String> otherFields)
public static <K> GroupedDataSet<T> groupBy(scala.Function1<T,K> fun, TypeInformation<K> evidence$29)
public static GroupedDataSet<T> groupBy(scala.collection.Seq<Object> fields)
public static GroupedDataSet<T> groupBy(String firstField, scala.collection.Seq<String> otherFields)
public static <O> UnfinishedJoinOperation<T,O> join(DataSet<O> other)
public static <O> UnfinishedJoinOperation<T,O> join(DataSet<O> other, JoinOperatorBase.JoinHint strategy)
public static <O> UnfinishedJoinOperation<T,O> joinWithTiny(DataSet<O> other)
public static <O> UnfinishedJoinOperation<T,O> joinWithHuge(DataSet<O> other)
public static <O> UnfinishedOuterJoinOperation<T,O> fullOuterJoin(DataSet<O> other)
public static <O> UnfinishedOuterJoinOperation<T,O> fullOuterJoin(DataSet<O> other, JoinOperatorBase.JoinHint strategy)
public static <O> UnfinishedOuterJoinOperation<T,O> leftOuterJoin(DataSet<O> other)
public static <O> UnfinishedOuterJoinOperation<T,O> leftOuterJoin(DataSet<O> other, JoinOperatorBase.JoinHint strategy)
public static <O> UnfinishedOuterJoinOperation<T,O> rightOuterJoin(DataSet<O> other)
public static <O> UnfinishedOuterJoinOperation<T,O> rightOuterJoin(DataSet<O> other, JoinOperatorBase.JoinHint strategy)
public static <O> UnfinishedCoGroupOperation<T,O> coGroup(DataSet<O> other, scala.reflect.ClassTag<O> evidence$30)
public static <O> CrossDataSet<T,O> cross(DataSet<O> other)
public static <O> CrossDataSet<T,O> crossWithTiny(DataSet<O> other)
public static <O> CrossDataSet<T,O> crossWithHuge(DataSet<O> other)
public static DataSet<T> iterate(int maxIterations, scala.Function1<DataSet<T>,DataSet<T>> stepFunction)
public static DataSet<T> iterateWithTermination(int maxIterations, scala.Function1<DataSet<T>,scala.Tuple2<DataSet<T>,DataSet<?>>> stepFunction)
public static <R> DataSet<T> iterateDelta(DataSet<R> workset, int maxIterations, int[] keyFields, scala.Function2<DataSet<T>,DataSet<R>,scala.Tuple2<DataSet<T>,DataSet<R>>> stepFunction, scala.reflect.ClassTag<R> evidence$31)
public static <R> DataSet<T> iterateDelta(DataSet<R> workset, int maxIterations, int[] keyFields, boolean solutionSetUnManaged, scala.Function2<DataSet<T>,DataSet<R>,scala.Tuple2<DataSet<T>,DataSet<R>>> stepFunction, scala.reflect.ClassTag<R> evidence$32)
public static <R> DataSet<T> iterateDelta(DataSet<R> workset, int maxIterations, String[] keyFields, scala.Function2<DataSet<T>,DataSet<R>,scala.Tuple2<DataSet<T>,DataSet<R>>> stepFunction, scala.reflect.ClassTag<R> evidence$33)
public static <R> DataSet<T> iterateDelta(DataSet<R> workset, int maxIterations, String[] keyFields, boolean solutionSetUnManaged, scala.Function2<DataSet<T>,DataSet<R>,scala.Tuple2<DataSet<T>,DataSet<R>>> stepFunction, scala.reflect.ClassTag<R> evidence$34)
public static DataSet<T> partitionByHash(String firstField, scala.collection.Seq<String> otherFields)
public static <K> DataSet<T> partitionByHash(scala.Function1<T,K> fun, TypeInformation<K> evidence$35)
public static DataSet<T> partitionByRange(String firstField, scala.collection.Seq<String> otherFields)
public static <K> DataSet<T> partitionByRange(scala.Function1<T,K> fun, TypeInformation<K> evidence$36)
public static <K> DataSet<T> partitionCustom(Partitioner<K> partitioner, int field, TypeInformation<K> evidence$37)
public static <K> DataSet<T> partitionCustom(Partitioner<K> partitioner, String field, TypeInformation<K> evidence$38)
public static <K> DataSet<T> partitionCustom(Partitioner<K> partitioner, scala.Function1<T,K> fun, TypeInformation<K> evidence$39)
public static DataSet<T> rebalance()
public static <K> DataSet<T> sortPartition(scala.Function1<T,K> fun, Order order, TypeInformation<K> evidence$40)
public static DataSink<T> writeAsText(String filePath, FileSystem.WriteMode writeMode)
public static DataSink<T> writeAsCsv(String filePath, String rowDelimiter, String fieldDelimiter, FileSystem.WriteMode writeMode)
public static DataSink<T> write(FileOutputFormat<T> outputFormat, String filePath, FileSystem.WriteMode writeMode)
public static DataSink<T> output(OutputFormat<T> outputFormat)
public static void print()
public static void printToErr()
public static <F> boolean clean$default$2()
public static FileSystem.WriteMode writeAsText$default$2()
public static String writeAsCsv$default$2()
public static String writeAsCsv$default$3()
public static FileSystem.WriteMode writeAsCsv$default$4()
public static FileSystem.WriteMode write$default$3()
public <O> DataSet<O> apply(scala.Function2<L,R,O> fun, TypeInformation<O> evidence$1, scala.reflect.ClassTag<O> evidence$2)

Creates a new DataSet where the result for each pair of elements is the result
of the given function.

Parameters:
fun - (undocumented)
evidence$1 - (undocumented)
evidence$2 - (undocumented)

public <O> DataSet<O> apply(CrossFunction<L,R,O> crosser, TypeInformation<O> evidence$3, scala.reflect.ClassTag<O> evidence$4)
Creates a new DataSet by passing each pair of values to the given function.
The function can output zero or more elements using the Collector, which will form the
result.

A RichCrossFunction can be used to access the
broadcast variables and the RuntimeContext.

Parameters:
crosser - (undocumented)
evidence$3 - (undocumented)
evidence$4 - (undocumented)

Copyright © 2014–2018 The Apache Software Foundation. All rights reserved.