tobby48 4 лет назад
Родитель
Commit
fcfa9627ef

+ 44
- 0
src/main/java/kr/co/swh/lecture/opensource/apache/spark/JavaWordCount.java Просмотреть файл

@@ -0,0 +1,44 @@
1
package kr.co.swh.lecture.opensource.apache.spark;

import java.util.Arrays;
import java.util.List;
import java.util.regex.Pattern;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SparkSession;

import scala.Tuple2;
13
+
14
+public final class JavaWordCount {
15
+	private static final Pattern SPACE = Pattern.compile(" ");
16
+
17
+	public static void main(String[] args) throws Exception {
18
+
19
+		String text = "sadfasdf sdfsdfasd fdsfsdf asdfasdf sdfasdf sadfdf";
20
+		
21
+		SparkConf conf = new SparkConf();
22
+		conf.setMaster("local[2]");
23
+//		SparkSession spark = new SparkSession(new SparkConf().setAppName("Spark WordCount").setMaster("local[2]"));
24
+		SparkSession spark = SparkSession
25
+				.builder()
26
+				.appName("JavaWordCount")
27
+//				.config(conf);
28
+				.getOrCreate();
29
+
30
+		JavaRDD<String> lines = spark.read().textFile(text).javaRDD();
31
+
32
+		JavaRDD<String> words = lines.flatMap(s -> Arrays.asList(SPACE.split(s)).iterator());
33
+
34
+		JavaPairRDD<String, Integer> ones = words.mapToPair(s -> new Tuple2<>(s, 1));
35
+
36
+		JavaPairRDD<String, Integer> counts = ones.reduceByKey((i1, i2) -> i1 + i2);
37
+
38
+		List<Tuple2<String, Integer>> output = counts.collect();
39
+		for (Tuple2<?,?> tuple : output) {
40
+			System.out.println(tuple._1() + ": " + tuple._2());
41
+		}
42
+		spark.stop();
43
+	}
44
+}

+ 69
- 0
src/main/java/kr/co/swh/lecture/opensource/apache/spark/Test.java Просмотреть файл

@@ -0,0 +1,69 @@
1
+package kr.co.swh.lecture.opensource.apache.spark; 
2
+import java.util.Arrays;
3
+import java.util.List;
4
+
5
+import org.apache.spark.sql.Dataset;
6
+import org.apache.spark.sql.Encoders;
7
+import org.apache.spark.sql.Row;
8
+import org.apache.spark.sql.SparkSession;
9
+
10
+/**
11
+ * <pre>
12
+ * kr.co.swh.lecture.opensource.apache.spark 
13
+ * Test.java
14
+ *
15
+ * 설명 :
16
+ * </pre>
17
+ * 
18
+ * @since : 2020. 11. 8.
19
+ * @author : tobby48
20
+ * @version : v1.0
21
+ */
22
+public class Test {
23
+
24
+
25
+	public static void main(String[] args) {
26
+		// TODO Auto-generated method stub
27
+		
28
+		SparkSession spark = SparkSession
29
+				.builder()
30
+				.appName("JavaWordCount")
31
+				.getOrCreate();
32
+		
33
+		// A JSON dataset is pointed to by path.
34
+		// The path can be either a single text file or a directory storing text files
35
+		Dataset<Row> people = spark.read().json("examples/src/main/resources/people.json");
36
+
37
+		// The inferred schema can be visualized using the printSchema() method
38
+		people.printSchema();
39
+		// root
40
+		//  |-- age: long (nullable = true)
41
+		//  |-- name: string (nullable = true)
42
+
43
+		// Creates a temporary view using the DataFrame
44
+		people.createOrReplaceTempView("people");
45
+
46
+		// SQL statements can be run by using the sql methods provided by spark
47
+		Dataset<Row> namesDF = spark.sql("SELECT name FROM people WHERE age BETWEEN 13 AND 19");
48
+		namesDF.show();
49
+		// +------+
50
+		// |  name|
51
+		// +------+
52
+		// |Justin|
53
+		// +------+
54
+
55
+		// Alternatively, a DataFrame can be created for a JSON dataset represented by
56
+		// a Dataset<String> storing one JSON object per string.
57
+		List<String> jsonData = Arrays.asList(
58
+		        "{\"name\":\"Yin\",\"address\":{\"city\":\"Columbus\",\"state\":\"Ohio\"}}");
59
+		Dataset<String> anotherPeopleDataset = spark.createDataset(jsonData, Encoders.STRING());
60
+		Dataset<Row> anotherPeople = spark.read().json(anotherPeopleDataset);
61
+		anotherPeople.show();
62
+		// +---------------+----+
63
+		// |        address|name|
64
+		// +---------------+----+
65
+		// |[Columbus,Ohio]| Yin|
66
+		// +---------------+----+
67
+	}
68
+
69
+}