Daily Summary - 23.10.30

Published: 2023-10-31 18:49:00  Author: lao_bing

Today I finished installing Spark on top of Hadoop and running a first program with it.

Tutorial links: https://dblab.xmu.edu.cn/blog/4322/

https://blog.csdn.net/qq_53336526/article/details/131717423

Since the Hadoop version I installed earlier is 2.7.5, I switched the Spark version to 2.4.5; Maven can stay at version 3.9.2 as used in the tutorial.

Also, every path in the tutorial needs to be changed to match your own environment.
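A quick way to confirm which versions are actually in effect is to print them at runtime. The sketch below is my own helper, not part of the tutorial; it assumes the Spark core and Hadoop client jars are on the classpath. If the setup matches this post, it should print 2.4.5 and 2.7.5.

/*** VersionCheck.java (hypothetical helper, not from the tutorial) ***/
import org.apache.hadoop.util.VersionInfo;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;

public class VersionCheck {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setMaster("local").setAppName("VersionCheck");
        JavaSparkContext sc = new JavaSparkContext(conf);
        // Report the versions Spark and Hadoop were built with
        System.out.println("Spark version:  " + sc.version());
        System.out.println("Hadoop version: " + VersionInfo.getVersion());
        sc.stop();
    }
}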

/*** SimpleApp.java ***/
import org.apache.spark.api.java.*;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.SparkConf;

public class SimpleApp {
    public static void main(String[] args) {
        String logFile = "file:///usr/local/spark/README.md"; // Should be some file on your system
        // Run locally; the app name shows up in the Spark UI
        SparkConf conf = new SparkConf().setMaster("local").setAppName("SimpleApp");
        JavaSparkContext sc = new JavaSparkContext(conf);
        // Cache the file, since it is scanned twice (once per filter below)
        JavaRDD<String> logData = sc.textFile(logFile).cache();
        // Count the lines containing "a"
        long numAs = logData.filter(new Function<String, Boolean>() {
            public Boolean call(String s) { return s.contains("a"); }
        }).count();
        // Count the lines containing "b"
        long numBs = logData.filter(new Function<String, Boolean>() {
            public Boolean call(String s) { return s.contains("b"); }
        }).count();
        System.out.println("Lines with a: " + numAs + ", lines with b: " + numBs);
        sc.stop(); // Release the SparkContext when done
    }
}
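Since Spark 2.4.x runs on Java 8, the two anonymous Function classes can also be written as lambdas; Spark's Function is a functional interface, so filter accepts a lambda directly. This equivalent variant is my own sketch, not from the tutorial:

/*** SimpleAppLambda.java (equivalent lambda sketch, not from the tutorial) ***/
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;

public class SimpleAppLambda {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setMaster("local").setAppName("SimpleAppLambda");
        JavaSparkContext sc = new JavaSparkContext(conf);
        JavaRDD<String> logData = sc.textFile("file:///usr/local/spark/README.md").cache();
        // Same two counts as SimpleApp, written as lambdas
        long numAs = logData.filter(s -> s.contains("a")).count();
        long numBs = logData.filter(s -> s.contains("b")).count();
        System.out.println("Lines with a: " + numAs + ", lines with b: " + numBs);
        sc.stop();
    }
}

Either version is packaged into a jar with Maven and then run through spark-submit, as described in the tutorial.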