
get_json_object(regexp_replace(regexp_replace(regexp_replace(split(regexp_replace(content, \"\\\":\\\"\", \"\\\"::\\\"\"), \"::\")(1), \"\\{\"\", \"{\"), \"\"\\}\", \"}\"), \"\\\\\", \"\"), \"$.score\")
\n\u6709\u5927\u4f6c\u77e5\u9053\u5982\u4f55\u8f6c\u4e49\u4e3a spark sql \u7684\u683c\u5f0f\uff1f
\n" }, { "author": { "url": "member/winchang", "name": "winchang", "avatar": "https://cdn.v2ex.com/gravatar/0389fcc392652698c5e17243578ce64b?s=73&d=retro" }, "url": "t/872615", "title": "\u771f\u7684\u6df1\u5165\u4e86\u89e3\u5f00\u6e90\u9879\u76ee\u662f\u52a8\u624b\u5b9e\u73b0--\u300aSpark Core \u7cbe\u7b80\u7248\u300b", "id": "t/872615", "date_published": "2022-08-13T06:46:43+00:00", "content_html": "\u770b\u4e86\u8bb8\u5229\u6770\u8001\u5e08\u7684\u8fd9\u672c\u4e66\uff0c\u8001\u5e08\u5728\u7406\u8bba\u5c42\u9762\u8bb2\u7684\u6bd4\u8f83\u6e05\u695a\u4e86\uff0c\u8bfb\u8d77\u6765\u4e5f\u5bb9\u6613\u7406\u89e3\uff0c\u4f46\u770b\u5b8c\u4f46\u603b\u662f\u611f\u89c9\u8fd8\u5dee\u70b9\u4ec0\u4e48\u3002\u4e8e\u662f\u52a8\u624b\u8d77\u4e86\u4e2a\u9879\u76ee\uff0c\u5b9a\u4e86\u4e2a\u5c0f\u76ee\u6807\uff1a\u5b9e\u73b0 RDD \u7684\u903b\u8f91\u3002
\n\u76ee\u524d RDD \u7684 MVP \u5df2\u7ecf\u5b8c\u6210\uff0c\u6700\u5927\u7684\u6536\u83b7\u662f\u5f7b\u5e95\u7406\u89e3\u4e86\u51e0\u4e2a\u8d39\u89e3\u7684\u95ee\u9898\uff1a
\n1 \uff0cStage \u7684\u5207\u5206\u539f\u7406\uff0c\u4e3a\u4ec0\u4e48\u8981\u8fd9\u4e48\u505a
\n2 \uff0cShuffle \u662f\u5982\u4f55\u5b9e\u73b0\u7684\uff0cwrite/read \u662f\u5982\u4f55\u8854\u63a5
\n3 \uff0cPartition \u4e3a\u4f55\u5982\u6b64\u7684\u91cd\u8981
\n\u6709\u5174\u8da3\u7684\u540c\u5b66\u4e00\u8d77 github \u4ea4\u4e2a\u670b\u53cb\u5427\uff0c\u9879\u76ee\u5730\u5740 https://github.com/changzhiwin/spark-core-analysis \uff0c\u7279\u70b9\uff1a
\n1 \uff0c\u805a\u7126\u5728 RDD \u5c42\u9762\uff08\u8fd9\u662f Spark \u7684\u6839\u672c\uff09
\n2 \uff0c\u5165\u95e8\u660e\u786e\uff0c\u4ee3\u7801\u91cf\u5c0f\uff08\u4fdd\u7559\u903b\u8f91\uff09\uff0c\u5bb9\u6613\u4e0a\u624b\u8fd0\u884c\uff08\u8fd0\u884c\u8d77\u6765\uff0c\u7406\u89e3\u4ee3\u7801\u5c31\u5bb9\u6613\u4e86\uff09
\n" }, { "author": { "url": "member/hitzhaowenqiang", "name": "hitzhaowenqiang", "avatar": "https://cdn.v2ex.com/avatar/0c2a/395b/523239_large.png?m=1641892361" }, "url": "t/862897", "title": "\u6c42\u52a9\u51e0\u4e2a Spark \u95ee\u9898", "id": "t/862897", "date_published": "2022-06-29T04:14:14+00:00", "content_html": "Q1: Someone handed you this dataset (~1GB), and you discovered that it\u2019s over 1,000 tiny files.\nvar df = spark.read.format(\"orc\").load(clean_tracker_cstt_path)\n\u25cb\tUsing Spark, please show how you can improve storage efficiency, and explain why this is important.\n\u25cb\tAfter improving storage efficiency, please explain impact on loading and using dataset in Spark.
\nQ2: Given the schema below, use Spark 2.x Dataframe API to give count of events per day for the last 7 days.
\nroot\n|-- action_id: integer (nullable = true)\n|-- receive_time: timestamp (nullable = true)\n|-- uuid: string (nullable = true)
\nQ3: You have calculated the Daily Event Count above using Spark API. Now please find the Min, Max, Mean, and Standard Deviation of Daily Count by using Scala. Only built-in Scala functions may be used. Please format the answer with 2 decimal places, e.g. \u201cThe Average Daily Count from Last 7 Days is x.xx\u201d.
\n" }, { "author": { "url": "member/laobaozi", "name": "laobaozi", "avatar": "https://cdn.v2ex.com/avatar/f6da/d23b/102334_large.png?m=1729434479" }, "url": "t/837514", "date_modified": "2022-03-02T10:22:14+00:00", "content_html": "\u516c\u53f8\u51c6\u5907\u505a\u4e00\u4e2a\u63a8\u8350\u529f\u80fd\uff0c\u4ece\u6587\u7ae0\u5e93\u4e2d\u6839\u636e\u7528\u6237\u9605\u8bfb\u8bb0\u5f55\u63a8\u8350\u76f8\u5173\u7684\u6587\u7ae0\u3002\u8fd9\u4e2a\u6587\u7ae0\u5e93\u4fdd\u5b58\u4e86\u6240\u6709\u5b50\u516c\u53f8\u7684\u6587\u7ae0\uff0c\u66f4\u65b0\u9891\u7387\u6bd4\u8f83\u9ad8\uff0c\u6240\u4ee5\u6ca1\u6709\u4f7f\u7528\u8ba1\u7b97\u4e24\u4e24\u76f8\u4f3c\u5ea6\u7684\u65b9\u5f0f\u3002\u9700\u6c42\uff1a\u5229\u7528 Spark \u89e3\u6790 xml \u6587\u4ef6\uff0cxml \u7ed3\u6784\u6700\u5927\u6df1\u5ea6\u6709 8 \u5c42\uff0c\u6570\u636e\u6709\u590d\u6742\u7684\u5173\u8054\u5173\u7cfb\uff0c\u5904\u7406\u540e\u4f1a\u63d2\u5165\u5230 20 \u591a\u5f20\u8868\u4e2d\u3002\u7531\u4e8e\u4f7f\u7528\u5de5\u5177\u89e3\u6790\u540e\u7684 DataFrame \u7ed3\u6784\u8fc7\u957f\uff0c\u7b80\u5355\u622a\u53d6\u4e86\u4e00\u90e8\u5206\u5982\u4e0b\uff1a
\n
\u76ee\u524d\u6ca1\u6709\u597d\u7684\u601d\u8def\uff0c\u5c31\u662f\u83b7\u53d6\u6bcf\u4e00\u5217\u5143\u7d20\u4f9d\u6b21\u904d\u5386\u63d0\u53d6\u51fa\u6240\u6709\u9700\u8981\u7684\u5143\u7d20\uff0c\u4f46\u662f\u60f3\u5230\u8981\u904d\u5386 8 \u5c42\u773c\u6cea\u90fd\u8981\u6d41\u4e0b\u6765\u4e86\uff1a
\n// \u5c42\u5c42\u5bf9\u8c61\u904d\u5386\nval identifiers = row.get(0).asInstanceOf[Row].get(0).asInstanceOf[Row].get(0).asInstanceOf[Row].get(0)\n// \u89e3\u6790\u6570\u7ec4\nprintln(identifiers.asInstanceOf[mutable.WrappedArray[AnyRef]](0))\n\n\u5c31\u4ee5\u56fe\u793a\u4e2d\u7684\u7ed3\u6784\u6765\u8bf4\uff0c\u6570\u636e\u4e0d\u7b97\u590d\u6742\uff0c\u4f46\u662f\u5b8c\u6574\u7684\u7ed3\u6784\u5b9e\u5728\u662f\u4ee4\u4eba\u795e\u4f24\uff0c\u8d34\u4e86\u56fe\u6050\u6015\u4f1a\u5360\u636e\u6574\u4e2a\u9875\u9762\u3002\u53e6\u5916\u6700\u7ec8\u8981\u63d2\u5165\u7684 20 \u591a\u5f20\u8868\u4e2d\u5341\u51e0\u5f20\u662f\u5173\u8054\u8868\u3002\n\u521d\u6b21\u4f7f\u7528 Spark \u5904\u7406\u6570\u636e\uff0c\u6c42\u5927\u5bb6\u7ed9\u70b9\u610f\u89c1\u548c\u601d\u8def\uff0c\u4e07\u5206\u611f\u8c22\uff01
\n" }, { "author": { "url": "member/txc106", "name": "txc106", "avatar": "https://cdn.v2ex.com/gravatar/86d3bfbb9d99ba9123dbe72f04d3d732?s=73&d=retro" }, "url": "t/769217", "title": "spark \u5927\u6570\u636e\u79bb\u7ebf\u5206\u6790 \u722c\u866b\u5b58\u5230 csv \u6709\u7684\u5217\u662f\u957f\u5ea6\u4e0d\u56fa\u5b9a\u7684 list \u8bf7\u95ee\u5e94\u8be5\u600e\u4e48\u5b58\u5230 hive\uff1f\u76f4\u63a5\u5b58 list \u5417\uff1f\u8be5\u600e\u4e48\u5206\u6790\u5462\uff1f", "id": "t/769217", "date_published": "2021-04-08T13:50:59+00:00", "content_html": "\u5b8c\u5168\u6ca1\u6709\u5934\u7eea \u53e6\u76ee\u524d\u81ea\u5df1\u5b9a\u7684\u6d41\u7a0b\u662f\u722c\u866b-\u300b hdfs-\u300b\u901a\u8fc7 scala \u9884\u5904\u7406\u540e\u5b58 hive-\u300b scala \u5206\u6790-\u300b mysql-\u300b\u53ef\u89c6\u5316\n\u8bf7\u95ee\u8fd9\u4e2a\u6d41\u7a0b\u6709\u4ec0\u4e48\u95ee\u9898\u5417\uff1f\n\u770b\u5176\u4ed6\u9879\u76ee\u5904\u7406\u7684\u6570\u636e\u90fd\u6ca1\u6709 list \u6211\u662f\u56e0\u4e3a\u90a3\u5217\u6709\u7684\u662f\u7a7a\u6709\u7684\u6709\u4e09\u56db\u4e2a\u6570\u636e \u7136\u540e\u5c31\u76f4\u63a5\u5b58\u7684 list \u8be5\u600e\u4e48\u5904\u7406\u5462\uff1f\u6211\u4e5f\u6ca1\u80fd\u67e5\u5230\u5176\u4ed6\u4eba\u6709\u7c7b\u4f3c\u7684\u5904\u7406\u6d41\u7a0b\u3002\u3002\u3002
\n" }, { "author": { "url": "member/MPAmber", "name": "MPAmber", "avatar": "https://cdn.v2ex.com/gravatar/b768ac0e6da086ec5adafb392a91c359?s=73&d=retro" }, "url": "t/719033", "title": "PayPal \u62db \u8d44\u6df1\u5927\u6570\u636e\u5de5\u7a0b\u5e08 \u5566 - \u6280\u672f\u6808\uff1a Spark, Scala, Java , Python \u7b49", "id": "t/719033", "date_published": "2020-10-27T06:27:12+00:00", "content_html": "\u52a0\u5165 PayPal \u662f\u4ec0\u4e48\u6837\u7684\u4f53\u9a8c\uff1f
\n\u2022 \u8f7b\u677e\u6109\u5feb\u7684\u5de5\u4f5c\u6c1b\u56f4\uff0c\u4f17\u591a\u4f18\u79c0\u4ee5\u53ca nice \u7684\u5c0f\u4f19\u4f34\n\u2022 \u8d85\u957f\u7684\u5047\u671f \u2014\u2014 \u6bcf\u5e74 15 \u5929\u5e74\u5047\u8d77\uff0c\u6bcf\u5de5\u4f5c\u6ee1 1 \u5e74\u52a0\u4e00\u5929\uff08\u6700\u591a 20 \u5929\uff09\uff1b\u9664\u6b64\u4ee5\u5916\uff0c\u5de5\u4f5c\u6ee1 5 \u5e74\u8fd8\u5c06\u83b7\u5f97\u989d\u5916\u7684 4 \u5468\u5e26\u85aa\u5047\u671f\uff01\u4ee5\u53ca 15 \u5929\u5e26\u85aa\u75c5\u5047\u7b49\u7b49\u7b49\u3002\n\u2022 \u5305\u5bb9\u3001\u591a\u5143\u4ee5\u53ca\u56fd\u9645\u5316\u7684\u516c\u53f8\u6587\u5316\n\u2022 \u5e02\u503c 2000 \u4ebf\u7f8e\u5143\uff0c\u5168\u6c11\u6301\u80a1\uff0c\u80a1\u7968\u6fc0\u52b1\uff0c\u85aa\u916c\u4e0d\u8f93 996 \u516c\u53f8\u7684\u201c\u798f\u62a5\u201d\u3002work-life balance\uff0c\u751f\u6d3b\u4e0d\u5e94\u53ea\u6709\u52a0\u73ed\u3002
\nPayPal \u7684\u5927\u6570\u636e\u7ec4\u4f1a\u505a\u4ec0\u4e48\uff1f
\nAt PayPal Global Data Science(GDS) team, we develop machine learning platform and AI applications to improve PayPal\u2019s global business. Machine learning and AI is one of the core competitive advantage of PayPal, which significantly reduced payment risk loss, brought million dollars\u2019 revenue and expanded to multiple domains rapidly. As an engineer in GDS, you will work closely with analytical team, understand the requirement with cutting-edge algorithm, contribute to the core platform, make the research work to a real product. We are looking for strong technologists who are passionate to solve machine learning problems and able to continuously deliver AI solutions in scalable way.
\n\u8fd9\u4e2a\u804c\u4f4d\u7684\u8981\u6c42\u662f\u4ec0\u4e48\uff1f
\nQualifications\n\u2022\tBS, MS, or PhD in Computer Science or related technical discipline (or equivalent).\n\u2022\t8+ years\u2019 work experience in software development area with at least 5+ years\u2019 experience in Java programming.\n\u2022\tExcellent understanding of computer science fundamentals, data structures, and algorithms.\n\u2022\tExcellent problem solving skills, can triage and resolve critical tech issues without supervision.\n\u2022\tExpertise required in object-oriented design methodology and application development in Java.\n\u2022\tExperience in big data technology such as Hadoop/Spark/Pig/HBASE/Streaming\n\u2022\tMastering at least one scripting language such as Unix Shell/Python/Perl/JS\n\u2022\tHands on web application development skill (HTML5/CSS/JS) is a very big plus\n\u2022\tKnowledge on Machine Learning application pipeline is a very big plus\n\u2022\tProven results oriented person with a delivery focus in a high velocity, high quality environment.\n\u2022\tStrong communication skills in Oral and Written English.\n\u2022\tWorking Experience in Multi-national Company is a plus.\n\u2022\tGeek style is a big plus.
\n\u8fd8\u6709\u9644\u52a0\uff01\uff01\uff01\uff01\uff01\uff01\uff01\uff01\uff1a
\n\u5982\u679c\u6709\u5174\u8da3\u7684\u5c0f\u4f19\u4f34\uff0c\u540c\u65f6\u53c8\u6709\u70b9\u62c5\u5fc3\u5e74\u7ec8\u5956\u7684\u635f\u5931\u7684\u8bdd\u3002\n\u4e0d\u7528\u62c5\u5fc3\uff01\u57fa\u4e8e\u9762\u8bd5\u60c5\u51b5\uff0cPayPal \u4f1a\u8003\u8651\u7ed9\u4e88\u989d\u5916\u5956\u91d1\u6216\u8005 Sign-On Bonus \u4e4b\u7c7b\u51cf\u5c11\u4f60\u7684\u5e74\u7ec8\u5956\u635f\u5931\uff0c\n\u8ba9\u4f60\u5728\u5e74\u5e95\u65e2\u80fd\u8f7b\u677e\u641e\u5b9a\u65b0\u7684 Exciting \u7684\u5de5\u4f5c\u673a\u4f1a\uff0c\u53c8\u80fd\u5373\u65f6\u5f97\u5230\u4e00\u4e9b\u8865\u507f\uff0c\u4f55\u4e50\u4e0d\u4e3a\u5462\uff1f
\n" }, { "author": { "url": "member/starry97", "name": "starry97", "avatar": "https://cdn.v2ex.com/gravatar/3f64428d3e972dc0924896f71180fba1?s=73&d=retro" }, "url": "t/695639", "date_modified": "2020-08-04T13:14:52+00:00", "content_html": "
\u5173\u4e8e\u4e0a\u9762\u90a3\u4e2a\u56fe\uff0cShuffleMapStage \u4e2d\uff0c\u6709\u4e09\u4e2a RDD\uff0c\u6bcf\u4e2a RDD \u6709\u4e09\u4e2a\u5206\u533a\n\u6211\u770b\u7f51\u4e0a\u6587\u7ae0\u90fd\u662f\u8bf4\u4e00\u4e2a\u5206\u533a\u5bf9\u5e94\u4e00\u4e2a task\uff0c
\n1.\u4e0a\u9762\u90a3\u4e2a ShuffleMapStage \u4e2d\u9636\u6bb5\u91cc\u662f\u5426\u5b58\u5728 9 \u4e2a Task \u3002
\n2.\u4f46\u7ad9\u5728 pipeline \u89d2\u5ea6\u4e0a\u770b\uff0c\u5e94\u8be5\u53ea\u6709\u4e09\u4e2a task\uff0c\u6bcf\u4e2a RDD \u7684\u4e00\u4e2a\u5206\u533a\u7ec4\u5408\u6210\u4e00\u4e2a task \u3002
\n\u4e0a\u9762\u4e24\u79cd\u8bf4\u6cd5\uff0c\u54ea\u79cd\u662f\u6b63\u786e\u7684\u5462\uff0c\u88ab\u641e\u7cca\u6d82\u4e86\u3002\u8c22\u8c22\uff01
\n", "date_published": "2020-08-04T13:12:30+00:00", "title": "\u5173\u4e8e Spark Task \u7684\u7591\u95ee", "id": "t/695639" }, { "author": { "url": "member/sodadev", "name": "sodadev", "avatar": "https://cdn.v2ex.com/avatar/5b5b/6a3c/396232_large.png?m=1736745109" }, "url": "t/676825", "title": "\u6709\u6ca1\u6709\u5728\u6ef4\u6ef4\u6216\u8005\u5176\u4ed6\u7f51\u7ea6\u8f66\u516c\u53f8\u7684\u540c\u5b66\uff0c\u8bf7\u6559\u4e00\u4e2a\u6570\u636e\u91cf\u7684\u95ee\u9898", "id": "t/676825", "date_published": "2020-05-29T13:10:52+00:00", "content_html": "\u4e00\u53f0\u8f66\u5b50\u5982\u679c\u65e9\u4e0a 9 \u70b9\u5f00\u59cb\u63a5\u5355 10 \u4e2a\u5c0f\u65f6\u4f1a\u4ea7\u751f\u591a\u5c11\u6570\u636e\u91cf\u5462\uff0c\u8fd9\u4e9b\u6570\u636e\u91cf\u5206\u522b\u90fd\u5305\u542b\u4ec0\u4e48\u4fe1\u606f\uff0c\u8c22\u8c22\u89e3\u7b54\uff01
\n" }, { "author": { "url": "member/qianxaingmoli", "name": "qianxaingmoli", "avatar": "https://cdn.v2ex.com/avatar/1775/74f3/474260_large.png?m=1690038346" }, "url": "t/674431", "title": "spark \u4f5c\u4e1a\u6c42\u52a9\uff0c\u5254\u9664\u7a7a\u503c\u5927\u4e8e\u4e09\u7684\u884c", "id": "t/674431", "date_published": "2020-05-22T08:30:10+00:00", "content_html": "\u5c06\u7f3a\u5931\u503c\u5927\u4e8e n \uff08 n=3 \uff09\u4e2a\u7684\u6570\u636e\u6761\u76ee\u5254\u9664\u51fa\u539f\u59cb\u6570\u636e\u96c6\uff0c\u5e76\u8f93\u51fa\u5254\u9664\u7684\u6761\u76ee\u6570\u91cf\uff1b
\n\u6709\u6ca1\u6709\u5927\u4f6c\u80fd\u8bf4\u4e0b scala \u7684\u5199\u6cd5,rdd \u5904\u7406\u5b8c\u591a\u884c\u4f1a\u6324\u5728\u4e00\u8d77\u4e86\uff0cdataframe \u5199\u4e86\u53c8\u4e0d\u592a\u5bf9\n
\u770b\u4e86\u5b98\u65b9\u597d\u50cf\u652f\u6301 scala\uff0c\u4e0d\u652f\u6301 kotlin\uff0c\u4e0d\u77e5\u9053\u7528 kotlin \u65b9\u4e0d\u65b9\u4fbf\uff0c scala \u6ca1\u5b66\u8fc7\u3002
\n" }, { "author": { "url": "member/muziling", "name": "muziling", "avatar": "https://cdn.v2ex.com/gravatar/8509ed5054cef8bc3fafd7dd6e9a8947?s=73&d=retro" }, "url": "t/664357", "title": "\u73b0\u5728\u5199 spark \u7a0b\u5e8f\uff0c\u90fd\u662f\u7528 scala \u5417", "id": "t/664357", "date_published": "2020-04-20T09:25:25+00:00", "content_html": "spark\uff0ckotlin \u4e0d\u88ab\u5b98\u65b9\u652f\u6301\u5417\uff0ckotlin \u597d\u5b66\uff0c\nscala \u611f\u89c9\u5165\u95e8\u4e0d\u6613\u3002
\n" }, { "author": { "url": "member/rootzeal", "name": "rootzeal", "avatar": "https://cdn.v2ex.com/gravatar/1b6ba67d896e4f373bb68a35c6d1c47d?s=73&d=retro" }, "url": "t/547675", "title": "spark \u6838\u5fc3\u6784\u4ef6\u4e4b Dependency \u5bbd\u7a84\u4f9d\u8d56", "id": "t/547675", "date_published": "2019-03-23T04:34:22+00:00", "content_html": "https://mp.weixin.qq.com/s/QmceOaI7aP1YAmyec-IpmAspark 2.2.0 \u9ed8\u8ba4\u5b89\u88c5\uff0c\u5565\u90fd\u6ca1\u52a8\npython 2.7.9\nipython 5.4.1\n\u8fd0\u884c network_wordcount \u4f8b\u5b50\u3002\n\u8dd1 scala \u811a\u672c\u6210\u529f\uff0c\u4f46\u662f\u8fd0\u884c python \u7684\u4f8b\u5b50\uff0c\u76f4\u63a5\u62a5\u9519\uff1a\nJob aborted due to stage failure: Task 0 in stage 2.0 failed 1 times, most recent failure: Lost task 0.0 in stage 2.0 (TID 1, localhost, executor driver): java.lang.IllegalArgumentException: port out of range:459092027
\n\u81ea\u5df1\u53bb\u8c37\u6b4c\u4e86\u4e00\u4e0b\uff0c\u8fd9\u4e2a bug \u88ab\u63d0\u4ea4\u8fc7\uff1a\nhttps://issues.apache.org/jira/browse/SPARK-7688
\n\u4f46\u662f\u88ab\u8ba4\u4e3a\u4e0d\u662f bug,\u76f4\u63a5\u5173\u6389\u4e86\n\u600e\u4e48\u89e3\u51b3\u7684\u6ca1\u8bf4\u3002\n\u6709\u8fbe\u4eba\u77e5\u9053\u600e\u4e48\u641e\u4e48\uff1f
\n", "date_published": "2017-11-24T03:11:55+00:00", "title": "spark straming\u3002submit Python \u811a\u672c\u62a5\u9519\u3002", "id": "t/409151" }, { "author": { "url": "member/Livid", "name": "Livid", "avatar": "https://cdn.v2ex.com/avatar/c4ca/4238/1_large.png?m=1776858751" }, "url": "t/343942", "title": "CPython\uff0c PyPy \u548c Scala \u5728 Spark \u5e73\u53f0\u4e0a\u7684\u6027\u80fd\u5bf9\u6bd4", "id": "t/343942", "date_published": "2017-02-28T22:18:40+00:00", "content_html": "http://emptypipes.org/2015/01/17/python-vs-scala-vs-spark/\r\u6309\u89c2\u5bdf\u770b\uff0c\u662f\u90fd\u4e0d\u4f1a\u751f\u6210\u65b0\u7684 map task \u7684\uff0c\u7eaf\u7cb9\u7684\u672c\u5730\u6267\u884c\uff0c\u4f46\u54ea\u513f\u6709\u5bf9\u5e94\u7684\u8bf4\u660e\u5462\uff1f
\n", "date_published": "2016-09-14T14:26:15+00:00", "title": "Spark/Scala \u7684\u7ec6\u8282\u8ba8\u8bba\uff1a\u5728 map task \u91cc\u7684 map \u4f1a\u5f97\u5230\u5982\u4f55\u7684\u5904\u7406\uff1f", "id": "t/306340" }, { "author": { "url": "member/jaymiao", "name": "jaymiao", "avatar": "https://cdn.v2ex.com/avatar/1677/0795/63420_large.png?m=1400836349" }, "url": "t/294408", "date_modified": "2016-07-23T09:36:22+00:00", "content_html": "http://spark.apache.org/docs/latest/api/scala/index.html#org.apache.spark.package\n\u5728 spark \u5b98\u65b9\u6587\u6863\u4e0a\u67e5\u8be2\u51fd\u6570\u597d\u8d39\u52b2.\u4e00\u7ea7\u7b5b\u9009\u53ea\u80fd\u4ee5 package \u4e3a\u5173\u952e\u8bcd\uff0c\u5982\u679c\u4e0d\u77e5\u9053\u4e00\u4e2a\u51fd\u6570\u662f\u54ea\u4e2a package \u7684\uff0c\u5c31\u65e0\u6cd5\u8fdb\u884c\u641c\u7d22\u3002\u6bd4\u5982 predict \u51fd\u6570\uff0c\u53ea\u80fd\u662f\u5148\u641c\u7d22 Predict \u5305\u7136\u540e\u4e0b\u9762\u624d\u6709 predict \u51fd\u6570\u3002
\n\u5927\u5bb6\u6709\u5565\u597d\u65b9\u6cd5\u8fdb\u884c spark \u7684\u65b9\u6cd5\u641c\u7d22\u4e48\u3002
\n", "date_published": "2016-07-23T08:56:07+00:00", "title": "SPARK \u6587\u6863\u67e5\u8be2\u597d\u8d39\u52b2", "id": "t/294408" }, { "author": { "url": "member/qfdk", "name": "qfdk", "avatar": "https://cdn.v2ex.com/avatar/3680/4f41/64426_large.png?m=1754811275" }, "url": "t/288072", "date_modified": "2017-05-14T07:37:53+00:00", "content_html": "\u6700\u8fd1\u505a\u5927\u6570\u636e\u7684\u9879\u76ee\uff0c\u78b0\u5230\u4e86\u4e00\u4e2a\u5751\uff0c Running on yarn \u7684\u65f6\u5019\u6709\u4e24\u4e2a\u6a21\u5f0f\uff0c\u4e00\u4e2a\u662f client \u4e00\u4e2a\u662f cluster \uff0c\n\u4f46\u662f\u6211\u7684 Big jar \u91cc\u9762\u9700\u8981\u8bfb \u53d6\u914d\u7f6e\u6587\u4ef6\uff0c\u914d\u7f6e\u6587\u4ef6\u5728\u672c\u5730\uff0c\u6240\u7528\u7528 cluster \u6a21\u5f0f\u4f1a\u51fa\u73b0 FilenotfondException , excutor \u4e0d\u77e5\u9053\u8fd9\u4e2a\u6587\u4ef6\u7684\u4f4d\u7f6e\uff0c\u73b0\u5728\u60f3\u8ba9excutor \u77e5\u9053\u8fd9\u4e2a\u6587\u4ef6\u7684\u4f4d\u7f6e\u548c\u5185\u5bb9
\u627e\u5230\u4e86\u51e0\u4e2a\u53c2\u6570 \u5c31\u662f spark-submit \u52a0\u4e0a--files \u4f46\u662f\u4f3c\u4e4e\u6ca1\u6709\u8d77\u5230\u4f5c\u7528\uff0c\u6c42\u4e2a\u6b63\u786e\u7684\u59ff\u52bf\u3002\n\u6709\u4e24\u4e2a\u914d\u7f6e\u6587\u4ef6\uff0c\u4e00\u4e2a\u662f project.propeties \uff0c \u4e00\u4e2a\u662f parser.properties \u7528 Scala \u641e\u5f97\uff0c\u4f46\u662f io \u53ea\u80fd\u8bfb\u53d6\u672c\u5730\u6587\u4ef6\uff0c\u8fd9\u4e2a\u6bd4\u8f83\u5751\uff0c\u56e0\u4e3a hadoop \u4e0a\u9762\u7684\u6587\u4ef6\u7cfb\u7edf\u4f3c\u4e4e\u548c\u672c\u5730\u7684\u4e0d\u4e00\u6837\u3002
\n\u8fd8\u6709\u4e00\u4e2a\u65b9\u6cd5\u4f3c\u4e4e\u662f\u7528 --properties-file \u4f46\u662f\u8fd9\u4e2a\u53ea\u80fd\u8f7d\u5165\u914d\u7f6e\u3002\u6765\u4e2a\u6b63\u786e\u7684\u59ff\u52bf\u6025\u7b49\u3002*
\nspark-submit --class \"app.Runml\" --master yarn-cluster --files \"/home/expertise/BigData2016/conf/project.properties\",\"/home/expertise/BigData2016/conf/2016.properties\" --jar ~/BigData2016/ml-assembly-1.0.jar\n\u51fa\u73b0\u4e86 FileNotFond \uff0c\u4f46\u662f client \u6a21\u5f0f\u5c31 ok \u3002\n\u90c1\u95f7\u6b7b\u4e86\uff0c\u4e0d\u60f3\u91cd\u65b0\u6539 parser \u7684\u4ee3\u7801\uff0c\u8fd9\u91cc\u662f\u8bfb\u53d6\u6587\u4ef6\u7684\u4ee3\u7801\uff0c\u8fd4\u56de\u4e00\u4e2a properties \uff0c\u53ef\u4ee5\u4e4b\u95f4 properties.get(\u4f60\u60f3\u8981\u7684\u53c2\u6570)
\nobject Tools {\n\n def conf(file: String): Properties = {\n val properties = new Properties()\n properties.load(new FileInputStream(file))\n properties\n }\n}\n\n", "date_published": "2016-06-24T08:31:56+00:00", "title": "Apache Spark \u4e4b\u95f4\u7684\u5171\u4eab\u9879\u76ee\u914d\u7f6e\u6587\u4ef6\u95ee\u9898", "id": "t/288072" }, { "author": { "url": "member/xiaochong", "name": "xiaochong", "avatar": "https://cdn.v2ex.com/avatar/f725/da31/30386_large.png?m=1354862636" }, "url": "t/95416", "title": "\u7591\u95ee\uff1aspark\u5bf9\u4e8e\u8fed\u4ee3\u8fd0\u7b97\u573a\u666f\u5f88\u6709\u4f18\u52bf\uff0c\u90a3\u5bf9\u4e8e\u8fed\u4ee3\u4e0d\u4e25\u91cd\u7684\u8ba1\u7b97\u573a\u666f\u5462\uff1f", "id": "t/95416", "date_published": "2014-01-02T12:10:30+00:00", "content_html": "\u5bf9\u4e8e\u6c47\u603bn\u5929\u7684log\u8fd9\u79cd\u626b\u4e00\u6b21\u6216\u51e0\u6b21\u7684\u975e\u4e25\u91cd\u8fed\u4ee3\u8fd0\u7b97\u7684\u573a\u666f\uff0cspark\u76f8\u6bd4\u4e8ehadoop\u800c\u8a00\u7684\u6027\u80fd\u662f\u7ee7\u7eed\u6709\u4f18\u52bf\u3001\u5dee\u4e0d\u591a\u8fd8\u662f\u5dee\u4e86\uff1f