Permalink
Please
sign in to comment.
Browse files
Set up PySpark loader and reorganize.
- remove lots of pom cruft
- Loading branch information
Showing
with
77 additions
and 49 deletions.
@@ -0,0 +1,3 @@ | ||
from twut.common import Tweets | ||
|
||
__all__ = ["Tweets"] |
@@ -0,0 +1,14 @@ | ||
from pyspark.sql import DataFrame | ||
|
||
class Tweets:
    """Python-side handle for running twut extractions on a set of tweets.

    Each extraction method forwards the wrapped tweets to the JVM-side
    ``io.archivesunleashed.TwutPy`` loader and wraps whatever it returns in a
    PySpark ``DataFrame`` bound to ``sqlContext``.
    """

    def __init__(self, sc, sqlContext, tweets):
        """Bind the wrapper to a Spark context and the tweets to analyse.

        Args:
            sc: Active ``SparkContext``; its py4j gateway (``sc._jvm``) is
                used to instantiate the ``TwutPy`` loader.
            sqlContext: SQL context handed to every ``DataFrame`` returned
                by this class.
            tweets: The tweets to analyse. Presumably a PySpark
                ``DataFrame``; an already-unwrapped Java DataFrame handle
                is accepted as well.
        """
        self.sc = sc
        self.sqlContext = sqlContext
        self.loader = sc._jvm.io.archivesunleashed.TwutPy(sc._jsc.sc())
        self.tweets = tweets

    def _java_tweets(self):
        """Return the JVM-side handle for ``self.tweets``."""
        # py4j cannot marshal a PySpark DataFrame wrapper; the JVM method
        # needs the underlying Java object, which PySpark keeps in ``_jdf``.
        # Objects without ``_jdf`` are passed through unchanged so callers
        # that already hand in a Java handle keep working.
        return getattr(self.tweets, "_jdf", self.tweets)

    def ids(self):
        """Return a ``DataFrame`` built from the loader's ``ids`` result."""
        return DataFrame(self.loader.ids(self._java_tweets()), self.sqlContext)

    def userInfo(self):
        """Return a ``DataFrame`` built from the loader's ``userInfo`` result."""
        return DataFrame(
            self.loader.userInfo(self._java_tweets()), self.sqlContext
        )

    def urls(self):
        """Return a ``DataFrame`` built from the loader's ``urls`` result."""
        # Mirrors the ``urls`` method the Scala ``TwutPy`` class exposes.
        return DataFrame(self.loader.urls(self._java_tweets()), self.sqlContext)
@@ -0,0 +1,38 @@ | ||
/* | ||
* Copyright © 2019 The Archives Unleashed Project | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package io.archivesunleashed | ||
|
||
import org.apache.spark.SparkContext | ||
import org.apache.spark.sql.DataFrame | ||
|
||
/**
 * Instance-based facade over the twut DataFrame extractors, so they can be
 * called on an object created through a py4j gateway from PySpark.
 *
 * Every call below is fully qualified on purpose: inside this class an
 * unqualified `ids(tweets)` (likewise `userInfo`, `urls`) binds to the method
 * of the same name defined here, so it would call itself unconditionally and
 * end in a StackOverflowError.
 *
 * NOTE(review): assumes the real extractors are members of the
 * `io.archivesunleashed` package object -- confirm against that file.
 *
 * @param sc the SparkContext passed by the caller; not referenced by any
 *           method of this class.
 */
class TwutPy(sc: SparkContext) {

  /** Creates a DataFrame of Tweet IDs.
   *
   * @param tweets DataFrame of tweets to extract from
   * @return the result of the package-level `ids` extractor
   */
  def ids(tweets: DataFrame): DataFrame = {
    _root_.io.archivesunleashed.ids(tweets)
  }

  /** Creates a DataFrame of Twitter User Info.
   *
   * @param tweets DataFrame of tweets to extract from
   * @return the result of the package-level `userInfo` extractor
   */
  def userInfo(tweets: DataFrame): DataFrame = {
    _root_.io.archivesunleashed.userInfo(tweets)
  }

  /** Creates a DataFrame of tweeted urls.
   *
   * @param tweets DataFrame of tweets to extract from
   * @return the result of the package-level `urls` extractor
   */
  def urls(tweets: DataFrame): DataFrame = {
    _root_.io.archivesunleashed.urls(tweets)
  }
}
0 comments on commit
416ccc0