Permalink
Please sign in to comment.
Browse files
Python formatting, and gitignore additions. (#326)
- Run black and isort on Python files.
- Move Spark config to example file.
- Update gitignore for 7a61f0e additions.
- Loading branch information...
Showing 10 changed files with 115 additions and 59 deletions.
- +5 −0 .gitignore
- +1 −2 src/main/python/aut/__init__.py
- +1 −1 src/main/python/aut/common.py
- +6 −4 src/main/python/aut/udfs.py
- +10 −6 src/main/python/tf/detect.py
- +11 −5 src/main/python/tf/extract_images.py
- +28 −22 src/main/python/tf/model/object_detection.py
- +2 −3 src/main/python/tf/model/preprocess.py
- +51 −16 src/main/python/tf/util/init.py
- 0 src/main/python/tf/util/{spark.conf → spark.conf.example}
@@ -1,5 +1,4 @@ | ||
from aut.common import WebArchive | ||
from aut.udfs import extract_domain | ||
|
||
__all__ = ['WebArchive', 'extract_domain'] | ||
|
||
__all__ = ["WebArchive", "extract_domain"] |
@@ -1,11 +1,13 @@ | ||
from pyspark.sql.functions import udf | ||
from pyspark.sql.types import StringType | ||
|
||
|
||
def extract_domain_func(url): | ||
url = url.replace('http://', '').replace('https://', '') | ||
if '/' in url: | ||
return url.split('/')[0].replace('www.', '') | ||
url = url.replace("http://", "").replace("https://", "") | ||
if "/" in url: | ||
return url.split("/")[0].replace("www.", "") | ||
else: | ||
return url.replace('www.', '') | ||
return url.replace("www.", "") | ||
|
||
|
||
extract_domain = udf(extract_domain_func, StringType()) |
File renamed without changes.
0 comments on commit bd5ef14