mirror of
https://git.freebsd.org/ports.git
synced 2025-07-18 17:59:20 -04:00
New port: misc/libpostal: Library for parsing/normalizing street addresses around the world
PR: 224262 Submitted by: Dmitri Goutnik <dg@syrec.org> Approved by: adamw (mentor) Differential Revision: https://reviews.freebsd.org/D13468
This commit is contained in:
parent
f0294bafaa
commit
4dbc2863a7
Notes:
svn2git
2021-03-31 03:12:20 +00:00
svn path=/head/; revision=456691
11 changed files with 224 additions and 0 deletions
|
@ -250,6 +250,7 @@
|
|||
SUBDIR += libisocodes
|
||||
SUBDIR += libkdeedu
|
||||
SUBDIR += libmetalink
|
||||
SUBDIR += libpostal
|
||||
SUBDIR += libpri
|
||||
SUBDIR += libsupertone
|
||||
SUBDIR += libutf
|
||||
|
|
58
misc/libpostal/Makefile
Normal file
58
misc/libpostal/Makefile
Normal file
|
@ -0,0 +1,58 @@
|
|||
# $FreeBSD$
|
||||
|
||||
# Port identity: name, version and the categories it is listed under.
PORTNAME= libpostal
|
||||
# Prefix placed in front of DISTVERSION; together they give "v1.0.0".
DISTVERSIONPREFIX= v
|
||||
DISTVERSION= 1.0.0
|
||||
CATEGORIES= misc geography
|
||||
|
||||
MAINTAINER= dg@syrec.org
|
||||
COMMENT= Library for parsing/normalizing street addresses around the world
|
||||
|
||||
LICENSE= MIT
|
||||
# The license text is taken from the LICENSE file in the extracted sources.
LICENSE_FILE= ${WRKSRC}/LICENSE
|
||||
|
||||
# curl is needed at run time only.
# NOTE(review): presumably for the libpostal_data download script -- confirm.
RUN_DEPENDS= curl:ftp/curl
|
||||
|
||||
USES= autoreconf libtool localbase
|
||||
GNU_CONFIGURE= yes
|
||||
USE_LDCONFIG= yes
|
||||
# Point configure's --datadir at LIBPOSTAL_DATADIR (defined further down) and
# pass --disable-data-download.
CONFIGURE_ARGS= --datadir=${LIBPOSTAL_DATADIR} \
|
||||
--disable-data-download
|
||||
# Sources are fetched from GitHub under the "openvenues" account.
USE_GITHUB= yes
|
||||
GH_ACCOUNT= openvenues
|
||||
# Upstream target to run for the port's test stage.
TEST_TARGET= check
|
||||
|
||||
# Data directory; "?=" keeps any value already set by the user.
LIBPOSTAL_DATADIR?= /var/db/${PORTNAME}
|
||||
|
||||
# pkg-message is generated from files/pkg-message.in; SUB_LIST supplies the
# values for its %%PORTNAME%% and %%LIBPOSTAL_DATADIR%% placeholders.
SUB_FILES= pkg-message
|
||||
SUB_LIST= PORTNAME=${PORTNAME} \
|
||||
LIBPOSTAL_DATADIR=${LIBPOSTAL_DATADIR}
|
||||
|
||||
# Documentation files installed by the post-install-DOCS-on target.
PORTDOCS= README.md
|
||||
|
||||
# User-selectable options; CBLAS and SSE2 get port-specific descriptions.
OPTIONS_DEFINE= CBLAS DOCS SSE2 STATIC
|
||||
CBLAS_DESC= Build with CBLAS/OPENBLAS
|
||||
|
||||
SSE2_DESC= Enable SSE2 optimization
|
||||
# Makes per-option %%OPT%% prefixes available in pkg-plist (used there as
# %%STATIC%% for lib/libpostal.a).
OPTIONS_SUB= yes
|
||||
|
||||
# CBLAS option: cblas.h from math/cblas is a build dependency, libopenblas.so
# from math/openblas is a library dependency, and configure is given
# --with-cblas pointing at that library (--without-cblas when the option is off).
CBLAS_BUILD_DEPENDS= ${LOCALBASE}/include/cblas.h:math/cblas
|
||||
CBLAS_LIB_DEPENDS= libopenblas.so:math/openblas
|
||||
CBLAS_CONFIGURE_WITH= cblas=${LOCALBASE}/lib/libopenblas.so
|
||||
|
||||
# SSE2 option toggles configure's --enable-sse2 / --disable-sse2.
SSE2_CONFIGURE_ENABLE= sse2
|
||||
|
||||
# STATIC option toggles configure's --enable-static / --disable-static.
STATIC_CONFIGURE_ENABLE= static
|
||||
|
||||
# Before the configure stage, run upstream's bootstrap.sh from the top of the
# extracted source tree.
pre-configure:
	@(cd ${WRKSRC} && ${SH} bootstrap.sh)
|
||||
|
||||
# After upstream's install step: strip the staged shared library and stage the
# two command-line programs, which src/Makefile.am declares as noinst_PROGRAMS
# and therefore does not install itself.
#
# address_parser is copied straight from src/.  libpostal, however, is linked
# against the uninstalled libpostal.la, so libtool leaves only a wrapper shell
# script at src/libpostal that is meant to be run from the build tree; the real
# executable lives in src/.libs/.  Install that executable (stripped via
# INSTALL_PROGRAM) rather than the wrapper script.
post-install:
	@${STRIP_CMD} ${STAGEDIR}${PREFIX}/lib/libpostal.so
	${INSTALL_PROGRAM} ${WRKSRC}/src/address_parser ${STAGEDIR}${PREFIX}/bin
	${INSTALL_PROGRAM} ${WRKSRC}/src/.libs/libpostal ${STAGEDIR}${PREFIX}/bin
|
||||
|
||||
# Runs only when the DOCS option is enabled: create the staged documentation
# directory and copy every file listed in PORTDOCS into it from the source tree.
post-install-DOCS-on:
	@${MKDIR} ${STAGEDIR}${DOCSDIR}
	cd ${WRKSRC} && ${INSTALL_MAN} ${PORTDOCS} ${STAGEDIR}${DOCSDIR}
|
||||
|
||||
.include <bsd.port.mk>
|
3
misc/libpostal/distinfo
Normal file
3
misc/libpostal/distinfo
Normal file
|
@ -0,0 +1,3 @@
|
|||
TIMESTAMP = 1513006366
|
||||
SHA256 (openvenues-libpostal-v1.0.0_GH0.tar.gz) = 3035af7e15b2894069753975d953fa15a86d968103913dbf8ce4b8aa26231644
|
||||
SIZE (openvenues-libpostal-v1.0.0_GH0.tar.gz) = 5537587
|
11
misc/libpostal/files/patch-configure.ac
Normal file
11
misc/libpostal/files/patch-configure.ac
Normal file
|
@ -0,0 +1,11 @@
|
|||
--- configure.ac.orig 2017-04-07 21:40:27 UTC
|
||||
+++ configure.ac
|
||||
@@ -19,7 +19,7 @@ AC_CONFIG_HEADERS([config.h])
|
||||
AC_PROG_CC_C99
|
||||
AC_PROG_INSTALL
|
||||
|
||||
-LDFLAGS="$LDFLAGS -L/usr/local/lib"
|
||||
+#LDFLAGS="$LDFLAGS -L/usr/local/lib"
|
||||
|
||||
# Checks for libraries.
|
||||
AC_SEARCH_LIBS([log],
|
74
misc/libpostal/files/patch-src_Makefile.am
Normal file
74
misc/libpostal/files/patch-src_Makefile.am
Normal file
|
@ -0,0 +1,74 @@
|
|||
--- src/Makefile.am.orig 2017-04-07 21:40:27 UTC
|
||||
+++ src/Makefile.am
|
||||
@@ -1,11 +1,11 @@
|
||||
# Inherited from autoconf / user-specified
|
||||
CFLAGS_CONF = @CFLAGS@
|
||||
-CFLAGS_BASE = -Wall -Wextra -Wno-unused-function -Wformat -Werror=format-security -Winit-self -Wno-sign-compare -DLIBPOSTAL_DATA_DIR='"$(datadir)/libpostal"' -g $(CFLAGS_CONF)
|
||||
+CFLAGS_BASE = -Wall -Wextra -Wno-unused-function -Wformat -Werror=format-security -Winit-self -Wno-sign-compare -DLIBPOSTAL_DATA_DIR='"$(datadir)"' $(CFLAGS_CONF)
|
||||
CFLAGS_O0 = $(CFLAGS_BASE) -O0
|
||||
CFLAGS_O1 = $(CFLAGS_BASE) -O1
|
||||
CFLAGS_O2 = $(CFLAGS_BASE) -O2
|
||||
CFLAGS_O3 = $(CFLAGS_BASE) -O3
|
||||
-DEFAULT_INCLUDES = -I.. -I/usr/local/include
|
||||
+DEFAULT_INCLUDES = -I..
|
||||
|
||||
# Wonky but have to be able to override the user's optimization level to compile the scanner
|
||||
# as it takes an unreasonably long time to compile with the optimizer on.
|
||||
@@ -14,7 +14,7 @@ CFLAGS =
|
||||
lib_LTLIBRARIES = libpostal.la
|
||||
libpostal_la_SOURCES = libpostal.c address_dictionary.c transliterate.c tokens.c trie.c trie_search.c trie_utils.c string_utils.c file_utils.c numex.c utf8proc/utf8proc.c cmp/cmp.c normalize.c features.c unicode_scripts.c address_parser.c address_parser_io.c averaged_perceptron.c crf.c crf_context.c sparse_matrix.c averaged_perceptron_tagger.c graph.c graph_builder.c language_classifier.c language_features.c logistic_regression.c logistic.c minibatch.c float_utils.c ngrams.c
|
||||
libpostal_la_LIBADD = libscanner.la $(CBLAS_LIBS)
|
||||
-libpostal_la_CFLAGS = $(CFLAGS_O2)
|
||||
+libpostal_la_CFLAGS = $(CFLAGS_BASE)
|
||||
libpostal_la_LDFLAGS = -version-info @LIBPOSTAL_SO_VERSION@
|
||||
|
||||
dist_bin_SCRIPTS = libpostal_data
|
||||
@@ -30,37 +30,37 @@ noinst_PROGRAMS = libpostal bench addres
|
||||
|
||||
libpostal_SOURCES = main.c json_encode.c
|
||||
libpostal_LDADD = libpostal.la
|
||||
-libpostal_CFLAGS = $(CFLAGS_O3)
|
||||
+libpostal_CFLAGS = $(CFLAGS_BASE)
|
||||
bench_SOURCES = bench.c
|
||||
bench_LDADD = libpostal.la libscanner.la $(CBLAS_LIBS)
|
||||
-bench_CFLAGS = $(CFLAGS_O3)
|
||||
+bench_CFLAGS = $(CFLAGS_BASE)
|
||||
address_parser_SOURCES = address_parser_cli.c json_encode.c linenoise/linenoise.c libpostal.c address_parser.c address_parser_io.c averaged_perceptron.c crf.c crf_context.c sparse_matrix.c graph.c graph_builder.c float_utils.c averaged_perceptron_tagger.c address_dictionary.c normalize.c features.c unicode_scripts.c transliterate.c trie.c trie_search.c trie_utils.c string_utils.c tokens.c file_utils.c utf8proc/utf8proc.c ngrams.c numex.c language_classifier.c language_features.c logistic_regression.c logistic.c minibatch.c
|
||||
address_parser_LDADD = libscanner.la $(CBLAS_LIBS)
|
||||
-address_parser_CFLAGS = $(CFLAGS_O3)
|
||||
+address_parser_CFLAGS = $(CFLAGS_BASE)
|
||||
|
||||
build_address_dictionary_SOURCES = address_dictionary_builder.c address_dictionary.c file_utils.c string_utils.c trie.c trie_search.c utf8proc/utf8proc.c
|
||||
-build_address_dictionary_CFLAGS = $(CFLAGS_O3)
|
||||
+build_address_dictionary_CFLAGS = $(CFLAGS_BASE)
|
||||
build_numex_table_SOURCES = numex_table_builder.c numex.c file_utils.c string_utils.c tokens.c trie.c trie_search.c utf8proc/utf8proc.c
|
||||
-build_numex_table_CFLAGS = $(CFLAGS_O3)
|
||||
+build_numex_table_CFLAGS = $(CFLAGS_BASE)
|
||||
build_trans_table_SOURCES = transliteration_table_builder.c transliterate.c trie.c trie_search.c file_utils.c string_utils.c utf8proc/utf8proc.c
|
||||
-build_trans_table_CFLAGS = $(CFLAGS_O3)
|
||||
+build_trans_table_CFLAGS = $(CFLAGS_BASE)
|
||||
address_parser_train_SOURCES = address_parser_train.c address_parser.c address_parser_io.c averaged_perceptron.c crf.c crf_context.c sparse_matrix.c graph.c graph_builder.c float_utils.c averaged_perceptron_trainer.c crf_trainer.c crf_trainer_averaged_perceptron.c averaged_perceptron_tagger.c address_dictionary.c normalize.c features.c unicode_scripts.c transliterate.c trie.c trie_search.c trie_utils.c string_utils.c tokens.c file_utils.c shuffle.c utf8proc/utf8proc.c ngrams.c
|
||||
address_parser_train_LDADD = libscanner.la $(CBLAS_LIBS)
|
||||
-address_parser_train_CFLAGS = $(CFLAGS_O3)
|
||||
+address_parser_train_CFLAGS = $(CFLAGS_BASE)
|
||||
|
||||
address_parser_test_SOURCES = address_parser_test.c address_parser.c address_parser_io.c averaged_perceptron.c crf.c crf_context.c sparse_matrix.c graph.c graph_builder.c float_utils.c averaged_perceptron_tagger.c address_dictionary.c normalize.c features.c unicode_scripts.c transliterate.c trie.c trie_search.c trie_utils.c string_utils.c tokens.c file_utils.c utf8proc/utf8proc.c ngrams.c
|
||||
address_parser_test_LDADD = libscanner.la $(CBLAS_LIBS)
|
||||
-address_parser_test_CFLAGS = $(CFLAGS_O3)
|
||||
+address_parser_test_CFLAGS = $(CFLAGS_BASE)
|
||||
|
||||
language_classifier_train_SOURCES = language_classifier_train.c language_classifier.c language_features.c language_classifier_io.c logistic_regression_trainer.c logistic_regression.c logistic.c sparse_matrix.c sparse_matrix_utils.c features.c minibatch.c float_utils.c stochastic_gradient_descent.c ftrl.c regularization.c cartesian_product.c normalize.c transliterate.c trie.c trie_search.c trie_utils.c address_dictionary.c string_utils.c file_utils.c utf8proc/utf8proc.c unicode_scripts.c shuffle.c
|
||||
language_classifier_train_LDADD = libscanner.la $(CBLAS_LIBS)
|
||||
-language_classifier_train_CFLAGS = $(CFLAGS_O3)
|
||||
+language_classifier_train_CFLAGS = $(CFLAGS_BASE)
|
||||
language_classifier_SOURCES = language_classifier_cli.c language_classifier.c language_features.c logistic_regression.c logistic.c sparse_matrix.c features.c minibatch.c float_utils.c normalize.c transliterate.c trie.c trie_search.c trie_utils.c address_dictionary.c string_utils.c file_utils.c utf8proc/utf8proc.c unicode_scripts.c
|
||||
language_classifier_LDADD = libscanner.la $(CBLAS_LIBS)
|
||||
-language_classifier_CFLAGS = $(CFLAGS_O3)
|
||||
+language_classifier_CFLAGS = $(CFLAGS_BASE)
|
||||
language_classifier_test_SOURCES = language_classifier_test.c language_classifier.c language_classifier_io.c language_features.c logistic_regression.c logistic.c sparse_matrix.c features.c minibatch.c float_utils.c normalize.c transliterate.c trie.c trie_search.c trie_utils.c address_dictionary.c string_utils.c file_utils.c utf8proc/utf8proc.c unicode_scripts.c
|
||||
language_classifier_test_LDADD = libscanner.la $(CBLAS_LIBS)
|
||||
-language_classifier_test_CFLAGS = $(CFLAGS_O3)
|
||||
+language_classifier_test_CFLAGS = $(CFLAGS_BASE)
|
||||
|
||||
|
||||
pkginclude_HEADERS = libpostal.h
|
23
misc/libpostal/files/patch-src_libpostal__data
Normal file
23
misc/libpostal/files/patch-src_libpostal__data
Normal file
|
@ -0,0 +1,23 @@
|
|||
--- src/libpostal_data.orig 2017-04-07 21:40:27 UTC
|
||||
+++ src/libpostal_data
|
||||
@@ -78,9 +78,9 @@ download_multipart() {
|
||||
else
|
||||
max=$size;
|
||||
fi;
|
||||
- printf "%s\0%s\0%s\0%s\0%s\0" "$i" "$offset" "$max" "$url" "$part_filename"
|
||||
+ printf "%s\0%s\0%s\0%s\0%s\0%s\0" "x" "$i" "$offset" "$max" "$url" "$part_filename"
|
||||
offset=$((offset+CHUNK_SIZE))
|
||||
- done | xargs -0 -n 5 -P $NUM_WORKERS sh -c "$DOWNLOAD_PART" --
|
||||
+ done | xargs -0 -n 6 -P $NUM_WORKERS sh -c "$DOWNLOAD_PART" --
|
||||
|
||||
> $local_path
|
||||
|
||||
@@ -176,6 +176,8 @@ if [ $COMMAND = "download" ]; then
|
||||
download_file $LIBPOSTAL_LANG_CLASS_UPDATED_PATH $LIBPOSTAL_DATA_DIR $lang_class_s3_prefix $LIBPOSTAL_LANG_CLASS_FILE "language classifier data file" $LANGUAGE_CLASSIFIER_MODULE_DIR
|
||||
fi
|
||||
|
||||
+ chown -R root:wheel $LIBPOSTAL_DATA_DIR
|
||||
+
|
||||
if [ "$LIBPOSTAL_DATA_DIR_VERSION" != "$LIBPOSTAL_VERSION_STRING" ]; then
|
||||
echo $LIBPOSTAL_VERSION_STRING > $LIBPOSTAL_DATA_VERSION_FILE;
|
||||
fi
|
15
misc/libpostal/files/patch-src_sparkey_Makefile.am
Normal file
15
misc/libpostal/files/patch-src_sparkey_Makefile.am
Normal file
|
@ -0,0 +1,15 @@
|
|||
--- src/sparkey/Makefile.am.orig 2017-04-07 21:40:27 UTC
|
||||
+++ src/sparkey/Makefile.am
|
||||
@@ -1,5 +1,5 @@
|
||||
CFLAGS_CONF = @CFLAGS@
|
||||
-CFLAGS = -I/usr/local/include -O2 -Wall -Wextra -Wfloat-equal -Wshadow -Wpointer-arith -Werror -pedantic $(CFLAGS_CONF)
|
||||
+CFLAGS = -Wall -Wextra -Wfloat-equal -Wshadow -Wpointer-arith -Werror -pedantic $(CFLAGS_CONF)
|
||||
|
||||
noinst_LTLIBRARIES = libsparkey.la
|
||||
libsparkey_la_SOURCES = endiantools.h hashheader.h logheader.h \
|
||||
@@ -8,4 +8,4 @@ logreader.c returncodes.c util.c buf.h h
|
||||
sparkey.h util.h endiantools.c \
|
||||
hashheader.c hashreader.c logheader.c logwriter.c MurmurHash3.c \
|
||||
sparkey-internal.h
|
||||
-libsparkey_la_LDFLAGS = -L/usr/local/lib
|
||||
+#libsparkey_la_LDFLAGS = -L/usr/local/lib
|
20
misc/libpostal/files/patch-test_Makefile.am
Normal file
20
misc/libpostal/files/patch-test_Makefile.am
Normal file
|
@ -0,0 +1,20 @@
|
|||
--- test/Makefile.am.orig 2017-04-07 21:40:27 UTC
|
||||
+++ test/Makefile.am
|
||||
@@ -1,9 +1,9 @@
|
||||
-CFLAGS_BASE = -Wfloat-equal -Wpointer-arith -std=gnu99 -DLIBPOSTAL_DATA_DIR='"$(datadir)/libpostal"' -g
|
||||
+CFLAGS_BASE = -Wfloat-equal -Wpointer-arith -std=gnu99 -DLIBPOSTAL_DATA_DIR='"$(datadir)"'
|
||||
CFLAGS_O0 = $(CFLAGS_BASE) -O0
|
||||
CFLAGS_O1 = $(CFLAGS_BASE) -O1
|
||||
CFLAGS_O2 = $(CFLAGS_BASE) -O2
|
||||
CFLAGS_O3 = $(CFLAGS_BASE) -O3
|
||||
-DEFAULT_INCLUDES = -I.. -I/usr/local/include
|
||||
+DEFAULT_INCLUDES = -I..
|
||||
|
||||
CFLAGS = $(CFLAGS_BASE)
|
||||
|
||||
@@ -11,4 +11,4 @@ TESTS = test_libpostal
|
||||
noinst_PROGRAMS = test_libpostal
|
||||
test_libpostal_SOURCES = test.c test_expand.c test_parser.c test_transliterate.c test_numex.c test_trie.c test_string_utils.c test_crf_context.c
|
||||
test_libpostal_LDADD = ../src/libpostal.la $(CBLAS_LIBS)
|
||||
-test_libpostal_CFLAGS = $(CFLAGS_O3)
|
||||
+test_libpostal_CFLAGS = $(CFLAGS_BASE)
|
4
misc/libpostal/files/pkg-message.in
Normal file
4
misc/libpostal/files/pkg-message.in
Normal file
|
@ -0,0 +1,4 @@
|
|||
%%PORTNAME%% requires model data (about 1.5 GB), which can be downloaded using
|
||||
the following command:
|
||||
|
||||
# %%PREFIX%%/bin/libpostal_data download all %%LIBPOSTAL_DATADIR%%
|
6
misc/libpostal/pkg-descr
Normal file
6
misc/libpostal/pkg-descr
Normal file
|
@ -0,0 +1,6 @@
|
|||
C library for parsing/normalizing street addresses around the world, powered
|
||||
by statistical NLP and open geo data. This library helps convert the
|
||||
free-form addresses that humans use into clean normalized forms suitable for
|
||||
machine comparison and full-text indexing.
|
||||
|
||||
WWW: https://github.com/openvenues/libpostal
|
9
misc/libpostal/pkg-plist
Normal file
9
misc/libpostal/pkg-plist
Normal file
|
@ -0,0 +1,9 @@
|
|||
bin/address_parser
|
||||
bin/libpostal
|
||||
bin/libpostal_data
|
||||
include/libpostal/libpostal.h
|
||||
%%STATIC%%lib/libpostal.a
|
||||
lib/libpostal.so
|
||||
lib/libpostal.so.1
|
||||
lib/libpostal.so.1.0.0
|
||||
libdata/pkgconfig/libpostal.pc
|
Loading…
Add table
Reference in a new issue