textproc/amberfish: update to 1.7.1, take maintainership

- chase to new upstream
- always install man pages as per policy
- license changed to MIT
- submitter becomes maintainer
- turn static REINPLACE_CMD use into patches

Changelog: https://gitlab.com/amberfish/amberfish/-/releases
PR: 282880
2025-05-24 23:16:33 -04:00 · 2024-11-25 10:22:37 -05:00 · 2024-11-25 10:22:37 -05:00 · 13e3d18eb6
commit 13e3d18eb6
parent 63202843ff
6 changed files with 72 additions and 494 deletions
--- a/textproc/amberfish/Makefile
+++ b/textproc/amberfish/Makefile
@@ -1,54 +1,46 @@
 PORTNAME=	amberfish
-PORTVERSION=	1.6.4
-PORTREVISION=	3
+DISTVERSIONPREFIX=	v
+DISTVERSION=	1.7.1
 CATEGORIES=	textproc databases
-MASTER_SITES=	SF/${PORTNAME}/Amberfish%20source%20-%20stable/${PORTVERSION}	\
-		http://etymon.com/software/amberfish/stable/

-MAINTAINER=	ports@FreeBSD.org
-COMMENT=	General purpose text retrieval Software
-WWW=		https://web.archive.org/web/20100419215307/http://www.etymon.com/tr.html
+MAINTAINER=	nrn@etymon.com
+COMMENT=	Full-text search engine with command-line interface
+WWW=		https://gitlab.com/amberfish/amberfish

-LICENSE=	GPLv2
-LICENSE_FILE=	${WRKSRC}/COPYING
+LICENSE=	MIT
+LICENSE_FILE=	${WRKSRC}/LICENSE

 LIB_DEPENDS=	libxerces-c.so:textproc/xerces-c3

 USES=		gmake
+USE_GITLAB=	yes

 GNU_CONFIGURE=	yes
-GNU_CONFIGURE_MANPREFIX=${PREFIX}/share
+ALL_TARGET=	all

-ALL_TARGET=	all html
-
-PLIST_FILES=	bin/af
-PORTDOCS=	*
+PLIST_FILES=	bin/af \
+		share/man/man1/af.1.gz \
+		share/man/man3/afclose.3.gz \
+		share/man/man3/afgetresultmd.3.gz \
+		share/man/man3/afopen.3.gz \
+		share/man/man3/afsearch.3.gz \
+		share/man/man3/afsortdocid.3.gz \
+		share/man/man3/afsortscore.3.gz
+PORTDOCS=	amberfish.html

 OPTIONS_DEFINE=	DOCS
+DOCS_BUILD_DEPENDS=	asciidoctor:textproc/rubygem-asciidoctor
+DOCS_ALL_TARGET=	html

-DOCS_USES=	makeinfo
-DOCS_PLIST_FILES=	share/man/man1/af.1.gz
+post-configure:
+	${ECHO_CMD} "#define AF_VERSION \"v${DISTVERSION}\"" > ${WRKSRC}/src/backend/version.h
+	${ECHO_CMD} v${DISTVERSION} > ${WRKSRC}/doc/version.adoc

-post-extract:
-	${CP} ${FILESDIR}/porter.cc ${WRKSRC}/src
-
-post-patch:
-	@${REINPLACE_CMD} -e \
-		's|$${MAKEFLAGS} ||' ${WRKSRC}/Makefile
-	@${REINPLACE_CMD} -e \
-		's|cp |$${BSD_INSTALL_MAN} |' ${WRKSRC}/doc/Makefile.in
-	@${REINPLACE_CMD} -e \
-		's|-O3 |@CFLAGS@ | ; \
-		s|make strip|| ; \
-		s|cp |$${BSD_INSTALL_PROGRAM} |' ${WRKSRC}/src/Makefile.in
-
-post-patch-DOCS-off:
-	@${REINPLACE_CMD} -e \
-		'/cd doc/d' ${WRKSRC}/Makefile
+post-install:
+	${STRIP_CMD} ${STAGEDIR}${PREFIX}/bin/af

 post-install-DOCS-on:
-	@${MKDIR} ${STAGEDIR}${DOCSDIR}
-	${INSTALL_DATA} ${WRKSRC}/amberfish.png ${STAGEDIR}${DOCSDIR}
-	${INSTALL_DATA} ${WRKSRC}/doc/html/*.html ${STAGEDIR}${DOCSDIR}
+	${MKDIR} ${STAGEDIR}${DOCSDIR}
+	${INSTALL_DATA} ${WRKSRC}/doc/amberfish.html ${STAGEDIR}${DOCSDIR}

 .include <bsd.port.mk>
--- a/textproc/amberfish/distinfo
+++ b/textproc/amberfish/distinfo
@@ -1,2 +1,3 @@
-SHA256 (amberfish-1.6.4.tar.gz) = 155ac6e6b9b76fb7cbd94952548f718ab6add72c3b4fd2482d89abb39d96ce76
-SIZE (amberfish-1.6.4.tar.gz) = 127198
+TIMESTAMP = 1732616395
+SHA256 (amberfish-v1.7.1.tar.bz2) = 67c8b007be4652ceaafe0d93c9ac6ef40541e6163f820f8271d02704817af9a0
+SIZE (amberfish-v1.7.1.tar.bz2) = 117241
--- a/textproc/amberfish/files/patch-Makefile
+++ b/textproc/amberfish/files/patch-Makefile
@@ -0,0 +1,26 @@
+--- Makefile.orig	2024-11-23 13:45:47 UTC
+++ Makefile
+@@ -16,18 +16,18 @@ strip:
+ 	cd doc ; ${MAKE} html
+ 
+ strip:
+-	cd src/backend ; ${MAKE} ${MAKEFLAGS} strip
++	cd src/backend ; ${MAKE} strip
+ #	cd src/interface ; ${MAKE} ${MAKEFLAGS} strip
+ 
+ install:
+-	cd src/backend ; ${MAKE} ${MAKEFLAGS} install
++	cd src/backend ; ${MAKE} install
+ #	cd src/interface ; ${MAKE} ${MAKEFLAGS} install
+-	cd doc ; ${MAKE} ${MAKEFLAGS} install
++	cd doc ; ${MAKE} install
+ 
+ uninstall:
+-	cd src/backend ; ${MAKE} ${MAKEFLAGS} uninstall
++	cd src/backend ; ${MAKE} uninstall
+ #	cd src/interface ; ${MAKE} ${MAKEFLAGS} uninstall
+-	cd doc ; ${MAKE} ${MAKEFLAGS} uninstall
++	cd doc ; ${MAKE} uninstall
+ 
+ clean:
+ 	rm -fr autom4te.cache
--- a/textproc/amberfish/files/patch-src_backend_Makefile.in
+++ b/textproc/amberfish/files/patch-src_backend_Makefile.in
@@ -0,0 +1,10 @@
+--- src/backend/Makefile.in.orig	2024-11-23 13:47:04 UTC
+++ src/backend/Makefile.in
+@@ -62,7 +62,6 @@ install: all
+ 	strip ${AF}
+ 
+ install: all
+-	make strip
+ 	mkdir -p ${PREFIXBIN}
+ 	cp ${BIN} ${PREFIXBIN}/.
+ 
--- a/textproc/amberfish/files/porter.cc
+++ b/textproc/amberfish/files/porter.cc
@@ -1,438 +0,0 @@
-
-/* This is the Porter stemming algorithm, coded up in ANSI C by the
-   author. It may be be regarded as cononical, in that it follows the
-   algorithm presented in
-
-   Porter, 1980, An algorithm for suffix stripping, Program, Vol. 14,
-   no. 3, pp 130-137,
-
-   only differing from it at the points maked --DEPARTURE-- below.
-
-   See also http://www.tartarus.org/~martin/PorterStemmer
-
-The algorithm as described in the paper could be exactly replicated
-by adjusting the points of DEPARTURE, but this is barely necessary,
-because (a) the points of DEPARTURE are definitely improvements, and
-(b) no encoding of the Porter stemmer I have seen is anything like
-as exact as this version, even with the points of DEPARTURE!
-
-You can compile it on Unix with 'gcc -O3 -o stem stem.c' after which
-'stem' takes a list of inputs and sends the stemmed equivalent to
-stdout.
-
-The algorithm as encoded here is particularly fast.
-
-Release 1
-*/
-
-#include <string.h>                               /* for memmove */
-
-#define TRUE 1
-#define FALSE 0
-
-/* The main part of the stemming algorithm starts here. b is a buffer
-   holding a word to be stemmed. The letters are in b[k0], b[k0+1] ...
-   ending at b[k]. In fact k0 = 0 in this demo program. k is readjusted
-   downwards as the stemming progresses. Zero termination is not in fact
-   used in the algorithm.
-
-   Note that only lower case sequences are stemmed. Forcing to lower case
-   should be done before stem(...) is called.
-*/
-
-static char * b;                                  /* buffer for word to be stemmed */
-static int k,k0,j;                                /* j is a general offset into the string */
-
-/* cons(i) is TRUE <=> b[i] is a consonant. */
-
-static int cons(int i)
-{
-    switch (b[i])
-    {
-        case 'a': case 'e': case 'i': case 'o': case 'u': return FALSE;
-        case 'y': return (i==k0) ? TRUE : !cons(i-1);
-        default: return TRUE;
-    }
-}
-
-
-/* m() measures the number of consonant sequences between k0 and j. if c is
-   a consonant sequence and v a vowel sequence, and <..> indicates arbitrary
-   presence,
-
-      <c><v>       gives 0
-      <c>vc<v>     gives 1
-      <c>vcvc<v>   gives 2
-      <c>vcvcvc<v> gives 3
-      ....
-*/
-
-static int m()
-{
-    int n = 0;
-    int i = k0;
-    while(TRUE)
-    {
-        if (i > j) return n;
-        if (! cons(i)) break; i++;
-    }
-    i++;
-    while(TRUE)
-    {
-        while(TRUE)
-        {
-            if (i > j) return n;
-            if (cons(i)) break;
-            i++;
-        }
-        i++;
-        n++;
-        while(TRUE)
-        {
-            if (i > j) return n;
-            if (! cons(i)) break;
-            i++;
-        }
-        i++;
-    }
-}
-
-
-/* vowelinstem() is TRUE <=> k0,...j contains a vowel */
-
-static int vowelinstem()
-{
-    int i; for (i = k0; i <= j; i++) if (! cons(i)) return TRUE;
-    return FALSE;
-}
-
-
-/* doublec(j) is TRUE <=> j,(j-1) contain a double consonant. */
-
-static int doublec(int j)
-{
-    if (j < k0+1) return FALSE;
-    if (b[j] != b[j-1]) return FALSE;
-    return cons(j);
-}
-
-
-/* cvc(i) is TRUE <=> i-2,i-1,i has the form consonant - vowel - consonant
-   and also if the second c is not w,x or y. this is used when trying to
-   restore an e at the end of a short word. e.g.
-
-      cav(e), lov(e), hop(e), crim(e), but
-      snow, box, tray.
-
-*/
-
-static int cvc(int i)
-{
-    if (i < k0+2 || !cons(i) || cons(i-1) || !cons(i-2)) return FALSE;
-    {
-        int ch = b[i];
-        if (ch == 'w' || ch == 'x' || ch == 'y') return FALSE;
-    }
-    return TRUE;
-}
-
-
-/* ends(s) is TRUE <=> k0,...k ends with the string s. */
-
-static int ends(char * s)
-{
-    int length = s[0];
-    if (s[length] != b[k]) return FALSE;          /* tiny speed-up */
-    if (length > k-k0+1) return FALSE;
-    if (memcmp(b+k-length+1,s+1,length) != 0) return FALSE;
-    j = k-length;
-    return TRUE;
-}
-
-
-/* setto(s) sets (j+1),...k to the characters in the string s, readjusting
-   k. */
-
-static void setto(char * s)
-{
-    int length = s[0];
-    memmove(b+j+1,s+1,length);
-    k = j+length;
-}
-
-
-/* r(s) is used further down. */
-
-static void r(char * s) { if (m() > 0) setto(s); }
-
-/* step1ab() gets rid of plurals and -ed or -ing. e.g.
-
-	caresses  ->  caress
-	ponies    ->  poni
-	ties      ->  ti
-	caress    ->  caress
-	cats      ->  cat
-
-	feed      ->  feed
-	agreed    ->  agree
-	disabled  ->  disable
-
-	matting   ->  mat
-	mating    ->  mate
-	meeting   ->  meet
-	milling   ->  mill
-	messing   ->  mess
-
-	meetings  ->  meet
-
-*/
-
-static void step1ab()
-{
-    if (b[k] == 's')
-    {
-        if (ends("\04" "sses")) k -= 2; else
-            if (ends("\03" "ies")) setto("\01" "i"); else
-                if (b[k-1] != 's') k--;
-    }
-    if (ends("\03" "eed")) { if (m() > 0) k--; }
-    else
-    if ((ends("\02" "ed") || ends("\03" "ing")) && vowelinstem())
-    {
-        k = j;
-        if (ends("\02" "at")) setto("\03" "ate"); else
-            if (ends("\02" "bl")) setto("\03" "ble"); else
-                if (ends("\02" "iz")) setto("\03" "ize"); else
-                    if (doublec(k))
-                    {
-                        k--;
-                        {
-                            int ch = b[k];
-                            if (ch == 'l' || ch == 's' || ch == 'z') k++;
-                        }
-                    }
-        else if (m() == 1 && cvc(k)) setto("\01" "e");
-    }
-}
-
-
-/* step1c() turns terminal y to i when there is another vowel in the stem. */
-
-static void step1c() { if (ends("\01" "y") && vowelinstem()) b[k] = 'i'; }
-
-/* step2() maps double suffices to single ones. so -ization ( = -ize plus
-   -ation) maps to -ize etc. note that the string before the suffix must give
-   m() > 0. */
-
-static void step2()
-{
-    switch (b[k-1])
-    {
-        case 'a': if (ends("\07" "ational")) { r("\03" "ate"); break; }
-        if (ends("\06" "tional")) { r("\04" "tion"); break; }
-        break;
-        case 'c': if (ends("\04" "enci")) { r("\04" "ence"); break; }
-        if (ends("\04" "anci")) { r("\04" "ance"); break; }
-        break;
-        case 'e': if (ends("\04" "izer")) { r("\03" "ize"); break; }
-        break;
-        case 'l': if (ends("\03" "bli"))          /*-DEPARTURE-*/
-        {
-            r("\03" "ble"); break;
-        }
-
-/* To match the published algorithm, replace this line with
-   case 'l': if (ends("\04" "abli")) { r("\04" "able"); break; } */
-
-        if (ends("\04" "alli")) { r("\02" "al"); break; }
-        if (ends("\05" "entli")) { r("\03" "ent"); break; }
-        if (ends("\03" "eli")) { r("\01" "e"); break; }
-        if (ends("\05" "ousli")) { r("\03" "ous"); break; }
-        break;
-        case 'o': if (ends("\07" "ization")) { r("\03" "ize"); break; }
-        if (ends("\05" "ation")) { r("\03" "ate"); break; }
-        if (ends("\04" "ator")) { r("\03" "ate"); break; }
-        break;
-        case 's': if (ends("\05" "alism")) { r("\02" "al"); break; }
-        if (ends("\07" "iveness")) { r("\03" "ive"); break; }
-        if (ends("\07" "fulness")) { r("\03" "ful"); break; }
-        if (ends("\07" "ousness")) { r("\03" "ous"); break; }
-        break;
-        case 't': if (ends("\05" "aliti")) { r("\02" "al"); break; }
-        if (ends("\05" "iviti")) { r("\03" "ive"); break; }
-        if (ends("\06" "biliti")) { r("\03" "ble"); break; }
-        break;
-        case 'g': if (ends("\04" "logi"))         /*-DEPARTURE-*/
-        {
-            r("\03" "log"); break;
-        }
-
-/* To match the published algorithm, delete this line */
-
-    }
-}
-
-
-/* step3() deals with -ic-, -full, -ness etc. similar strategy to step2. */
-
-static void step3()
-{
-    switch (b[k])
-    {
-        case 'e': if (ends("\05" "icate")) { r("\02" "ic"); break; }
-        if (ends("\05" "ative")) { r("\00" ""); break; }
-        if (ends("\05" "alize")) { r("\02" "al"); break; }
-        break;
-        case 'i': if (ends("\05" "iciti")) { r("\02" "ic"); break; }
-        break;
-        case 'l': if (ends("\04" "ical")) { r("\02" "ic"); break; }
-        if (ends("\03" "ful")) { r("\00" ""); break; }
-        break;
-        case 's': if (ends("\04" "ness")) { r("\00" ""); break; }
-        break;
-    }
-}
-
-
-/* step4() takes off -ant, -ence etc., in context <c>vcvc<v>. */
-
-static void step4()
-{
-    switch (b[k-1])
-    {
-        case 'a': if (ends("\02" "al")) break; return;
-        case 'c': if (ends("\04" "ance")) break;
-        if (ends("\04" "ence")) break; return;
-        case 'e': if (ends("\02" "er")) break; return;
-        case 'i': if (ends("\02" "ic")) break; return;
-        case 'l': if (ends("\04" "able")) break;
-        if (ends("\04" "ible")) break; return;
-        case 'n': if (ends("\03" "ant")) break;
-        if (ends("\05" "ement")) break;
-        if (ends("\04" "ment")) break;
-        if (ends("\03" "ent")) break; return;
-        case 'o': if (ends("\03" "ion") && (b[j] == 's' || b[j] == 't')) break;
-        if (ends("\02" "ou")) break; return;
-/* takes care of -ous */
-        case 's': if (ends("\03" "ism")) break; return;
-        case 't': if (ends("\03" "ate")) break;
-        if (ends("\03" "iti")) break; return;
-        case 'u': if (ends("\03" "ous")) break; return;
-        case 'v': if (ends("\03" "ive")) break; return;
-        case 'z': if (ends("\03" "ize")) break; return;
-        default: return;
-    }
-    if (m() > 1) k = j;
-}
-
-
-/* step5() removes a final -e if m() > 1, and changes -ll to -l if
-   m() > 1. */
-
-static void step5()
-{
-    j = k;
-    if (b[k] == 'e')
-    {
-        int a = m();
-        if (a > 1 || a == 1 && !cvc(k-1)) k--;
-    }
-    if (b[k] == 'l' && doublec(k) && m() > 1) k--;
-}
-
-
-/* In stem(p,i,j), p is a char pointer, and the string to be stemmed is from
-   p[i] to p[j] inclusive. Typically i is zero and j is the offset to the last
-   character of a string, (p[j+1] == '\0'). The stemmer adjusts the
-   characters p[i] ... p[j] and returns the new end-point of the string, k.
-   Stemming never increases word length, so i <= k <= j. To turn the stemmer
-   into a module, declare 'stem' as extern, and delete the remainder of this
-   file.
-*/
-
-int stem(char * p, int i, int j)
-{                                                 /* copy the parameters into statics */
-    b = p; k = j; k0 = i;
-    if (k <= k0+1) return k;                      /*-DEPARTURE-*/
-
-/* With this line, strings of length 1 or 2 don't go through the
-   stemming process, although no mention is made of this in the
-   published algorithm. Remove the line to match the published
-   algorithm. */
-
-    step1ab(); step1c(); step2(); step3(); step4(); step5();
-    return k;
-}
-
-
-/*--------------------stemmer definition ends here------------------------*/
-
-#include <stdio.h>
-#include <stdlib.h>                               /* for malloc, free */
-#include <ctype.h>                                /* for isupper, islower, tolower */
-
-static char * s;                                  /* a char * (=string) pointer; passed into b above */
-
-#define INC 50                                    /* size units in which s is increased */
-static int i_max = INC;                           /* maximum offset in s */
-
-void increase_s()
-{
-    i_max += INC;
-    {
-        char * new_s = (char *) malloc(i_max+1);
-        {                                         /* copy across */
-            int i; for (i = 0; i < i_max; i++) new_s[i] = s[i];
-        }
-        free(s); s = new_s;
-    }
-}
-
-
-#define LETTER(ch) (isupper(ch) || islower(ch))
-
-static void stemfile(FILE * f)
-{
-    while(TRUE)
-    {
-        int ch = getc(f);
-        if (ch == EOF) return;
-        if (LETTER(ch))
-        {
-            int i = 0;
-            while(TRUE)
-            {
-                if (i == i_max) increase_s();
-
-                ch = tolower(ch);                 /* forces lower case */
-
-                s[i] = ch; i++;
-                ch = getc(f);
-                if (!LETTER(ch)) { ungetc(ch,f); break; }
-            }
-            s[stem(s,0,i-1)+1] = 0;
-/* the previous line calls the stemmer and uses its result to
-   zero-terminate the string in s */
-            printf("%s",s);
-        }
-        else putchar(ch);
-    }
-}
-
-/*
- * Commented out as required by amberfish's INSTALL file
- *
-	int main(int argc, char * argv[])
-	{
-	    int i;
-	    s = (char *) malloc(i_max+1);
-	    for (i = 1; i < argc; i++)
-	    {
-	        FILE * f = fopen(argv[i],"r");
-	        if (f == 0) { fprintf(stderr,"File %s not found\n",argv[i]); exit(1); }
-	        stemfile(f);
-	    }
-	    free(s);
-	    return 0;
-	}
-*/
--- a/textproc/amberfish/pkg-descr
+++ b/textproc/amberfish/pkg-descr
@@ -1,19 +1,6 @@
-Amberfish is general purpose text retrieval software, developed at Etymon
-by Nassib Nassar and distributed as open source software under the terms
-of version 2 of the GNU General Public License (GPL). Its distinguishing
-features are indexing/search of semi-structured text (i.e. both free tex
-and multiply nested fields), built-in support for XML documents using the
-Xerces library, structured queries allowing generalized field/tag paths,
-hierarchical result sets (XML only), automatic searching across multiple
-databases (allowing modular indexing), TREC format results, efficient
-indexing, and relatively low memory requirements during indexing (and the
-ability to index documents larger than available memory). Z39.50 support
-is available. Other features include Boolean queries, right truncation,
-phrase searching, relevance ranking, support for multiple documents per
-file, incremental indexing, and easy integration with other UNIX tools,
-The architecture is also designed to permit proximity queries; however,
-they are not fully implemented at present.
-
-This port also includes the Porter stemming algorithm for suffix
-stripping, available at:
-     http://www.tartarus.org/~martin/PorterStemmer
+Amberfish is a full-text search engine with a command-line interface.
+Its features include free-text and Boolean queries, relevance-ranked
+results, wildcard search, phrase search, field search and structured
+field path queries for XML, multiple documents per file and nested
+documents, searching across multiple indexes, incremental update of
+indexes, and low memory requirements for building indexes.