mirror of
https://git.freebsd.org/ports.git
synced 2025-04-28 01:26:39 -04:00
textproc/amberfish: update to 1.7.1, take maintainership
- chase to new upstream - always install man pages as per policy - license changed to MIT - submitter becomes maintainer - turn static REINPLACE_CMD use into patches Changelog: https://gitlab.com/amberfish/amberfish/-/releases PR: 282880
This commit is contained in:
parent
63202843ff
commit
13e3d18eb6
6 changed files with 72 additions and 494 deletions
|
@ -1,54 +1,46 @@
|
|||
PORTNAME= amberfish
|
||||
PORTVERSION= 1.6.4
|
||||
PORTREVISION= 3
|
||||
DISTVERSIONPREFIX= v
|
||||
DISTVERSION= 1.7.1
|
||||
CATEGORIES= textproc databases
|
||||
MASTER_SITES= SF/${PORTNAME}/Amberfish%20source%20-%20stable/${PORTVERSION} \
|
||||
http://etymon.com/software/amberfish/stable/
|
||||
|
||||
MAINTAINER= ports@FreeBSD.org
|
||||
COMMENT= General purpose text retrieval Software
|
||||
WWW= https://web.archive.org/web/20100419215307/http://www.etymon.com/tr.html
|
||||
MAINTAINER= nrn@etymon.com
|
||||
COMMENT= Full-text search engine with command-line interface
|
||||
WWW= https://gitlab.com/amberfish/amberfish
|
||||
|
||||
LICENSE= GPLv2
|
||||
LICENSE_FILE= ${WRKSRC}/COPYING
|
||||
LICENSE= MIT
|
||||
LICENSE_FILE= ${WRKSRC}/LICENSE
|
||||
|
||||
LIB_DEPENDS= libxerces-c.so:textproc/xerces-c3
|
||||
|
||||
USES= gmake
|
||||
USE_GITLAB= yes
|
||||
|
||||
GNU_CONFIGURE= yes
|
||||
GNU_CONFIGURE_MANPREFIX=${PREFIX}/share
|
||||
ALL_TARGET= all
|
||||
|
||||
ALL_TARGET= all html
|
||||
|
||||
PLIST_FILES= bin/af
|
||||
PORTDOCS= *
|
||||
PLIST_FILES= bin/af \
|
||||
share/man/man1/af.1.gz \
|
||||
share/man/man3/afclose.3.gz \
|
||||
share/man/man3/afgetresultmd.3.gz \
|
||||
share/man/man3/afopen.3.gz \
|
||||
share/man/man3/afsearch.3.gz \
|
||||
share/man/man3/afsortdocid.3.gz \
|
||||
share/man/man3/afsortscore.3.gz
|
||||
PORTDOCS= amberfish.html
|
||||
|
||||
OPTIONS_DEFINE= DOCS
|
||||
DOCS_BUILD_DEPENDS= asciidoctor:textproc/rubygem-asciidoctor
|
||||
DOCS_ALL_TARGET= html
|
||||
|
||||
DOCS_USES= makeinfo
|
||||
DOCS_PLIST_FILES= share/man/man1/af.1.gz
|
||||
post-configure:
|
||||
${ECHO_CMD} "#define AF_VERSION \"v${DISTVERSION}\"" > ${WRKSRC}/src/backend/version.h
|
||||
${ECHO_CMD} v${DISTVERSION} > ${WRKSRC}/doc/version.adoc
|
||||
|
||||
post-extract:
|
||||
${CP} ${FILESDIR}/porter.cc ${WRKSRC}/src
|
||||
|
||||
post-patch:
|
||||
@${REINPLACE_CMD} -e \
|
||||
's|$${MAKEFLAGS} ||' ${WRKSRC}/Makefile
|
||||
@${REINPLACE_CMD} -e \
|
||||
's|cp |$${BSD_INSTALL_MAN} |' ${WRKSRC}/doc/Makefile.in
|
||||
@${REINPLACE_CMD} -e \
|
||||
's|-O3 |@CFLAGS@ | ; \
|
||||
s|make strip|| ; \
|
||||
s|cp |$${BSD_INSTALL_PROGRAM} |' ${WRKSRC}/src/Makefile.in
|
||||
|
||||
post-patch-DOCS-off:
|
||||
@${REINPLACE_CMD} -e \
|
||||
'/cd doc/d' ${WRKSRC}/Makefile
|
||||
post-install:
|
||||
${STRIP_CMD} ${STAGEDIR}${PREFIX}/bin/af
|
||||
|
||||
post-install-DOCS-on:
|
||||
@${MKDIR} ${STAGEDIR}${DOCSDIR}
|
||||
${INSTALL_DATA} ${WRKSRC}/amberfish.png ${STAGEDIR}${DOCSDIR}
|
||||
${INSTALL_DATA} ${WRKSRC}/doc/html/*.html ${STAGEDIR}${DOCSDIR}
|
||||
${MKDIR} ${STAGEDIR}${DOCSDIR}
|
||||
${INSTALL_DATA} ${WRKSRC}/doc/amberfish.html ${STAGEDIR}${DOCSDIR}
|
||||
|
||||
.include <bsd.port.mk>
|
||||
|
|
|
@ -1,2 +1,3 @@
|
|||
SHA256 (amberfish-1.6.4.tar.gz) = 155ac6e6b9b76fb7cbd94952548f718ab6add72c3b4fd2482d89abb39d96ce76
|
||||
SIZE (amberfish-1.6.4.tar.gz) = 127198
|
||||
TIMESTAMP = 1732616395
|
||||
SHA256 (amberfish-v1.7.1.tar.bz2) = 67c8b007be4652ceaafe0d93c9ac6ef40541e6163f820f8271d02704817af9a0
|
||||
SIZE (amberfish-v1.7.1.tar.bz2) = 117241
|
||||
|
|
26
textproc/amberfish/files/patch-Makefile
Normal file
26
textproc/amberfish/files/patch-Makefile
Normal file
|
@ -0,0 +1,26 @@
|
|||
--- Makefile.orig 2024-11-23 13:45:47 UTC
|
||||
+++ Makefile
|
||||
@@ -16,18 +16,18 @@ strip:
|
||||
cd doc ; ${MAKE} html
|
||||
|
||||
strip:
|
||||
- cd src/backend ; ${MAKE} ${MAKEFLAGS} strip
|
||||
+ cd src/backend ; ${MAKE} strip
|
||||
# cd src/interface ; ${MAKE} ${MAKEFLAGS} strip
|
||||
|
||||
install:
|
||||
- cd src/backend ; ${MAKE} ${MAKEFLAGS} install
|
||||
+ cd src/backend ; ${MAKE} install
|
||||
# cd src/interface ; ${MAKE} ${MAKEFLAGS} install
|
||||
- cd doc ; ${MAKE} ${MAKEFLAGS} install
|
||||
+ cd doc ; ${MAKE} install
|
||||
|
||||
uninstall:
|
||||
- cd src/backend ; ${MAKE} ${MAKEFLAGS} uninstall
|
||||
+ cd src/backend ; ${MAKE} uninstall
|
||||
# cd src/interface ; ${MAKE} ${MAKEFLAGS} uninstall
|
||||
- cd doc ; ${MAKE} ${MAKEFLAGS} uninstall
|
||||
+ cd doc ; ${MAKE} uninstall
|
||||
|
||||
clean:
|
||||
rm -fr autom4te.cache
|
10
textproc/amberfish/files/patch-src_backend_Makefile.in
Normal file
10
textproc/amberfish/files/patch-src_backend_Makefile.in
Normal file
|
@ -0,0 +1,10 @@
|
|||
--- src/backend/Makefile.in.orig 2024-11-23 13:47:04 UTC
|
||||
+++ src/backend/Makefile.in
|
||||
@@ -62,7 +62,6 @@ install: all
|
||||
strip ${AF}
|
||||
|
||||
install: all
|
||||
- make strip
|
||||
mkdir -p ${PREFIXBIN}
|
||||
cp ${BIN} ${PREFIXBIN}/.
|
||||
|
|
@ -1,438 +0,0 @@
|
|||
|
||||
/* This is the Porter stemming algorithm, coded up in ANSI C by the
|
||||
author. It may be be regarded as cononical, in that it follows the
|
||||
algorithm presented in
|
||||
|
||||
Porter, 1980, An algorithm for suffix stripping, Program, Vol. 14,
|
||||
no. 3, pp 130-137,
|
||||
|
||||
only differing from it at the points maked --DEPARTURE-- below.
|
||||
|
||||
See also http://www.tartarus.org/~martin/PorterStemmer
|
||||
|
||||
The algorithm as described in the paper could be exactly replicated
|
||||
by adjusting the points of DEPARTURE, but this is barely necessary,
|
||||
because (a) the points of DEPARTURE are definitely improvements, and
|
||||
(b) no encoding of the Porter stemmer I have seen is anything like
|
||||
as exact as this version, even with the points of DEPARTURE!
|
||||
|
||||
You can compile it on Unix with 'gcc -O3 -o stem stem.c' after which
|
||||
'stem' takes a list of inputs and sends the stemmed equivalent to
|
||||
stdout.
|
||||
|
||||
The algorithm as encoded here is particularly fast.
|
||||
|
||||
Release 1
|
||||
*/
|
||||
|
||||
#include <string.h> /* for memmove */
|
||||
|
||||
#define TRUE 1
|
||||
#define FALSE 0
|
||||
|
||||
/* The main part of the stemming algorithm starts here. b is a buffer
|
||||
holding a word to be stemmed. The letters are in b[k0], b[k0+1] ...
|
||||
ending at b[k]. In fact k0 = 0 in this demo program. k is readjusted
|
||||
downwards as the stemming progresses. Zero termination is not in fact
|
||||
used in the algorithm.
|
||||
|
||||
Note that only lower case sequences are stemmed. Forcing to lower case
|
||||
should be done before stem(...) is called.
|
||||
*/
|
||||
|
||||
static char * b; /* buffer for word to be stemmed */
|
||||
static int k,k0,j; /* j is a general offset into the string */
|
||||
|
||||
/* cons(i) is TRUE <=> b[i] is a consonant. */
|
||||
|
||||
static int cons(int i)
|
||||
{
|
||||
switch (b[i])
|
||||
{
|
||||
case 'a': case 'e': case 'i': case 'o': case 'u': return FALSE;
|
||||
case 'y': return (i==k0) ? TRUE : !cons(i-1);
|
||||
default: return TRUE;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* m() measures the number of consonant sequences between k0 and j. if c is
|
||||
a consonant sequence and v a vowel sequence, and <..> indicates arbitrary
|
||||
presence,
|
||||
|
||||
<c><v> gives 0
|
||||
<c>vc<v> gives 1
|
||||
<c>vcvc<v> gives 2
|
||||
<c>vcvcvc<v> gives 3
|
||||
....
|
||||
*/
|
||||
|
||||
static int m()
|
||||
{
|
||||
int n = 0;
|
||||
int i = k0;
|
||||
while(TRUE)
|
||||
{
|
||||
if (i > j) return n;
|
||||
if (! cons(i)) break; i++;
|
||||
}
|
||||
i++;
|
||||
while(TRUE)
|
||||
{
|
||||
while(TRUE)
|
||||
{
|
||||
if (i > j) return n;
|
||||
if (cons(i)) break;
|
||||
i++;
|
||||
}
|
||||
i++;
|
||||
n++;
|
||||
while(TRUE)
|
||||
{
|
||||
if (i > j) return n;
|
||||
if (! cons(i)) break;
|
||||
i++;
|
||||
}
|
||||
i++;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* vowelinstem() is TRUE <=> k0,...j contains a vowel */
|
||||
|
||||
static int vowelinstem()
|
||||
{
|
||||
int i; for (i = k0; i <= j; i++) if (! cons(i)) return TRUE;
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
|
||||
/* doublec(j) is TRUE <=> j,(j-1) contain a double consonant. */
|
||||
|
||||
static int doublec(int j)
|
||||
{
|
||||
if (j < k0+1) return FALSE;
|
||||
if (b[j] != b[j-1]) return FALSE;
|
||||
return cons(j);
|
||||
}
|
||||
|
||||
|
||||
/* cvc(i) is TRUE <=> i-2,i-1,i has the form consonant - vowel - consonant
|
||||
and also if the second c is not w,x or y. this is used when trying to
|
||||
restore an e at the end of a short word. e.g.
|
||||
|
||||
cav(e), lov(e), hop(e), crim(e), but
|
||||
snow, box, tray.
|
||||
|
||||
*/
|
||||
|
||||
static int cvc(int i)
|
||||
{
|
||||
if (i < k0+2 || !cons(i) || cons(i-1) || !cons(i-2)) return FALSE;
|
||||
{
|
||||
int ch = b[i];
|
||||
if (ch == 'w' || ch == 'x' || ch == 'y') return FALSE;
|
||||
}
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
|
||||
/* ends(s) is TRUE <=> k0,...k ends with the string s. */
|
||||
|
||||
static int ends(char * s)
|
||||
{
|
||||
int length = s[0];
|
||||
if (s[length] != b[k]) return FALSE; /* tiny speed-up */
|
||||
if (length > k-k0+1) return FALSE;
|
||||
if (memcmp(b+k-length+1,s+1,length) != 0) return FALSE;
|
||||
j = k-length;
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
|
||||
/* setto(s) sets (j+1),...k to the characters in the string s, readjusting
|
||||
k. */
|
||||
|
||||
static void setto(char * s)
|
||||
{
|
||||
int length = s[0];
|
||||
memmove(b+j+1,s+1,length);
|
||||
k = j+length;
|
||||
}
|
||||
|
||||
|
||||
/* r(s) is used further down. */
|
||||
|
||||
static void r(char * s) { if (m() > 0) setto(s); }
|
||||
|
||||
/* step1ab() gets rid of plurals and -ed or -ing. e.g.
|
||||
|
||||
caresses -> caress
|
||||
ponies -> poni
|
||||
ties -> ti
|
||||
caress -> caress
|
||||
cats -> cat
|
||||
|
||||
feed -> feed
|
||||
agreed -> agree
|
||||
disabled -> disable
|
||||
|
||||
matting -> mat
|
||||
mating -> mate
|
||||
meeting -> meet
|
||||
milling -> mill
|
||||
messing -> mess
|
||||
|
||||
meetings -> meet
|
||||
|
||||
*/
|
||||
|
||||
static void step1ab()
|
||||
{
|
||||
if (b[k] == 's')
|
||||
{
|
||||
if (ends("\04" "sses")) k -= 2; else
|
||||
if (ends("\03" "ies")) setto("\01" "i"); else
|
||||
if (b[k-1] != 's') k--;
|
||||
}
|
||||
if (ends("\03" "eed")) { if (m() > 0) k--; }
|
||||
else
|
||||
if ((ends("\02" "ed") || ends("\03" "ing")) && vowelinstem())
|
||||
{
|
||||
k = j;
|
||||
if (ends("\02" "at")) setto("\03" "ate"); else
|
||||
if (ends("\02" "bl")) setto("\03" "ble"); else
|
||||
if (ends("\02" "iz")) setto("\03" "ize"); else
|
||||
if (doublec(k))
|
||||
{
|
||||
k--;
|
||||
{
|
||||
int ch = b[k];
|
||||
if (ch == 'l' || ch == 's' || ch == 'z') k++;
|
||||
}
|
||||
}
|
||||
else if (m() == 1 && cvc(k)) setto("\01" "e");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* step1c() turns terminal y to i when there is another vowel in the stem. */
|
||||
|
||||
static void step1c() { if (ends("\01" "y") && vowelinstem()) b[k] = 'i'; }
|
||||
|
||||
/* step2() maps double suffices to single ones. so -ization ( = -ize plus
|
||||
-ation) maps to -ize etc. note that the string before the suffix must give
|
||||
m() > 0. */
|
||||
|
||||
static void step2()
|
||||
{
|
||||
switch (b[k-1])
|
||||
{
|
||||
case 'a': if (ends("\07" "ational")) { r("\03" "ate"); break; }
|
||||
if (ends("\06" "tional")) { r("\04" "tion"); break; }
|
||||
break;
|
||||
case 'c': if (ends("\04" "enci")) { r("\04" "ence"); break; }
|
||||
if (ends("\04" "anci")) { r("\04" "ance"); break; }
|
||||
break;
|
||||
case 'e': if (ends("\04" "izer")) { r("\03" "ize"); break; }
|
||||
break;
|
||||
case 'l': if (ends("\03" "bli")) /*-DEPARTURE-*/
|
||||
{
|
||||
r("\03" "ble"); break;
|
||||
}
|
||||
|
||||
/* To match the published algorithm, replace this line with
|
||||
case 'l': if (ends("\04" "abli")) { r("\04" "able"); break; } */
|
||||
|
||||
if (ends("\04" "alli")) { r("\02" "al"); break; }
|
||||
if (ends("\05" "entli")) { r("\03" "ent"); break; }
|
||||
if (ends("\03" "eli")) { r("\01" "e"); break; }
|
||||
if (ends("\05" "ousli")) { r("\03" "ous"); break; }
|
||||
break;
|
||||
case 'o': if (ends("\07" "ization")) { r("\03" "ize"); break; }
|
||||
if (ends("\05" "ation")) { r("\03" "ate"); break; }
|
||||
if (ends("\04" "ator")) { r("\03" "ate"); break; }
|
||||
break;
|
||||
case 's': if (ends("\05" "alism")) { r("\02" "al"); break; }
|
||||
if (ends("\07" "iveness")) { r("\03" "ive"); break; }
|
||||
if (ends("\07" "fulness")) { r("\03" "ful"); break; }
|
||||
if (ends("\07" "ousness")) { r("\03" "ous"); break; }
|
||||
break;
|
||||
case 't': if (ends("\05" "aliti")) { r("\02" "al"); break; }
|
||||
if (ends("\05" "iviti")) { r("\03" "ive"); break; }
|
||||
if (ends("\06" "biliti")) { r("\03" "ble"); break; }
|
||||
break;
|
||||
case 'g': if (ends("\04" "logi")) /*-DEPARTURE-*/
|
||||
{
|
||||
r("\03" "log"); break;
|
||||
}
|
||||
|
||||
/* To match the published algorithm, delete this line */
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* step3() deals with -ic-, -full, -ness etc. similar strategy to step2. */
|
||||
|
||||
static void step3()
|
||||
{
|
||||
switch (b[k])
|
||||
{
|
||||
case 'e': if (ends("\05" "icate")) { r("\02" "ic"); break; }
|
||||
if (ends("\05" "ative")) { r("\00" ""); break; }
|
||||
if (ends("\05" "alize")) { r("\02" "al"); break; }
|
||||
break;
|
||||
case 'i': if (ends("\05" "iciti")) { r("\02" "ic"); break; }
|
||||
break;
|
||||
case 'l': if (ends("\04" "ical")) { r("\02" "ic"); break; }
|
||||
if (ends("\03" "ful")) { r("\00" ""); break; }
|
||||
break;
|
||||
case 's': if (ends("\04" "ness")) { r("\00" ""); break; }
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* step4() takes off -ant, -ence etc., in context <c>vcvc<v>. */
|
||||
|
||||
static void step4()
|
||||
{
|
||||
switch (b[k-1])
|
||||
{
|
||||
case 'a': if (ends("\02" "al")) break; return;
|
||||
case 'c': if (ends("\04" "ance")) break;
|
||||
if (ends("\04" "ence")) break; return;
|
||||
case 'e': if (ends("\02" "er")) break; return;
|
||||
case 'i': if (ends("\02" "ic")) break; return;
|
||||
case 'l': if (ends("\04" "able")) break;
|
||||
if (ends("\04" "ible")) break; return;
|
||||
case 'n': if (ends("\03" "ant")) break;
|
||||
if (ends("\05" "ement")) break;
|
||||
if (ends("\04" "ment")) break;
|
||||
if (ends("\03" "ent")) break; return;
|
||||
case 'o': if (ends("\03" "ion") && (b[j] == 's' || b[j] == 't')) break;
|
||||
if (ends("\02" "ou")) break; return;
|
||||
/* takes care of -ous */
|
||||
case 's': if (ends("\03" "ism")) break; return;
|
||||
case 't': if (ends("\03" "ate")) break;
|
||||
if (ends("\03" "iti")) break; return;
|
||||
case 'u': if (ends("\03" "ous")) break; return;
|
||||
case 'v': if (ends("\03" "ive")) break; return;
|
||||
case 'z': if (ends("\03" "ize")) break; return;
|
||||
default: return;
|
||||
}
|
||||
if (m() > 1) k = j;
|
||||
}
|
||||
|
||||
|
||||
/* step5() removes a final -e if m() > 1, and changes -ll to -l if
|
||||
m() > 1. */
|
||||
|
||||
static void step5()
|
||||
{
|
||||
j = k;
|
||||
if (b[k] == 'e')
|
||||
{
|
||||
int a = m();
|
||||
if (a > 1 || a == 1 && !cvc(k-1)) k--;
|
||||
}
|
||||
if (b[k] == 'l' && doublec(k) && m() > 1) k--;
|
||||
}
|
||||
|
||||
|
||||
/* In stem(p,i,j), p is a char pointer, and the string to be stemmed is from
|
||||
p[i] to p[j] inclusive. Typically i is zero and j is the offset to the last
|
||||
character of a string, (p[j+1] == '\0'). The stemmer adjusts the
|
||||
characters p[i] ... p[j] and returns the new end-point of the string, k.
|
||||
Stemming never increases word length, so i <= k <= j. To turn the stemmer
|
||||
into a module, declare 'stem' as extern, and delete the remainder of this
|
||||
file.
|
||||
*/
|
||||
|
||||
int stem(char * p, int i, int j)
|
||||
{ /* copy the parameters into statics */
|
||||
b = p; k = j; k0 = i;
|
||||
if (k <= k0+1) return k; /*-DEPARTURE-*/
|
||||
|
||||
/* With this line, strings of length 1 or 2 don't go through the
|
||||
stemming process, although no mention is made of this in the
|
||||
published algorithm. Remove the line to match the published
|
||||
algorithm. */
|
||||
|
||||
step1ab(); step1c(); step2(); step3(); step4(); step5();
|
||||
return k;
|
||||
}
|
||||
|
||||
|
||||
/*--------------------stemmer definition ends here------------------------*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h> /* for malloc, free */
|
||||
#include <ctype.h> /* for isupper, islower, tolower */
|
||||
|
||||
static char * s; /* a char * (=string) pointer; passed into b above */
|
||||
|
||||
#define INC 50 /* size units in which s is increased */
|
||||
static int i_max = INC; /* maximum offset in s */
|
||||
|
||||
void increase_s()
|
||||
{
|
||||
i_max += INC;
|
||||
{
|
||||
char * new_s = (char *) malloc(i_max+1);
|
||||
{ /* copy across */
|
||||
int i; for (i = 0; i < i_max; i++) new_s[i] = s[i];
|
||||
}
|
||||
free(s); s = new_s;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#define LETTER(ch) (isupper(ch) || islower(ch))
|
||||
|
||||
static void stemfile(FILE * f)
|
||||
{
|
||||
while(TRUE)
|
||||
{
|
||||
int ch = getc(f);
|
||||
if (ch == EOF) return;
|
||||
if (LETTER(ch))
|
||||
{
|
||||
int i = 0;
|
||||
while(TRUE)
|
||||
{
|
||||
if (i == i_max) increase_s();
|
||||
|
||||
ch = tolower(ch); /* forces lower case */
|
||||
|
||||
s[i] = ch; i++;
|
||||
ch = getc(f);
|
||||
if (!LETTER(ch)) { ungetc(ch,f); break; }
|
||||
}
|
||||
s[stem(s,0,i-1)+1] = 0;
|
||||
/* the previous line calls the stemmer and uses its result to
|
||||
zero-terminate the string in s */
|
||||
printf("%s",s);
|
||||
}
|
||||
else putchar(ch);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Commented out as required by amberfish's INSTALL file
|
||||
*
|
||||
int main(int argc, char * argv[])
|
||||
{
|
||||
int i;
|
||||
s = (char *) malloc(i_max+1);
|
||||
for (i = 1; i < argc; i++)
|
||||
{
|
||||
FILE * f = fopen(argv[i],"r");
|
||||
if (f == 0) { fprintf(stderr,"File %s not found\n",argv[i]); exit(1); }
|
||||
stemfile(f);
|
||||
}
|
||||
free(s);
|
||||
return 0;
|
||||
}
|
||||
*/
|
|
@ -1,19 +1,6 @@
|
|||
Amberfish is general purpose text retrieval software, developed at Etymon
|
||||
by Nassib Nassar and distributed as open source software under the terms
|
||||
of version 2 of the GNU General Public License (GPL). Its distinguishing
|
||||
features are indexing/search of semi-structured text (i.e. both free tex
|
||||
and multiply nested fields), built-in support for XML documents using the
|
||||
Xerces library, structured queries allowing generalized field/tag paths,
|
||||
hierarchical result sets (XML only), automatic searching across multiple
|
||||
databases (allowing modular indexing), TREC format results, efficient
|
||||
indexing, and relatively low memory requirements during indexing (and the
|
||||
ability to index documents larger than available memory). Z39.50 support
|
||||
is available. Other features include Boolean queries, right truncation,
|
||||
phrase searching, relevance ranking, support for multiple documents per
|
||||
file, incremental indexing, and easy integration with other UNIX tools,
|
||||
The architecture is also designed to permit proximity queries; however,
|
||||
they are not fully implemented at present.
|
||||
|
||||
This port also includes the Porter stemming algorithm for suffix
|
||||
stripping, available at:
|
||||
http://www.tartarus.org/~martin/PorterStemmer
|
||||
Amberfish is a full-text search engine with a command-line interface.
|
||||
Its features include free-text and Boolean queries, relevance-ranked
|
||||
results, wildcard search, phrase search, field search and structured
|
||||
field path queries for XML, multiple documents per file and nested
|
||||
documents, searching across multiple indexes, incremental update of
|
||||
indexes, and low memory requirements for building indexes.
|
||||
|
|
Loading…
Add table
Reference in a new issue