ports/textproc/clucene-contrib/files/patch-src_CLucene_analysis_cjk_CJKAnalyzer.h
Josh Paetzel be5629e921 CLucene is a C++ port of Lucene: the high-performance,
full-featured text search engine written in Java.
CLucene is faster than Lucene because it is written in C++.

WWW:	http://sourceforge.net/projects/clucene/

PR:		ports/151951
Submitted by:	Tom Judge <tom at tomjudge.com>
2010-11-05 15:49:20 +00:00

45 lines
1.3 KiB
C

--- src/CLucene/analysis/cjk/CJKAnalyzer.h Fri Aug 1 17:55:43 2008
+++ src/CLucene/analysis/cjk/CJKAnalyzer.h.orig Tue May 27 17:41:14 2008
@@ -52,7 +52,7 @@
* character buffer, store the characters which are used to compose <br>
* the returned Token
*/
- TCHAR buffer[LUCENE_MAX_WORD_LEN];
+ TCHAR buffer[LUCENE_MAX_WORD_LEN+1];
/**
* I/O buffer, used to store the content of the input(one of the <br>
@@ -102,6 +102,33 @@
};
+/** Represents a CJK (Chinese, Japanese, Korean) analyzer.
+ *
+ * Filters CJKTokenizer with StopFilter.
+ *
+ * @author Che, Dong
+ */
+
+class CJKAnalyzer : public CL_NS(analysis)::Analyzer
+{
+ private:
+ CL_NS(util)::CLSetList<const TCHAR*> stopTable; // set of stop words; presumably what the StopFilter consults -- confirm in CJKAnalyzer.cpp
+ public:
+ /** Builds an analyzer (presumably using the default STOP_WORDS -- confirm in CJKAnalyzer.cpp). */
+ CJKAnalyzer();
+
+ /** Builds an analyzer with the given stop words (array termination convention is not visible here -- confirm before calling). */
+ CJKAnalyzer(const TCHAR** stopWords);
+
+ ~CJKAnalyzer();
+
+ /** Returns a token stream for fieldName that reads its input from reader. **/
+ CL_NS(analysis)::TokenStream* tokenStream(const TCHAR* fieldName, CL_NS(util)::Reader* reader);
+
+ /** An array containing some common English words that are not usually useful for
+ searching, plus some double-byte punctuation marks. */
+ static const TCHAR* STOP_WORDS[];
+};
CL_NS_END2
#endif