Maven dependencies:
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-core</artifactId>
<version>4.0.0</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-analyzers-common</artifactId>
<version>4.0.0</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-queryparser</artifactId>
<version>4.0.0</version>
</dependency>
Create IndexWriterFactory:
public enum IndexWriterFactory {
    WRITER;

    // Most recently created writer, shared so deleteAll()/close() can reach it.
    private static IndexWriter indexWriter = null;

    /**
     * Creates an {@link IndexWriter} over the directory named by the
     * "index.directory" config key. Returns {@code null} when a write.lock file
     * already exists (another writer holds the index); callers must null-check.
     *
     * @return a new IndexWriter, or {@code null} if the index is currently locked
     * @throws IOException if the directory or the writer cannot be opened
     */
    public synchronized IndexWriter getIndexWriter() throws IOException {
        ConfigUtil systemConfig = ConfigUtil.getInstance();
        String directoryConfig = systemConfig.getValue("index.directory");
        // NOTE(review): probing for the lock file by hand is racy — Lucene would
        // throw LockObtainFailedException itself. Kept because callers depend on
        // the null-when-locked contract.
        boolean isLocked = new File(directoryConfig, IndexWriter.WRITE_LOCK_NAME).exists();
        if (isLocked) {
            return null;
        }
        File indexDirectory = new File(directoryConfig);
        StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_40);
        IndexWriterConfig indexConfig = new IndexWriterConfig(Version.LUCENE_40, analyzer);
        FSDirectory directory = null;
        try {
            NativeFSLockFactory lock = new NativeFSLockFactory();
            directory = NIOFSDirectory.open(indexDirectory, lock);
            indexWriter = new IndexWriter(directory, indexConfig);
        } catch (IOException e) {
            // Release the directory handle the failed writer would have owned,
            // then propagate (was printStackTrace + rethrow, which double-reports).
            if (directory != null) {
                directory.close();
            }
            throw e;
        }
        return indexWriter;
    }

    /**
     * Deletes every document from the index and commits immediately.
     *
     * @throws IOException on index write failure
     * @throws IllegalStateException if no writer has been opened yet
     *         (was an unguarded call that threw NullPointerException)
     */
    public synchronized void deleteAll() throws IOException {
        if (indexWriter == null) {
            throw new IllegalStateException("IndexWriter not open; call getIndexWriter() first");
        }
        indexWriter.deleteAll();
        indexWriter.commit();
    }

    /** Closes the current writer (releasing the write lock), if any, and clears the stale reference. */
    public synchronized void close() throws IOException {
        if (indexWriter != null) {
            indexWriter.close();
            indexWriter = null;
        }
    }
}
Create IndexReaderFactory:
public enum IndexReaderFactory {
    READER;

    // Most recently opened reader. Note a DirectoryReader is a point-in-time
    // snapshot: it does not see commits made after it was opened.
    private static IndexReader indexReader = null;

    /**
     * Opens a fresh {@link IndexReader} over the directory named by the
     * "index.directory" config key. The caller is responsible for closing
     * the returned reader.
     *
     * @return a newly opened reader over the configured index directory
     * @throws IOException if the index directory cannot be opened
     */
    public static IndexReader getIndexReader() throws IOException {
        ConfigUtil config = ConfigUtil.getInstance();
        String directoryConfig = config.getValue("index.directory");
        // The original wrapped this in try/catch only to printStackTrace and
        // rethrow — double-reporting the error. Let the IOException propagate.
        File indexDirectory = new File(directoryConfig);
        FSDirectory directory = FSDirectory.open(indexDirectory);
        indexReader = DirectoryReader.open(directory);
        return indexReader;
    }
}
Create your index implementation:
public enum IndexServiceImpl {
    INDEXER;

    // Pending items awaiting indexing; LinkedHashSet dedupes while preserving
    // insertion order. (Field initializer replaces the instance-initializer block.)
    public Set<YourModel> indexItemQueue = new LinkedHashSet<YourModel>();

    // Was LoggerFactory.getLogger(IndexAssets.class) — register the logger
    // under the class that actually emits the messages.
    private static final Logger LOGGER = LoggerFactory.getLogger(IndexServiceImpl.class);

    /** Queues a single item for the next {@link #reIndex()} run. */
    public void addIndexItem(YourModel indexItem) {
        indexItemQueue.add(indexItem);
    }

    /** Queues a batch of items for the next {@link #reIndex()} run. */
    public void addIndexItem(List<YourModel> indexItemList) {
        indexItemQueue.addAll(indexItemList);
    }

    /**
     * Drains the pending queue into the Lucene index, replacing any existing
     * document with the same key field.
     *
     * @return "INDEX_FINISHED" when the queue was drained,
     *         "INDEX_UNFINISHED" when the index was locked (no writer available)
     * @throws Exception if writing to the index fails
     */
    public synchronized String reIndex() throws Exception {
        LOGGER.debug("BEGIN: index " + indexItemQueue.size());
        IndexWriter indexWriter = IndexWriterFactory.WRITER.getIndexWriter();
        if (indexWriter == null) {
            return "INDEX_UNFINISHED";
        }
        int count = 0;
        Iterator<YourModel> iterator = indexItemQueue.iterator();
        try {
            while (iterator.hasNext()) {
                YourModel entry = iterator.next();
                iterator.remove();
                Document document = new Document();
                // Add index fields to the document.
                document.add(new StringField("field_name", entry.getValue(), Field.Store.YES));
                // updateDocument() deletes any document matching the term and
                // adds the new one — it handles both insert and replace, so the
                // original's "term == null -> addDocument" branch was dead code
                // (term was always assigned just above the check).
                Term term = new Term("field_name", entry.getValue());
                indexWriter.updateDocument(term, document);
                count++; // was never incremented, so the END log always reported 0
            }
        } finally {
            indexWriter.close();
        }
        LOGGER.debug("END: index " + count + " records");
        return "INDEX_FINISHED";
    }

    /**
     * Wipes the entire index, waits until the deletion is visible, then
     * rebuilds the index from scratch.
     *
     * @throws Exception if deleting or re-indexing fails
     */
    public synchronized void reIndexAll() throws Exception {
        // Delete all existing index data first; deleteAll() commits before returning.
        // (Was IndexAssets.INDEXER.deleteAll(), a class not defined in this listing.)
        deleteAll();
        // Confirm the deletion is visible, polling every 5s. A DirectoryReader is
        // a point-in-time snapshot, so a FRESH reader must be opened on each poll —
        // the original polled one stale reader whose numDocs() could never change,
        // looping forever if the first read was non-zero.
        try {
            while (true) {
                IndexReader reader = IndexReaderFactory.getIndexReader();
                int numOfDoc;
                try {
                    numOfDoc = reader.numDocs();
                } finally {
                    reader.close(); // the original leaked the reader
                }
                if (numOfDoc == 0) {
                    break;
                }
                Thread.sleep(5000);
            }
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt(); // restore interrupt status
            LOGGER.error("Interrupted while waiting for index deletion", e);
        } catch (Exception e) {
            // Was a "exeception" typo and dropped the stack trace; pass the cause.
            LOGGER.error("There is an exception when deleting all old index items.", e);
        }
        List<YourModel> list = new ArrayList<YourModel>();
        list.addAll(/*your data here*/);
        LOGGER.info("INDEXING: " + list.size() + " records");
        // Was IndexAssets.INDEXER.addIndexItem(list) / IndexAssets.INDEXER.index(),
        // neither of which exists in this listing — call this enum's own members.
        addIndexItem(list);
        reIndex();
    }

    /**
     * Clears the pending queue and deletes every document from the index.
     *
     * @return "INDEX_FINISHED" on success,
     *         "INDEX_UNFINISHED" when the index was locked (no writer available)
     * @throws Exception if deleting from the index fails
     */
    public String deleteAll() throws Exception {
        indexItemQueue.clear();
        IndexWriter indexWriter = IndexWriterFactory.WRITER.getIndexWriter();
        if (indexWriter == null) {
            return "INDEX_UNFINISHED";
        }
        IndexWriterFactory.WRITER.deleteAll();
        indexWriter.close();
        return "INDEX_FINISHED";
    }
}
Search in index:
/**
 * Searches the index for the given keyword(s).
 *
 * @param keyword raw user query; whitespace is normalized and stopwords removed
 * @param pageNum 1-based page number, or -1 for all results
 * @param isSearchExactKeyword true = every term must match (MUST),
 *                             false = any term may match (SHOULD)
 * @return one String[] per hit: { field_name1 value, field_name2 value,
 *         1-based sequence number }; empty list when nothing matches or on I/O error
 */
public List<String[]> search(String keyword, int pageNum, boolean isSearchExactKeyword) {
    ConfigUtil config = ConfigUtil.getInstance();
    String numberOfProgramPerPageStr = config.getValue("search.numberOfProgramPerPage");
    int numberOfProgramPerPage = Integer.parseInt(numberOfProgramPerPageStr);
    int from = (pageNum - 1) * numberOfProgramPerPage;
    List<String[]> results = new ArrayList<String[]>();
    // Normalize whitespace and lowercase. The original pattern
    // "/^\\s+|\\s$/g" was JavaScript regex syntax pasted into Java — the
    // literal '/'...'/g' delimiters meant it never matched anything.
    keyword = keyword.trim().replaceAll("\\s+", " ").toLowerCase();
    String[] keywordsArray = keyword.split(" ");
    // Collect Lucene's default stopwords (stored as char[]) into a String set
    // for O(1) lookup, replacing the original O(n*m) nested loops.
    Set<String> stopwords = new HashSet<String>();
    for (Object stopWord : StandardAnalyzer.STOP_WORDS_SET) {
        stopwords.add(new String((char[]) stopWord));
    }
    BooleanQuery bq = new BooleanQuery();
    for (String key : keywordsArray) {
        if (stopwords.contains(key)) {
            continue; // skip stopwords entirely
        }
        Query query = new TermQuery(new Term("field_name", key));
        // MUST = all terms required (exact search); SHOULD = any term may match.
        bq.add(query, isSearchExactKeyword ? BooleanClause.Occur.MUST : BooleanClause.Occur.SHOULD);
    }
    try {
        // Was IndexReaderFactory.INDEXREADER.getIndexReader(); the factory's
        // constant is READER and the method is static — call it directly.
        IndexReader reader = IndexReaderFactory.getIndexReader();
        try {
            IndexSearcher searcher = new IndexSearcher(reader);
            // First pass: count total hits so the collector can be sized exactly.
            TotalHitCountCollector collectorCount = new TotalHitCountCollector();
            searcher.search(bq, collectorCount);
            int count = collectorCount.getTotalHits();
            if (count <= 0) {
                LOGGER.debug("No Result");
                return results; // reader is still closed by the finally below
            }
            TopFieldCollector collector =
                    TopFieldCollector.create(Sort.RELEVANCE, count, true, false, false, true);
            searcher.search(bq, collector);
            ScoreDoc[] hits;
            if (pageNum == -1) {
                hits = collector.topDocs().scoreDocs; // all results, no paging
            } else {
                hits = collector.topDocs(from, numberOfProgramPerPage).scoreDocs;
                if (hits.length <= 0) {
                    // Requested page is past the end; fall back to the first page.
                    hits = collector.topDocs(0, numberOfProgramPerPage).scoreDocs;
                }
            }
            // Let searcher.doc() IOExceptions propagate to the outer catch —
            // the original caught them inline, printed the trace, and then
            // dereferenced the still-null Document (NullPointerException).
            int sequenceNum = 0;
            for (ScoreDoc hit : hits) {
                Document doc = searcher.doc(hit.doc);
                sequenceNum++;
                results.add(new String[] {
                        doc.get("field_name1"),
                        doc.get("field_name2"),
                        String.valueOf(sequenceNum)
                });
            }
        } finally {
            // The original leaked the reader on the early "no result" return.
            reader.close();
        }
    } catch (IOException e) {
        LOGGER.error("Search failed for keyword: " + keyword, e);
    }
    return results;
}
No comments:
Post a Comment