Lucene的应用越来越多,在对中文建立索引的过程中,中文分词问题也就越来越重要。
在已有的分词模式中,目前比较常用的也是比较通用的有一元分词、二元分词和基于词库的分词三种。一元分词在Java版本上由yysun实现,并且已经收录到Apache。其实现方式比较简单,即将每一个汉字作为一个Token,例如:“这是中文字”,在经过一元分词模式分词后的结果为五个Token:这、是、中、文、字。而二元分词,则将两个相连的汉...
Java版N皇后算法
回溯法,代码如下:
/**
* author Akalius Kung 2008-2-8
**/
public class Queen {
private int[] grids; // location in each row, index is each row, array value is location of each queen
private int n;
private static int sum;
/**
 * Default constructor: hands all setup to {@code init} with an argument of 8.
 * NOTE(review): 8 is presumably the classic 8x8 board size; confirm against init.
 */
public Queen() {
    this.init(8);
}
/**
 * Stores {@code n} and allocates the per-row placement array with {@code n} slots.
 *
 * Every slot starts at 0: Java zero-initialises the elements of a newly
 * created {@code int[]}, so no explicit fill loop is needed.
 *
 * @param n number of slots in {@code grids} (presumably the board size / number
 *          of queens; confirm against the solver code)
 * @throws NegativeArraySizeException if {@code n} is negative
 */
public Queen(int n) {
    this.n = n;
    this.grids = new int[n]; // elements default to 0
}
private void init(int n){
...
Java版堆排序
/**
* author Akalius Kung 2008-2-9
**/
public class HeapSort {
private int heapLen;
/**
 * Sorts {@code array} in place using the heap helpers of this class and
 * returns the same array instance.
 *
 * The heap is first built over the whole array. Then, walking from the last
 * index down to 1, the root (index 0) is swapped with the current last slot,
 * the tracked heap length shrinks by one, and the root is re-heapified.
 * NOTE(review): the resulting order depends on how buildHeap/heapify compare
 * elements; those helpers are not visible here.
 *
 * @param array the array to sort; it is modified in place
 * @return the same {@code array} reference
 */
private int[] sort(int[] array){
    heapLen = array.length;
    buildHeap(array); // establish the initial heap

    int last = heapLen - 1;
    while (last > 0) {
        swap(array, last, 0); // move the current root into the last open slot
        heapLen--;            // that slot is no longer part of the heap
        heapify(array, 0);    // restore the heap property from the root
        last--;
    }
    return array;
}
private void buildHeap(...
Java版堆排序
/**
* author Akalius Kung 2008-2-9
**/
public class HeapSort {
private int heapLen;
/**
 * Sorts {@code array} in place using the heap helpers of this class and
 * returns the same array instance.
 *
 * The heap is first built over the whole array. Then, walking from the last
 * index down to 1, the root (index 0) is swapped with the current last slot,
 * the tracked heap length shrinks by one, and the root is re-heapified.
 * NOTE(review): the resulting order depends on how buildHeap/heapify compare
 * elements; those helpers are not visible here.
 *
 * @param array the array to sort; it is modified in place
 * @return the same {@code array} reference
 */
private int[] sort(int[] array){
    heapLen = array.length;
    buildHeap(array); // establish the initial heap

    int last = heapLen - 1;
    while (last > 0) {
        swap(array, last, 0); // move the current root into the last open slot
        heapLen--;            // that slot is no longer part of the heap
        heapify(array, 0);    // restore the heap property from the root
        last--;
    }
    return array;
}
private void buildHeap(i...
Java版归并排序
/**
* author Akalius Kung 2008-2-5
**/
public class MergeSort {
/**
 * Entry point: allocates a scratch array of the same length as {@code data},
 * runs the recursive {@code mergeSort} over the index range
 * {@code [0, data.length - 1]}, and returns {@code data} itself.
 *
 * @param data the array to sort; passed to {@code mergeSort} and returned as-is
 * @return the same {@code data} reference
 */
public int[] sort(int[] data) {
    final int[] scratch = new int[data.length];
    final int lastIndex = data.length - 1;
    mergeSort(data, scratch, 0, lastIndex);
    return data;
}
private void mergeSort(int[] data,int[] temp,int l,int r){
int mid=(l+r)/2;
System.out.println(l+", "+mid+", "+r);
if(l==r) return ;
...
Java版插入排序
/**
* author Akalius Kung 2008-2-9
**/
public class InsertionSort {
private int[] sort(int[] array){
for(int j=1;j<array.length;j++){
int swapLoc = j; // init the location where to insert
for(int i=j-1;i>=0;i--){ // get the location where to insert
if(array[i]>array[j]){
swapLoc=i;
}
}
// backward the elems between swapLoc and j
int temp=array[j];
for(int k=j-1;k...