vectorstring,allocator::iterator iter_end = words-end();

while ( iter != iter_end )

{

string::size_type pos = 0;

while ((pos = (*iter).find_first_of(filt_elems, pos))

!= string::npos )

(*iter).erase(pos,l);

++iter;

}

}

void

TextQuery::

suffix_text()

{

vectorstring,allocator *words = text_locations-first;

vectorstring,allocator::iterator iter = words-begin();

vectorstring,allocator::iterator iter_end = words-end() ;

while ( iter != iter_end ) {

if ( (*iter).size() = 3 )

{ iter++; continue; }

if ( (*iter)[ (*iter).size()-l ] == 's' )

suffix_s( *iter );

// дополнительная обработка суффиксов...

iter++;

}

}

void

TextQuery::

suffix_s( string word )

{

string::size_type spos = 0;

string::size_type pos3 = word.size()-3;

// "ous", "ss", "is", "ius"

string suffixes( "oussisius" );

if ( ! word.compare( pos3, 3, suffixes, spos, 3 ) ||

! word.compare( pos3, 3, suffixes, spos+6, 3) ||

! word.compare( pos3+l, 2, suffixes, spos+2, 2 ) ||

! word.compare( pos3+l, 2, suffixes, spos+4, 2 ))

return;

string ies( "ies" );

if ( ! word.compare( pos3, 3, ies ))

{

word.replace( pos3, 3, 1, 'у' );

return;

}

string ses( "ses" );

if ( ! word.compare( pos3, 3, ses ))

{

word.erase( pos3+l, 2 );

return;

}

// удалим 's' в конце

word.erase( pos3+2 );

// удалим "'s"

if ( word[ pos3+l ] == '\'' )

word.erase( pos3+l );

}

void

TextQuery::

strip_caps()

{

vectorstring,allocator *words = text_locations-first;

vectorstring,allocator::iterator iter = words-begin();

vectorstring,allocator::iterator iter_end = words-end();

string caps( "ABCDEFGHI3KLMNOPQRSTUVWXYZ" );

while ( iter != iter_end ) {

string::size_type pos = 0;

while (( pos = (*iter).find_first_of( caps, pos ))

!= string::npos )

(*iter)[ pos ] = to1ower( (*iter)[pos] );

++iter;

}

}

void

TextQuery::

build_word_map()

{

word_map = new mapstring,loc*,lessstring,allocator;

typedef mapstring,loc*,lessstring,allocator::value_type

value_type;

typedef setstring,lessstring,allocator::difference_type

diff_type;

setstring,lessstring,allocator exclusion_set;

ifstream infile( "exclusion_set" );

if ( !infile )

{

static string default_excluded_words[25] = {

"the","and","but","that","then","are","been",

"can","can't","cannot","could","did","for",

"had","have","him","his","her","its"."into",

"were","which","when","with","would"

};

cerr "warning! unable to open word exclusion file! -- "

"using default set\n";

copy( default_excluded_words,

default_excluded_words+25,

inserter(exclusion_set, exclusion_set.begin()));

}

else {

istream_iterator string, diff_type

input_set( infile ), eos;

copy( input_set, eos,

inserter( exclusion_set, exclusion_set.begin() ));

}

// [Web-page navigation residue, not part of the listing: "Go to page:" / "Similar books"]