• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    迪恩网络公众号

C++ lem::UCString类代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了C++中lem::UCString的典型用法代码示例。如果您正苦于以下问题:C++ UCString类的具体用法?C++ UCString怎么用?C++ UCString使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。



在下文中一共展示了UCString类的16个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的C++代码示例。

示例1: FindRawNGramsID

// Runs a lookup query for the raw 5-gram made of w1..w5 and returns whatever
// integer pair SelectIntPair() produces for that query.
//
// suffix, sgm - appended (in this order) to every NGRAM_WORDS / NGRAM5 table name.
// w1..w5      - the five words; each one is converted to UTF-8 and placed
//               into the WHERE clause as a quoted literal.
//
// NOTE(review): the words are pasted into the statement text without any
// escaping, so a word containing a single quote would change the SQL -
// confirm that callers never pass such input.
std::pair<int, int> NGramsDBMS::FindRawNGramsID(
    const lem::FString &suffix,
    const lem::FString &sgm,
    const lem::UCString &w1,
    const lem::UCString &w2,
    const lem::UCString &w3,
    const lem::UCString &w4,
    const lem::UCString &w5
)
{
    // UTF-8 copies of the words, kept in named locals for the duration of
    // the formatting call.
    const FString utf8_w1(to_utf8(w1.c_str()));
    const FString utf8_w2(to_utf8(w2.c_str()));
    const FString utf8_w3(to_utf8(w3.c_str()));
    const FString utf8_w4(to_utf8(w4.c_str()));
    const FString utf8_w5(to_utf8(w5.c_str()));

    // Six table references (five word tables plus NGRAM5), each taking the
    // suffix/segment pair, followed by the five word literals.
    FString sql = lem::format_str("SELECT w, N.id"
        " FROM NGRAM_WORDS%s%s wrd1, NGRAM_WORDS%s%s wrd2, NGRAM_WORDS%s%s wrd3, NGRAM_WORDS%s%s wrd4, NGRAM_WORDS%s%s wrd5, NGRAM5%s%s N"
        " WHERE wrd1.word='%s' AND wrd2.word='%s' AND wrd3.word='%s' AND wrd4.word='%s' AND wrd5.word='%s' AND iword1=wrd1.id AND iword2=wrd2.id AND iword3=wrd3.id AND iword4=wrd4.id AND iword5=wrd5.id",
        suffix.c_str(), sgm.c_str(),
        suffix.c_str(), sgm.c_str(),
        suffix.c_str(), sgm.c_str(),
        suffix.c_str(), sgm.c_str(),
        suffix.c_str(), sgm.c_str(),
        suffix.c_str(), sgm.c_str(),
        utf8_w1.c_str(),
        utf8_w2.c_str(),
        utf8_w3.c_str(),
        utf8_w4.c_str(),
        utf8_w5.c_str());

    return SelectIntPair(sql);
}
开发者ID:Koziev,项目名称:GrammarEngine,代码行数:26,代码来源:NGramsDBMS.cpp


示例2: BuildKey

// Builds a group key from up to the first three characters of str.
// Positions beyond the end of a 1- or 2-character string are filled with 0.
//
// NOTE(review): a string of length 0 is not special-cased and takes the same
// path as strings of three or more characters, i.e. it reads front(), [1]
// and [2] - confirm callers never pass an empty string.
SG_EntryGroup::KEY SG_EntryGroup::BuildKey( const lem::UCString &str )
{
 const int len = str.length();

 if( len==1 )
  return KEY( str.front(), 0, 0 );

 if( len==2 )
  return KEY( str.front(), str[1], 0 );

 return KEY( str.front(), str[1], str[2] );
}
开发者ID:mcdir,项目名称:GrammarEngine,代码行数:9,代码来源:sg_entry_group.cpp


示例3: Crop

// Applies the crop rules to word: first the rules found under the word's
// prefix hash, then the rules found under its affix hash.
//
// For every rule whose Apply() succeeds, the cropped word is appended to
// results, the rule's GetRel() value is appended to rels, and, when trace
// is not null, trace->CropRuleApplied() is called.
//
// Returns true if at least one rule was applied. Nothing is done (and false
// is returned) when crop_rules is empty.
bool LA_PreprocessorRules::Crop(
    const lem::UCString &word,
    lem::MCollect<lem::UCString> &results,
    lem::MCollect<lem::Real1> &rels,
    LA_RecognitionTrace *trace
) const
{
    if (crop_rules.empty())
        return false;

    typedef CROP_RULES::const_iterator RuleIter;

    bool any_applied = false;

    // Output buffer handed to every Apply() call, shared by both passes.
    lem::UCString cropped;

    // Tries every rule in [range.first, range.second) against the word.
    auto run_rules = [&](const std::pair<RuleIter, RuleIter> &range)
    {
        for (RuleIter cur = range.first; cur != range.second; ++cur)
        {
            const LA_CropRule *rule = cur->second;
            if (!rule->Apply(word, cropped))
                continue;

            any_applied = true;
            results.push_back(cropped);
            rels.push_back(rule->GetRel());
            if (trace != nullptr)
                trace->CropRuleApplied(word, cropped, rule);
        }
    };

    // First apply the prefix rules.
    const LA_CropRule::HashType prefix_hash = LA_CropRule::CalcHash(word.c_str(), true, false);
    run_rules(prefix_crop_rules.equal_range(prefix_hash));

    // Now cut off the affix.
    const LA_CropRule::HashType affix_hash = LA_CropRule::CalcHash(word.c_str(), false, true);
    run_rules(affix_crop_rules.equal_range(affix_hash));

    return any_applied;
}
开发者ID:Koziev,项目名称:GrammarEngine,代码行数:57,代码来源:LA_PreprocessorRules.cpp


示例4: DecodeWord1

// Decodes the single-byte string a into the wide string u.
//
// Every byte of a is used as an index into the i2wchar table and the table
// entry is written to the same position of u's buffer. The buffer is then
// zero-terminated at a.length() and u's hash is recalculated.
//
// a - encoded string (one table index per char).
// u - receives the decoded characters; its previous content is overwritten.
void Lemmatizator::DecodeWord1( const lem::CString & a, lem::UCString & u ) const
{
 const int len = a.length();

 for( int i=0; i<len; ++i )
  {
   // Go through unsigned char first: where plain char is signed, a byte
   // >= 0x80 is negative, and converting it straight to unsigned would
   // sign-extend it into a huge out-of-range table index.
   u.ptr()[i] = i2wchar[ static_cast<unsigned char>(a[i]) ];
  }

 u.ptr()[len] = 0;
 u.calc_hash();
}
开发者ID:mcdir,项目名称:GrammarEngine,代码行数:10,代码来源:lemmatizator.cpp


示例5: to_ascii

// Converts the wide string str to a single-byte string.
//
// str - source string; an empty source yields a default-constructed CString.
// cp  - converter to use; when null, the session code page obtained from
//       lem::UI::get_UI().GetSessionCp() is used instead.
//
// The result is zero-terminated at str.length() and its hash is
// recalculated before it is returned.
const lem::CString lem::to_ascii( const lem::UCString &str, const CodeConverter *cp )
{
 lem::CString ascii;

 if( !str.empty() )
  {
   // Fall back to the session code page only when no converter was given.
   const CodeConverter *converter = cp;
   if( !converter )
    converter = &lem::UI::get_UI().GetSessionCp();

   lem_unicode_to_ascii( ascii.ptr(), str.c_str(), converter );
   ascii.ptr()[str.length()] = 0;
   ascii.calc_hash();
  }

 return ascii;
}
开发者ID:mcdir,项目名称:GrammarEngine,代码行数:11,代码来源:cp_recodings.cpp


示例6: EncodeWord1

// Encodes the wide string u into the single-byte string a.
//
// Each character of u is looked up in wchar2i; the mapped value is stored
// at the same position of a's buffer. Characters missing from the map are
// stored as (char)0xff. The buffer is then zero-terminated at u.length()
// and a's hash is recalculated.
void Lemmatizator::EncodeWord1( const lem::UCString & u, lem::CString & a ) const
{
 const int n = u.length();

 for( int pos=0; pos<n; ++pos )
  {
   std::map<wchar_t,int>::const_iterator found = wchar2i.find(u[pos]);

   if( found!=wchar2i.end() )
    a.ptr()[pos] = found->second;
   else
    a.ptr()[pos] = (char)0xff; // character is not in the table
  }

 a.ptr()[n] = 0;
 a.calc_hash();
}
开发者ID:mcdir,项目名称:GrammarEngine,代码行数:16,代码来源:lemmatizator.cpp


示例7: GetSuffix

// Returns the suffix string used for word.
//
// - The markers "~~end~~" and "~~begin~~" (compared after lowercasing) are
//   returned as the original, unmodified word.
// - A word no longer than model_suffix_len is returned lowercased.
// - Otherwise the result is '~' followed by the last model_suffix_len
//   characters of the lowercased word.
//
// word must not be empty (checked by LEM_CHECKIT_Z).
lem::UCString Lemmatizator::GetSuffix( const lem::UCString & word ) const
{
 LEM_CHECKIT_Z( !word.empty() );

 lem::UCString lowered(word);
 lowered.to_lower();

 // The length test comes first so the string comparison only runs for
 // candidates of the right size.
 if( ( word.length()==7 && lowered==L"~~end~~" ) ||
     ( word.length()==9 && lowered==L"~~begin~~" ) )
  return word;

 if( word.length()>model_suffix_len )
  return lem::UCString(L'~')+lem::right( lowered, model_suffix_len );

 return lowered;
}
开发者ID:mcdir,项目名称:GrammarEngine,代码行数:16,代码来源:lemmatizator.cpp


示例8: AddVariable

// Binds the variable upper_name to wordform in bound_variables.
// An existing binding under the same name is replaced.
//
// upper_name - variable name; must not be empty.
// wordform   - word form to bind; must not be NULL.
void TreeScorerBoundVariables::AddVariable(
                                           const lem::UCString & upper_name,
                                           const Solarix::Word_Form * wordform
                                          )
{
 LEM_CHECKIT_Z( !upper_name.empty() );
 LEM_CHECKIT_Z( wordform!=NULL );

 bound_variables[ upper_name ] = wordform;
}
开发者ID:mcdir,项目名称:GrammarEngine,代码行数:8,代码来源:TreeScorerBoundVariables.cpp


示例9: InsertWord

// Inserts one (id, word) row into the NGRAM_WORDS<suffix> table.
//
// suffix - appended to the table name.
// id     - value for the id column.
// word   - word to store; converted to UTF-8 and written as a quoted literal.
//
// NOTE(review): the word is placed into the statement text without
// escaping, so a word containing a single quote would break the INSERT -
// confirm callers never pass such input.
void NGramsDBMS::InsertWord(
    const lem::FString &suffix,
    int id,
    const lem::UCString &word
)
{
    FString sql = lem::format_str(
        "INSERT INTO NGRAM_WORDS%s( id, word ) VALUES ( %d, '%s' )",
        suffix.c_str(),
        id,
        to_utf8(word.c_str()).c_str());

    ExecuteSql(sql);
}
开发者ID:Koziev,项目名称:GrammarEngine,代码行数:11,代码来源:NGramsDBMS.cpp


示例10: FindWord

// Looks up the id of word in the NGRAM_WORDS<suffix><sgm> table.
//
// suffix, sgm - appended (in this order) to the table name.
// word        - word to search for; converted to UTF-8 and compared as a
//               quoted literal.
//
// Returns the result of SelectInt(sql, -1) for the generated query.
//
// NOTE(review): the word is placed into the statement text without
// escaping - confirm callers never pass a word containing a single quote.
int NGramsDBMS::FindWord(
    const lem::FString &suffix,
    const lem::FString &sgm,
    const lem::UCString &word
)
{
    const FString utf8_word(to_utf8(word.c_str()));

    FString sql = lem::format_str(
        "SELECT id FROM NGRAM_WORDS%s%s WHERE word='%s'",
        suffix.c_str(),
        sgm.c_str(),
        utf8_word.c_str());

    return SelectInt(sql, -1);
}
开发者ID:Koziev,项目名称:GrammarEngine,代码行数:12,代码来源:NGramsDBMS.cpp


示例11: SyllabContext

// Builds the list of context points for word.
//
// The list starts with a left-boundary point and ends with a right-boundary
// point. In between there is one point per character fetched from the word:
// a SyllabContextSymbol when the character is found in one of the
// language's alphabets, a SyllabContextUnknownSymbol otherwise.
//
// alphabet    - graphical grammar used to look the characters up.
// word        - word to split into points.
// id_language - language whose alphabets are searched; must not be UNKNOWN.
SyllabContext::SyllabContext(GraphGram &alphabet, const lem::UCString &word, int id_language)
{
    LEM_CHECKIT_Z(id_language != UNKNOWN);

    const SG_Language &language = alphabet.GetDict().GetSynGram().languages()[id_language];
    const lem::MCollect<int> &alphabet_ids = language.GetAlphabets();

    points.push_back(new SyllabContextLeftBoundary());

    // Split the source word into characters, look each character up in the
    // alphabet and create one context point from the result.
    lem::WideStringUcs4 reader(word.c_str());
    for (lem::uint32_t code = reader.Fetch(); code != 0; code = reader.Fetch())
    {
        const Word_Coord found = alphabet.entries().FindSymbol(code, alphabet_ids);
        if (found.IsUnknown())
        {
            points.push_back(new SyllabContextUnknownSymbol(code));
            continue;
        }

        const GG_Entry &entry = alphabet.entries()[found.GetEntry()];
        const int id_class = entry.GetClass();
        const GG_EntryForm &form = entry.forms()[found.GetForm()];

        // Coordinates of the point: the form's dimensions followed by the
        // entry's attributes.
        lem::MCollect<GramCoordPair> coords = form.dims();
        for (lem::Container::size_type idx = 0; idx < entry.attrs().size(); ++idx)
            coords.push_back(entry.attrs()[idx]);

        points.push_back(new SyllabContextSymbol(code, entry.GetName(), found.GetEntry(), id_class, coords));
    }

    points.push_back(new SyllabContextRightBoundary());
}
开发者ID:Koziev,项目名称:GrammarEngine,代码行数:39,代码来源:SyllabContext.cpp


示例12: FindRaw3GramsWithCenter

// Collects the (first word, third word) pairs of all raw 3-grams whose
// middle word equals center.
//
// suffix, sgm - appended (in this order) to every table name in the query.
// max_w3      - divisor applied to each row's integer w value.
// center      - middle word; converted to UTF-8 and compared as a quoted literal.
// min_freq    - rows whose w value is below this threshold are skipped.
// reslist     - receives w / max_w3 per word pair; when the pair is already
//               present, the value is added to the existing entry.
//
// NOTE(review): center is placed into the statement text without escaping -
// confirm callers never pass a word containing a single quote.
void NGramsDBMS::FindRaw3GramsWithCenter(
    const lem::FString &suffix,
    const lem::FString &sgm,
    float max_w3,
    const lem::UCString &center,
    int min_freq,
    std::map< std::pair<lem::UCString, lem::UCString>, float > & reslist
)
{
    const FString utf8_center(to_utf8(center.c_str()));

    FString sql = lem::format_str(
        "SELECT wrd1.word, wrd3.word, w"
        " FROM NGRAM3%s%s, NGRAM_WORDS%s%s wrd1, NGRAM_WORDS%s%s wrd2, NGRAM_WORDS%s%s wrd3"
        " WHERE wrd2.word='%s' AND iword2=wrd2.id AND wrd1.id=iword1 AND wrd3.id=iword3",
        suffix.c_str(), sgm.c_str(),
        suffix.c_str(), sgm.c_str(),
        suffix.c_str(), sgm.c_str(),
        suffix.c_str(), sgm.c_str(),
        utf8_center.c_str());

    std::unique_ptr<LS_ResultSet> rs(Select(sql));
    while (rs->Fetch())
    {
        const int raw_freq = rs->GetInt(2);
        if (raw_freq < min_freq)
            continue;

        UCString left_word(rs->GetUCString(0));
        UCString right_word(rs->GetUCString(1));
        const float weight = float(raw_freq) / max_w3;

        // insert() leaves an existing entry untouched and reports that via
        // .second; in that case accumulate into the entry it points at.
        auto inserted = reslist.insert(std::make_pair(std::make_pair(left_word, right_word), weight));
        if (!inserted.second)
            inserted.first->second += weight;
    }
}
开发者ID:Koziev,项目名称:GrammarEngine,代码行数:39,代码来源:NGramsDBMS.cpp


示例13: Find2GramsWithRight

// Collects the first words of all 2-grams whose second word equals right.
//
// suffix, sgm - appended (in this order) to every table name in the query.
// max_w2      - divisor applied to each row's integer w value.
// right       - second word of the 2-gram; converted to UTF-8 and compared
//               as a quoted literal.
// min_freq    - rows whose w value is below this threshold are skipped.
// reslist     - receives w / max_w2 per found word; when the word is already
//               present, the value is added to the existing entry.
//
// NOTE(review): right is placed into the statement text without escaping -
// confirm callers never pass a word containing a single quote.
void NGramsDBMS::Find2GramsWithRight(
    const lem::FString &suffix,
    const lem::FString &sgm,
    float max_w2,
    const lem::UCString &right,
    int min_freq,
    std::map< lem::UCString, float > & reslist
)
{
    const FString utf8_right(to_utf8(right.c_str()));

    FString sql = lem::format_str(
        "SELECT wrd2.word, w"
        " FROM NGRAM_WORDS%s%s wrd1, NGRAM2%s%s, NGRAM_WORDS%s%s wrd2"
        " WHERE wrd1.word='%s' AND iword2=wrd1.id AND wrd2.id=iword1",
        suffix.c_str(), sgm.c_str(),
        suffix.c_str(), sgm.c_str(),
        suffix.c_str(), sgm.c_str(),
        utf8_right.c_str());

    std::unique_ptr<LS_ResultSet> rs(Select(sql));
    while (rs->Fetch())
    {
        const int raw_freq = rs->GetInt(1);
        if (raw_freq < min_freq)
            continue;

        UCString left_word(rs->GetUCString(0));
        const float weight = float(raw_freq) / max_w2;

        // insert() leaves an existing entry untouched and reports that via
        // .second; in that case accumulate into the entry it points at.
        auto inserted = reslist.insert(std::make_pair(left_word, weight));
        if (!inserted.second)
            inserted.first->second += weight;
    }
}
开发者ID:Koziev,项目名称:GrammarEngine,代码行数:37,代码来源:NGramsDBMS.cpp


示例14: SG_calibrator

// Parses one calibrator definition from txtfile.
//
// Token sequence consumed, as read by the code below:
//   <word> [ '{' <class name and/or coordinate specs> '}' ] '=' [ '-' ] <int>
//
// keyword - selects freq_type via eqi() against "wordentry_freq",
//           "wordform_score" and "wordforms_score".
// sg      - grammar used to resolve class names, coordinates and states,
//           and whose IO is used for error messages.
// io      - not referenced in this body.
// txtfile - token source; it is advanced past the whole definition.
//
// Throws lem::E_BaseException for an unknown coordinate name and
// E_ParserError for a state that the coordinate does not declare.
//
// NOTE(review): freq_type is not assigned here when keyword matches none of
// the three names - check whether the class declaration gives it a default.
SG_calibrator::SG_calibrator(const lem::UCString & keyword, const SynGram &sg, const Sol_IO &io, Macro_Parser &txtfile)
{
    if (keyword.eqi(L"wordentry_freq"))
        freq_type = WordEntryFreq;
    else if (keyword.eqi(L"wordform_score"))
        freq_type = WordFormScore;
    else if (keyword.eqi(L"wordforms_score"))
        freq_type = WordFormsScore;

    id_class = UNKNOWN;
    freq = 0;

    // The word itself; surrounding double quotes are stripped.
    word = txtfile.read().string();
    word.strip(L'"');

    // If an opening curly brace follows, a specific word form (or several
    // word forms) is being specified.

    if (txtfile.probe(B_OFIGPAREN))
    {
        while (!txtfile.eof())
        {
            // A closing curly brace ends the specification list.
            if (txtfile.pick().GetToken() == B_CFIGPAREN)
            {
                txtfile.read();
                break;
            }

            // for ordinary coordinates: coordinate:state
            // for bistable coordinates: coordinate
            lem::Iridium::BethToken coord_name = txtfile.read();

            // Until a class has been recognized, a token that names a class
            // is taken as the class and is not treated as a coordinate.
            if (id_class == UNKNOWN)
            {
                const int id_class0 = sg.FindClass(coord_name);
                if (id_class0 != UNKNOWN)
                {
                    id_class = id_class0;
                    continue;
                }
            }

            bool AFFIRM = true;

            if (coord_name.GetToken() == B_NEGATIVE)
            {
                // Negation operator before the coordinate definition!
                AFFIRM = false;
                coord_name = txtfile.read();
            }

            const GramCoordAdr iglob_coord = sg.FindCoord(coord_name.string());

            if (!iglob_coord.IsDefined())
            {
                sg.GetIO().merr().printf("Unknown coordinate %us\n", coord_name.c_str());
                lem::Iridium::Print_Error(coord_name, txtfile);
                throw lem::E_BaseException();
            }

            if (sg.coords()[iglob_coord.GetIndex()].IsBistable())
            {
                // A state name cannot be specified.
                coords.push_back(GramCoordPair(iglob_coord, AFFIRM));
            }
            else
            {
                // After the colon, the state name for the coordinate must follow.
                txtfile.read_it(B_COLON);

                // State name.
                BethToken state_name = txtfile.read();

                // Get the index of the state for the given coordinate.
                const int istate = sg.coords()[iglob_coord.GetIndex()]
                    .FindState(state_name.string());
                if (istate == UNKNOWN)
                {
                    // There is no such state for this dimension.
                    lem::Iridium::Print_Error(state_name, txtfile);
                    sg.GetIO().merr().printf(
                        "State [%vfE%us%vn] is not declared for coordinate [%vfE%us%vn]\n"
                        , state_name.c_str(), coord_name.c_str()
                    );
                    throw E_ParserError();
                }

                coords.push_back(GramCoordEx(iglob_coord, istate, AFFIRM));
            }
        }
    }

    txtfile.read_it(B_EQUAL);

    // The value after '=' may be preceded by a B_SUB token, which negates it.
    if (txtfile.probe(B_SUB))
        freq = -txtfile.read_int();
    else
        freq = txtfile.read_int();
    return;
}
开发者ID:Koziev,项目名称:GrammarEngine,代码行数:100,代码来源:sg_calibrator.cpp


示例15: SaveRules_SQL

void LexicalAutomat::SaveRules_SQL( OFormatter &out, OFormatter &alters, const SQL_Production &sql_version )
{
 if( sql_version.norules )
  return;

 PM_Automat::SaveRules_SQL( "la", out, alters, sql_version );


 lem::Ptr<LS_ResultSet> rs_predicates( GetStorage().ListPredicateTemplate() );
 while( rs_predicates->Fetch() )
 {
  int id = rs_predicates->GetInt(0);
  lem::Ptr<PredicateTemplate> t( GetStorage().LoadPredicateTemplate(id) );

  lem::UFString src( sql_version.SqlStr(t->GetSrc()) );
  
  lem::UFString params;
  for( lem::Container::size_type i=0; i<t->GetParams().size(); ++i )
   if( i>0 )
    {
     params.Add_Dirty( L"," );
     params.Add_Dirty( t->GetParams()[i].c_str() );
    }
   else
    {
     params.Add_Dirty( t->GetParams()[i].c_str() );
    }

  out.printf( "INSERT INTO predicate_template( src, params ) VALUES ( '%us', '%us' );\n", src.c_str(), params.c_str() );
 }
 rs_predicates.Delete();
 out.eol();
 out.flush();
 

 lem::Ptr<LS_ResultSet> rs_assocs( GetStorage().ListAssociatedEntries() );
 while( rs_assocs->Fetch() )
 {
  const int id = rs_assocs->GetInt(0);
  lem::Ptr<WordAssociation> assoc( GetStorage().LoadAssocitation(id) );
  assoc->SaveSQL( out, sql_version );
 }
 rs_assocs.Delete();
 out.flush();

 out.printf( "\n\n" );

 lem::Ptr<LS_ResultSet> misspelling( GetStorage().ListMisspelling() );
 while( misspelling->Fetch() )
  {
   int id_language = misspelling->GetInt(1);
   UFString old_word = sql_version.SqlStr(misspelling->GetUFString(2));
   UFString new_word = sql_version.SqlStr(misspelling->GetUFString(3));

   old_word = sql_version.SqlStr(old_word);
   new_word = sql_version.SqlStr(new_word);   

   out.printf( "INSERT INTO misspelling( id_language, original_word, substitution ) VALUES ( %d, '%us', '%us' );\n",
    id_language, old_word.c_str(), new_word.c_str() );
  }
 misspelling.Delete();
 out.eol();


 lem::Ptr<WordEntrySetEnumerator> wordentry_sets( wordentry_set->ListWordEntrySets() );
 while( wordentry_sets->Fetch() )
  {
   const WordEntrySetItem &wes = wordentry_sets->GetItem();

   lem::UFString s;
   for( std::set<int>::const_iterator it=wes.ies.begin(); it!=wes.ies.end(); ++it )
    {
     if( !s.empty() )
      s.Add_Dirty(L' ');

     s.Add_Dirty( lem::to_ustr(*it).c_str() );
    } 

   out.printf( "INSERT INTO word_entry_set( id, name, ies ) VALUES ( %d, '%us', '%us' );\n",
                 wes.GetId(), lem::to_upper(wes.GetName()).c_str(), s.c_str() );
  }
 wordentry_sets.Delete();
 out.eol();


 lem::Ptr<WordSetEnumerator> word_sets( wordentry_set->ListWordSets() );
 while( word_sets->Fetch() )
  {
   const WordSetItem &wes = word_sets->GetItem();

   lem::UFString s;
   for( std::set<lem::UCString>::const_iterator it=wes.words.begin(); it!=wes.words.end(); ++it )
    {
     if( !s.empty() )
      s.Add_Dirty(LexiconStorage::WORD_SET_DELIMITER);

     s.Add_Dirty( sql_version.SqlStr(*it) );
    } 

   out.printf( "INSERT INTO word_set( id, name, words, case_sensitive ) VALUES ( %d, '%us', '%us', %d );\n",
//.........这里部分代码省略.........
开发者ID:mcdir,项目名称:GrammarEngine,代码行数:101,代码来源:la_sql_export.cpp


示例16: is_udelim

// Returns true when s consists of exactly one character and that character
// satisfies the single-character lem::is_udelim() test.
bool lem::is_udelim( const lem::UCString &s )
{
 if( s.size()!=1 )
  return false;

 return lem::is_udelim(s.front());
}
开发者ID:mcdir,项目名称:GrammarEngine,代码行数:2,代码来源:lem_ucstring.cpp



注:本文中的lem::UCString类示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
C++ iridium::Macro_Parser类代码示例发布时间:2022-05-31
下一篇:
C++ lem::Stream类代码示例发布时间:2022-05-31
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap