Python Examples of jieba.cut_for_search - ProgramCreek.com
文章推薦指數: 80 %
def testCutForSearch(self): for content in test_contents: result = jieba.cut_for_search(content) assert isinstance(result, types. SearchbyModuleSearchbyWordProjectSearchTopPythonAPIsPopularProjectsJavaC++PythonScalaBlogreportthisadMorefromjieba.cut().lcut().setLogLevel().set_dictionary().analyse().pool().tokenize().dt().add_word().posseg().enable_parallel().load_userdict().del_word().cut_for_search().__version__()._get_abs_path().initialize().Tokenizer().suggest_freq()reportthisadRelatedMethodssys.exit()sys.argv()re.compile()sys.stdout()time.time()time.sleep()os.makedirs()logging.getLogger()os.getcwd()logging.DEBUGthreading.Thread()json.loads()json.dumps()hashlib.md5()codecs.open()collections.defaultdict()requests.get()redis.StrictRedis()jieba.cut()jieba.load_userdict()RelatedModulesossysretimeloggingdatetimerandommathjsonpicklenumpycollectionsargparserequeststensorflowPythonjieba.cut_for_search()ExamplesThefollowingare24 codeexamplesforshowinghowtousejieba.cut_for_search(). Theseexamplesareextractedfromopensourceprojects. 
Youcanvoteuptheonesyoulikeorvotedowntheonesyoudon'tlike, andgototheoriginalprojectorsourcefilebyfollowingthelinksaboveeachexample.YoumaycheckouttherelatedAPIusageonthesidebar.Youmayalsowanttocheckoutallavailablefunctions/classesofthemodule jieba ,ortrythesearchfunction .Example1Project: jieba_fast Author:deepcs233 File:jieba_test.py License:MITLicense5 votes deftestCutForSearch(self): forcontentintest_contents: result=jieba.cut_for_search(content) assertisinstance(result,types.GeneratorType),"TestCutForSearchGeneratorerror" result=list(result) assertisinstance(result,list),"TestCutForSearcherroroncontent:%s"%content print(",".join(result),file=sys.stderr) print("testCutForSearch",file=sys.stderr)Example2Project: jieba_fast Author:deepcs233 File:jieba_test.py License:MITLicense5 votes deftestCutForSearch_NOHMM(self): forcontentintest_contents: result=jieba.cut_for_search(content,HMM=False) assertisinstance(result,types.GeneratorType),"TestCutForSearchGeneratorerror" result=list(result) assertisinstance(result,list),"TestCutForSearcherroroncontent:%s"%content print(",".join(result),file=sys.stderr) print("testCutForSearch_NOHMM",file=sys.stderr)Example3Project: jieba_fast Author:deepcs233 File:test_cut_for_search.py License:MITLicense5 votes defcuttest(test_sent): result=jieba.cut_for_search(test_sent) forwordinresult: print(word,"/",end='') print("")Example4Project: jieba_fast Author:deepcs233 File:test_cut_for_search.py License:MITLicense5 votes defcuttest(test_sent): result=jieba.cut_for_search(test_sent) forwordinresult: print(word,"/",end='') print("")Example5Project: jieba_fast Author:deepcs233 File:test_multithread.py License:MITLicense5 votes defrun(self): seg_list=jieba.cut("我来到北京清华大学",cut_all=True) print("FullMode:"+"/".join(seg_list))#全模式 seg_list=jieba.cut("我来到北京清华大学",cut_all=False) print("DefaultMode:"+"/".join(seg_list))#默认模式 seg_list=jieba.cut("他来到了网易杭研大厦") print(",".join(seg_list)) seg_list=jieba.cut_for_search("小明硕士毕业于中国科学院计算所,后在日本京都大学深造")#搜索引擎模式 
print(",".join(seg_list))Example6Project: public-opinion-analysis Author:galaxyyao File:article.py License:MITLicense5 votes defcache_raw_seg(self): config=yaml.safe_load(open("./application.yml")) r=redis.StrictRedis(host=config['redis']['host'],port=config['redis']['port'],db=config['redis']['db']) foriinrange(0,len(self.sentences)): raw_word_seg_list=jieba.cut_for_search(self.sentences[i].raw_sentence) sentence_seg_id='article:'+self.article_id+':raw_seg:'+str(i) forraw_word_seginraw_word_seg_list: r.sadd(sentence_seg_id,raw_word_seg)Example7Project: Customer-Chatbot Author:WenRichard File:jiebaSegment.py License:MITLicense5 votes defcut_for_search(self,sentence,stopword=True): seg_list=jieba.cut_for_search(sentence) results=[] forseginseg_list: ifstopwordandseginself.stopwords: continue results.append(seg) returnresultsExample8Project: Customer-Chatbot Author:WenRichard File:jiebaSegment.py License:MITLicense5 votes defcut_for_search(self,sentence,stopword=True): seg_list=jieba.cut_for_search(sentence) results=[] forseginseg_list: ifstopwordandseginself.stopwords: continue results.append(seg) returnresultsExample9Project: QAmodel-for-Retrievalchatbot Author:WenRichard File:jiebaSegment.py License:MITLicense5 votes defcut_for_search(self,sentence,stopword=True): seg_list=jieba.cut_for_search(sentence) results=[] forseginseg_list: ifstopwordandseginself.stopwords: continue results.append(seg) returnresultsExample10Project: min Author:ziyueit File:min.py License:GNUGeneralPublicLicensev2.05 votes defsearch(self,keywords,start=0,length=20): """ 搜索关键字 """ seg_list=list(jieba.cut_for_search(keywords)) key_list=self.search_by_words(seg_list,start,length) returnkey_listExample11Project: min Author:ziyueit File:min.py License:GNUGeneralPublicLicensev2.05 votes defadd_content(self,content,obj_key): """ 添加文档到索引 """ seg_list=jieba.cut_for_search(content) seg_list=min_nlp.get_weight(seg_list) self.add_word_index(seg_list,obj_key)Example12Project: annotated_jieba 
Author:ustcdane File:jieba_test.py License:MITLicense5 votes deftestCutForSearch(self): forcontentintest_contents: result=jieba.cut_for_search(content) assertisinstance(result,types.GeneratorType),"TestCutForSearchGeneratorerror" result=list(result) assertisinstance(result,list),"TestCutForSearcherroroncontent:%s"%content print(",".join(result),file=sys.stderr) print("testCutForSearch",file=sys.stderr)Example13Project: annotated_jieba Author:ustcdane File:jieba_test.py License:MITLicense5 votes deftestCutForSearch_NOHMM(self): forcontentintest_contents: result=jieba.cut_for_search(content,HMM=False) assertisinstance(result,types.GeneratorType),"TestCutForSearchGeneratorerror" result=list(result) assertisinstance(result,list),"TestCutForSearcherroroncontent:%s"%content print(",".join(result),file=sys.stderr) print("testCutForSearch_NOHMM",file=sys.stderr)Example14Project: annotated_jieba Author:ustcdane File:test_cut_for_search.py License:MITLicense5 votes defcuttest(test_sent): result=jieba.cut_for_search(test_sent) forwordinresult: print(word,"/",end='') print("")Example15Project: annotated_jieba Author:ustcdane File:test_cut_for_search.py License:MITLicense5 votes defcuttest(test_sent): result=jieba.cut_for_search(test_sent) forwordinresult: print(word,"/",end='') print("")Example16Project: annotated_jieba Author:ustcdane File:test_multithread.py License:MITLicense5 votes defrun(self): seg_list=jieba.cut("我来到北京清华大学",cut_all=True) print("FullMode:"+"/".join(seg_list))#全模式 seg_list=jieba.cut("我来到北京清华大学",cut_all=False) print("DefaultMode:"+"/".join(seg_list))#默认模式 seg_list=jieba.cut("他来到了网易杭研大厦") print(",".join(seg_list)) seg_list=jieba.cut_for_search("小明硕士毕业于中国科学院计算所,后在日本京都大学深造")#搜索引擎模式 print(",".join(seg_list))Example17Project: Malicious_Domain_Whois Author:h-j-13 File:jieba_test.py License:GNUGeneralPublicLicensev3.05 votes deftestCutForSearch(self): forcontentintest_contents: result=jieba.cut_for_search(content) 
assertisinstance(result,types.GeneratorType),"TestCutForSearchGeneratorerror" result=list(result) assertisinstance(result,list),"TestCutForSearcherroroncontent:%s"%content print(",".join(result),file=sys.stderr) print("testCutForSearch",file=sys.stderr)Example18Project: Malicious_Domain_Whois Author:h-j-13 File:jieba_test.py License:GNUGeneralPublicLicensev3.05 votes deftestCutForSearch_NOHMM(self): forcontentintest_contents: result=jieba.cut_for_search(content,HMM=False) assertisinstance(result,types.GeneratorType),"TestCutForSearchGeneratorerror" result=list(result) assertisinstance(result,list),"TestCutForSearcherroroncontent:%s"%content print(",".join(result),file=sys.stderr) print("testCutForSearch_NOHMM",file=sys.stderr)Example19Project: Malicious_Domain_Whois Author:h-j-13 File:test_cut_for_search.py License:GNUGeneralPublicLicensev3.05 votes defcuttest(test_sent): result=jieba.cut_for_search(test_sent) forwordinresult: print(word,"/",end='') print("")Example20Project: Malicious_Domain_Whois Author:h-j-13 File:test_cut_for_search.py License:GNUGeneralPublicLicensev3.05 votes defcuttest(test_sent): result=jieba.cut_for_search(test_sent) forwordinresult: print(word,"/",end='') print("")Example21Project: Malicious_Domain_Whois Author:h-j-13 File:test_multithread.py License:GNUGeneralPublicLicensev3.05 votes defrun(self): seg_list=jieba.cut("我来到北京清华大学",cut_all=True) print("FullMode:"+"/".join(seg_list))#全模式 seg_list=jieba.cut("我来到北京清华大学",cut_all=False) print("DefaultMode:"+"/".join(seg_list))#默认模式 seg_list=jieba.cut("他来到了网易杭研大厦") print(",".join(seg_list)) seg_list=jieba.cut_for_search("小明硕士毕业于中国科学院计算所,后在日本京都大学深造")#搜索引擎模式 print(",".join(seg_list))Example22Project: Information_retrieva_Projectl- Author:Google1234 File:main.py License:MITLicense5 votes defGET(self): data=web.input() ifdata: searchword=data.searchword else: searchword='' news_list=list() topic=list() ifsearchword: cut=jieba.cut_for_search(searchword) word_list=[] forwordincut: 
ifwordnotinpunctandwordnotinLetters_and_numbers: word_list.append(word.encode("utf-8")) topK=query.calculate(word_list,config.query_return_numbers) forkintopK: data=dict() title,content,url=id_index.get_data(k) data['id']=k data['content']=content.decode("utf-8")[:config.query_return_snipper_size] data['title']=title.decode("utf-8") data['url']=url.decode("utf-8") news_list.append(data) deldata,cut,word_list,word,topK,title,content,url #word2Vec推荐相似主题 word2vec.cal(searchword.encode('utf-8')) printword2vec.result.length ifword2vec.result.length==0:#词不存在,长度为1 pass else: foriinrange(config.recommand_topic_numbers): topic.append(word2vec.result.word[i].char) returnrender.index(searchword,news_list,topic)Example23Project: Information_retrieva_Projectl- Author:Google1234 File:main.py License:MITLicense5 votes defGET(self): data=web.input() ifdata: ID=data.id news=dict() title,content,url=id_index.get_data(int(ID)) news['content']=content.decode("utf-8") news['title']=title.decode("utf-8") news['url']=url.decode("utf-8") recomand=[] #在线方法 cut=jieba.cut_for_search(content) word_list=[] forwordincut: ifwordnotinpunctandwordnotinLetters_and_numbers: #计算文档间相似度,必须去停用词,否则太慢 ifrecommand.stopword.has_key(word.encode("utf-8")): pass else: word_list.append(word.encode("utf-8")) topk=recommand.calculate(word_list,config.recommand_numbers,10) foriintopk:#在线方法 #foriinrecommand.dic[int(ID)]:#离线方法 ifi!=int(ID): title,content,url=id_index.get_data(i) recomand.append([title.decode('utf-8'),content.decode('utf-8'),url.decode('utf-8')]) news['recommand']=recomand deltitle,content,url,recomand else: ID='' news=dict() news['title']="NoSuchNews" news['content']="OhNo!" 
news['url']="#" news['recommand']=[['','','']forminrange(config.recommand_numbers)] returnrender.news(news)Example24Project: Information_retrieva_Projectl- Author:Google1234 File:similar_doc.py License:MITLicense5 votes defcalculate(self,doc_id,Top_numbers=10,multiple=10): title,content,url=self.index.get_data(doc_id) cut=jieba.cut_for_search(content) word_list=[] forwordincut: ifwordnotinself.punctandwordnotinself.Letters_and_numbers: #计算文档间相似度,必须去停用词,否则太慢 ifself.stopword.has_key(word.encode("utf-8")): pass else: word_list.append(word.encode("utf-8")) returnself.FastCos.calculate(word_list,Top_numbers,multiple)AboutPrivacyContact
延伸文章資訊
- 1. 中文分词工具jieba分词器的使用 - 51CTO博客
jieba.cut和jieba.cut_for_search返回的结果都是一个可迭代的生成器,可以 ... jieba可以很方便地获取中文词性,通过jieba.posseg模块实现词性标注.
- 2. Python Examples of jieba.cut_for_search - ProgramCreek.com
def testCutForSearch(self): for content in test_contents: result = jieba.cut_for_search(content) ...
- 3. fxsjy/jieba: 结巴中文分词
Contribute to fxsjy/jieba development by creating an account on GitHub. ... jieba.cut_for_search ...
- 4. python 結巴分詞(jieba)詳解 - 程式人生
"Jieba" (Chinese for "to stutter") Chinese text segmentation: ... jieba.cut_for_search 方法接受兩個引數:需...
- 5. LiveMirror/jieba: 结巴中文分词做最好的Python分词组件 - GitHub
Contribute to LiveMirror/jieba development by creating an account on GitHub. ... jieba.cut_for_se...