Python wordcloud 模块,WordCloud() 实例源码

我们从Python开源项目中,提取了以下50个代码示例,用于说明如何使用wordcloud.WordCloud()

项目:lyricswordcloud    作者:qwertyyb    | 项目源码 | 文件源码
def showData(self):
    """Render ``self.data`` as an image-coloured word cloud and display it.

    The picture at ``self.picfile`` is used both as the layout mask and as
    the colour source; ``self.data`` is handed to
    ``WordCloud.generate_from_frequencies``.
    """
    print('???,????···')
    shape = imread(self.picfile)
    cloud_options = {
        'font_path': './msyhl.ttc',
        'mask': shape,
        'background_color': 'white',
        'max_font_size': 200,
        'max_words': 300,
        # Word colours are taken from the mask image itself.
        'color_func': ImageColorGenerator(shape),
    }
    rendered = WordCloud(**cloud_options).generate_from_frequencies(self.data)

    plt.figure()
    plt.imshow(rendered)
    plt.axis('off')
    print('?????')
    plt.show()
项目:SFBIStats    作者:royludo    | 项目源码 | 文件源码
def create_wordcloud(corpus, output, stopword_dict):
    """Build a word cloud from ``corpus`` and save the figure to ``output``.

    Frequencies come from the helpers ``build_lex_dic``, ``get_total_words``
    and ``build_freq_list``; only the first 100 entries of the resulting
    list are drawn.
    """
    lexicon = build_lex_dic(corpus, stopword_dict=stopword_dict)
    word_total = get_total_words(lexicon)
    top_frequencies = build_freq_list(lexicon, word_total)[0:100]

    # Frameless figure with a single axes stretched over the whole canvas.
    figure = plt.figure(figsize=(10, 8), frameon=False)
    canvas_axes = plt.Axes(figure, [0., 0., 1., 1.])
    canvas_axes.set_axis_off()
    figure.add_axes(canvas_axes)

    cloud = WordCloud(
        width=1000,
        height=800,
        max_words=100,
        background_color='white',
        relative_scaling=0.7,
        random_state=15,
        prefer_horizontal=0.5,
    )
    cloud = cloud.generate_from_frequencies(top_frequencies)
    cloud.recolor(random_state=42, color_func=my_color_func)

    canvas_axes.imshow(cloud)
    figure.savefig(output, facecolor='white')
项目:WebAppEx    作者:karlafej    | 项目源码 | 文件源码
def get_plot(limit, txt, wc_mask=wc_mask, stop=english_stopwords):
    """Render a word cloud of ``txt`` and return it as a base64 PNG string.

    Args:
        limit: Maximum number of words to draw (``max_words``).
        txt: Text the cloud is generated from.
        wc_mask: Mask handed to ``WordCloud``.
        stop: Stopwords handed to ``WordCloud``.

    Returns:
        The PNG bytes of the plot, base64-encoded and decoded to ``str``.
    """
    wordcloud = WordCloud(
        max_words=limit,
        stopwords=stop,
        mask=wc_mask
        ).generate(txt)
    fig = plt.figure()
    try:
        fig.set_figwidth(8)
        fig.set_figheight(8)
        plt.imshow(wordcloud, interpolation="bilinear")
        plt.axis('off')

        figfile = BytesIO()
        plt.savefig(figfile, format='png')
    finally:
        # pyplot keeps every figure registered until it is closed; without
        # this each call would leave one more 8x8 figure in memory.
        plt.close(fig)

    return base64.b64encode(figfile.getvalue()).decode()
项目:cloudy_search    作者:tim-shane    | 项目源码 | 文件源码
def create_cloud(self):
    """Overlay a word cloud of the search snippets on their mask image.

    The text comes from ``self.return_txt()`` and the mask from
    ``self.return_img()``.

    Returns:
        The ``plt`` module, with the grey mask and the recoloured,
        semi-transparent cloud drawn on the current axes.
    """
    snippets = self.return_txt()
    mask_pixels = np.array(self.return_img())

    # Private copy of STOPWORDS (a search modifier could be added here).
    ignored = set(STOPWORDS)

    cloud = WordCloud(background_color="white", mask=mask_pixels, stopwords=ignored)
    cloud.generate(snippets)

    palette = ImageColorGenerator(mask_pixels)
    plt.imshow(mask_pixels, cmap=plt.cm.gray, interpolation="None")
    tinted = cloud.recolor(color_func=palette)
    plt.imshow(tinted, alpha=.8, interpolation='None')
    plt.axis("off")
    return plt
项目:QProb    作者:quant-trade    | 项目源码 | 文件源码
def full_wordcloud():
    """
    Generates wordcloud for the site.

    The content of every post is concatenated, passed through
    ``words_wo_stopwords`` and rendered to ``images/wordcloud.png`` under
    ``settings.STATIC_ROOT``. Any exception is printed, not raised.
    """
    try:
        rows = Post.objects.filter().values("content")
        # Every content is followed by one space, the last one included.
        combined = "".join(row["content"] + " " for row in rows)

        cleaned = words_wo_stopwords(text=combined)
        cloud = WordCloud(
            max_font_size=40,
            background_color="rgba(255, 255, 255, 0)",
            width=350,
            height=600,
            mode="RGBA",
        ).generate(cleaned)

        figure = plt.figure(frameon=False)
        figure.patch.set_visible(False)
        axes = figure.add_axes([0, 0, 1, 1])
        axes.axis('off')
        axes.imshow(cloud, interpolation='bilinear')

        target = join(settings.STATIC_ROOT, 'images', 'wordcloud.png')
        plt.savefig(target)
        plt.close()
    except Exception as err:
        print(err)
项目:QProb    作者:quant-trade    | 项目源码 | 文件源码
def posts_wordcloud():
    """
    Generates wordcloud for each post.

    Posts with empty content are excluded. A post is skipped when its image
    file already exists or when its stopword-free text is 100 characters or
    shorter. Errors for a single post are printed and the loop continues.
    """
    for post in Post.objects.filter().exclude(content=""):
        try:
            file_name = "{0}.png".format(post.slug)
            image_file = join(settings.STATIC_ROOT, "wordcloud", file_name)
            if isfile(image_file):
                continue

            text = words_wo_stopwords(text=post.content)
            if len(text) <= 100:
                continue

            cloud = WordCloud(
                max_font_size=40,
                background_color="rgba(255, 255, 255, 0)",
                width=800,
                height=350,
                mode="RGBA",
            ).generate(text)

            figure = plt.figure(frameon=False)
            figure.patch.set_visible(False)
            axes = figure.add_axes([0, 0, 1, 1])
            axes.axis('off')
            axes.imshow(cloud, interpolation='bilinear')
            plt.savefig(image_file)
            plt.close()

            post.wordcloud = "static/wordcloud/{0}.png".format(post.slug)
            post.save()
        except Exception as err:
            print(err)
项目:QProb    作者:quant-trade    | 项目源码 | 文件源码
def make_wordcloud(entry):
    """
    Makes singular wordcloud for a post.

    When the stopword-free text of ``entry.content`` is longer than 100
    characters, the image is saved under ``settings.STATIC_ROOT/wordcloud``
    and ``entry.wordcloud`` is set to its static path. ``entry`` is
    returned in every case and is not saved here.
    """
    text = words_wo_stopwords(text=entry.content)
    if len(text) <= 100:
        return entry

    cloud = WordCloud(
        max_font_size=60,
        background_color="rgba(255, 255, 255, 0)",
        mode="RGBA",
    ).generate(text)

    figure = plt.figure(frameon=False)
    figure.patch.set_visible(False)
    axes = figure.add_axes([0, 0, 1, 1])
    axes.axis('off')
    axes.imshow(cloud, interpolation='bilinear')

    image_name = "{0}.png".format(entry.slug)
    plt.savefig(join(settings.STATIC_ROOT, "wordcloud", image_name))
    plt.close()
    entry.wordcloud = "static/wordcloud/{0}.png".format(entry.slug)
    return entry
项目:csss-minion    作者:henrymzhao    | 项目源码 | 文件源码
def populateCaches(self):
    """Reload ``self.serverCache`` from the database and redraw the server cloud.

    Every ``msgs`` value of ``self.tablename`` is read; when the query fails
    or yields nothing, ``self.backupArr`` is used instead. The cached
    messages are then joined and written as an image to ``self.serverImage``.
    """
    try:
        cursor = self.bot.conn_wc.cursor()
        cursor.execute("SELECT msgs FROM "+self.tablename) # hashtag no limits
        messages = [row[0] for row in cursor.fetchall()]
        self.serverCache = messages if messages else self.backupArr
    except Exception as e:
        print("server cache retrieval error: \n", e)
        self.serverCache = self.backupArr

    combined = " ".join(self.serverCache)
    print("generating word cloud")
    cloud = WordCloud(
        width=1024,
        height=1024,
        max_words=200000,  # take it to the limit
        stopwords=self.STOPWORDS,
    ).generate(combined)
    cloud.to_file(self.serverImage)
项目:text_analysis    作者:mathlf2015    | 项目源码 | 文件源码
def get_result(url_set):
    """Download each page in ``url_set`` and show a word cloud of its posts.

    The text of every ``span.ctt`` element is stripped of tag-like
    fragments and non-word characters, segmented with ``jieba`` and the
    combined result is displayed with matplotlib.

    Args:
        url_set: Iterable of page URLs, fetched with the module-level
            ``headers``.
    """
    line_set = []
    for url in url_set:
        wb_data = requests.get(url, headers=headers)
        soup = BeautifulSoup(wb_data.text, 'lxml')
        for node in soup.select('span.ctt'):
            text = re.sub('<[^>]*>', '', node.text)
            # NOTE(review): '??' is not a valid regular expression ("nothing
            # to repeat"); the original pattern was probably non-ASCII text
            # lost to an encoding error - restore it from the upstream source.
            text = re.sub('??', ' ', text)
            # Raw string: "\W" in a plain literal is an invalid escape.
            text = re.sub(r'[\W]+', ' ', text)
            line_set.append(text)

    word_list = [" ".join(jieba.cut(sentence)) for sentence in line_set]
    new_text = ' '.join(word_list)
    wordcloud = WordCloud(font_path="C:/Python34/Lib/site-packages/wordcloud/simhei.ttf", background_color="black").generate(new_text)
    plt.imshow(wordcloud)
    plt.axis("off")
    plt.show()
项目:matplotlib_venn_wordcloud    作者:paulbrodersen    | 项目源码 | 文件源码
def _get_wordcloud(img, patch, words, word_to_frequency=None, **wordcloud_kwargs):
    """Create a word cloud whose mask is derived from the outline of ``patch``.

    Args:
        img: Object providing ``pixel_coordinates``, ``x_resolution`` and
            ``y_resolution``.
        patch: Object whose ``get_path()`` outline defines the region.
        words: Words to draw.
        word_to_frequency: Optional mapping from word to frequency; when
            falsy, the cloud is generated from the space-joined words.
        **wordcloud_kwargs: Forwarded to ``WordCloud``.

    Returns:
        The generated ``WordCloud``.
    """
    # Which pixel coordinates of ``img`` lie inside the patch outline.
    outline = patch.get_path()
    grid_shape = (img.y_resolution, img.x_resolution)
    inside = outline.contains_points(img.pixel_coordinates).reshape(grid_shape)

    # Invert and scale to uint8 (black indicates mask position), then flip
    # vertically because the image origin is in the upper left.
    mask = np.flipud((~inside * 255).astype(np.uint8))

    wc = WordCloud(mask=mask, background_color=None, mode="RGBA", **wordcloud_kwargs)

    if word_to_frequency:
        wc.generate_from_frequencies({word: word_to_frequency[word] for word in words})
    else:
        wc.generate(" ".join(words))

    return wc
项目:51job    作者:chenjiandongx    | 项目源码 | 文件源码
def world_cloud():
    """Draw a word cloud from ``data/post_pre_desc_counter.csv``.

    Column 0 of each row is the word and column 1 its integer count. The
    cloud is shown with matplotlib and then written to ``images/wc.jpg``.
    """
    counter = {}
    csv_path = os.path.join("data", "post_pre_desc_counter.csv")
    with open(csv_path, "r", encoding="utf-8") as f:
        for row in csv.reader(f):
            # First occurrence wins: a repeated word keeps its earlier count.
            counter.setdefault(row[0], int(row[1]))
        pprint(counter)

    cloud = WordCloud(
        font_path=os.path.join("font", "msyh.ttf"),
        max_words=100,
        height=600,
        width=1200,
    )
    cloud = cloud.generate_from_frequencies(counter)

    plt.imshow(cloud)
    plt.axis('off')
    plt.show()
    cloud.to_file(os.path.join("images", "wc.jpg"))
项目:courses.uno    作者:BenDoan    | 项目源码 | 文件源码
def plot_cloud(text):
    """Render ``text`` as a masked word cloud and return the plot as PNG bytes.

    Returns:
        The bytes written by ``FigureCanvas.print_png`` for the current
        figure, resized to 18.5 x 10.5 inches.
    """
    # "uno_mask.png" with max_words=200 is the alternative configuration.
    max_words = 300
    mask = np.array(Image.open(path.join(d, "mav_mask.png")))
    ignored = STOPWORDS.union(common_words)

    cloud = WordCloud(
        background_color="white",
        width=2400,
        height=2400,
        mask=mask,
        stopwords=ignored,
        max_words=max_words,
    ).generate(text)

    # Draw the generated image on the current axes.
    plt.imshow(cloud, interpolation='bilinear')
    plt.axis("off")

    figure = plt.gcf()
    figure.set_size_inches(18.5, 10.5)
    buffer = BytesIO()
    FigureCanvas(figure).print_png(buffer)
    return buffer.getvalue()
项目:FacebookGraphAPI-Examples    作者:nikhilkumarsingh    | 项目源码 | 文件源码
def wcloud(text):
    """Generate a masked word cloud of ``text`` and save it as a PNG.

    The text is passed through ``clean_text`` first. The output file name
    is read interactively and ``.png`` is appended to it.
    """
    face_mask = np.array(Image.open("face_mask.png"))   #choose mask
    options = dict(
        background_color="white",
        mask=face_mask,
        max_words=80,
        stopwords=set(STOPWORDS),
        width=800,
        height=400,
        mode="RGB",
        relative_scaling=0.5,
    )
    cloud = WordCloud(**options)
    cloud.generate(clean_text(text))

    # raw_input is the Python 2 built-in; this snippet targets Python 2.
    file_name = raw_input("Enter any name for the Word Cloud image:") +'.png'
    cloud.to_file(file_name)
项目:web-crawler-tutorial    作者:jwlin    | 项目源码 | 文件源码
def lyrics():
    """Show a word cloud of the lyrics stored in ``lyrics.json``.

    Every value of the JSON object is segmented with ``jieba``; the ten
    most common tokens are printed before the cloud is displayed.
    """
    with open('lyrics.json', 'r', encoding='utf-8') as f:
        songs = json.load(f)

    # Keep segments that contain non-whitespace text and are longer than
    # one character.
    tokens = [
        seg
        for lyric in songs.values()
        for seg in jieba.cut(lyric)
        if seg.split() and len(seg) > 1
    ]

    print(Counter(tokens).most_common(10))

    # A font with CJK glyphs is supplied so the tokens can be rendered.
    cloud = WordCloud(font_path='NotoSansMonoCJKtc-Regular.otf').generate(' '.join(tokens))
    plt.imshow(cloud)
    plt.axis('off')
    plt.show()
项目:TwitterPeruser    作者:ilyauts    | 项目源码 | 文件源码
def generateWordCloud(text):
    """Display a word cloud of ``text`` shaped by ``us-mask-white.png``.

    The mask image is loaded from the directory containing this file.
    """
    here = path.dirname(__file__)
    mask = np.array(Image.open(path.join(here, "us-mask-white.png")))

    ignored = set(STOPWORDS)
    ignored.add("said")

    cloud = WordCloud(
        background_color="black",
        max_words=2000,
        mask=mask,
        stopwords=ignored,
        max_font_size=40,
        random_state=42,
    )
    cloud.generate(text)

    # Built from the mask image but not applied to the cloud shown below.
    image_colors = ImageColorGenerator(mask)

    plt.imshow(cloud)
    plt.axis("off")
    plt.show()
项目:TwitterPeruser    作者:ilyauts    | 项目源码 | 文件源码
def generateTable(text, n=5):
    """Print a table of the ``n`` most frequent terms in ``text``.

    Frequencies come from ``WordCloud.process_text`` using the same
    stopwords and settings as the cloud itself.

    Args:
        text: Text to analyse.
        n: Number of rows to print (default 5).
    """
    # Start by getting a frequency dictionary
    d = path.dirname(__file__)

    cloud_coloring = np.array(Image.open(path.join(d, "us-mask-white.png")))
    stopwords = set(STOPWORDS)
    stopwords.add("said")

    wc = WordCloud(background_color="black", max_words=2000, mask=cloud_coloring,
                   stopwords=stopwords, max_font_size=40, random_state=42)

    frequenciesDict = wc.process_text(text)

    # Plain lists behave the same on Python 2 and 3; on Python 3 keys() and
    # values() are views rather than lists.
    frequencies = pd.DataFrame({
        'words': list(frequenciesDict.keys()),
        'frequencies': list(frequenciesDict.values()),
    })
    frequencies.sort_values('frequencies', ascending=False, inplace=True)

    # Single-argument print(...) prints the same text as a Python 2 print
    # statement and as the Python 3 function. The header follows ``n``
    # instead of always saying 5.
    print('\nTop {0} Terms\n'.format(n))
    print(frequencies.head(n=n).to_string(index=False))
    print('\n')
项目:crawler_html2pdf    作者:lzjun567    | 项目源码 | 文件源码
def generate_image():
    """Build a grey word cloud from ``weibo1.txt`` and save it as ``heart2.jpg``.

    Up to 20 keywords are extracted from every line with
    ``jieba.analyse.extract_tags`` (stop words from ``./stopwords.txt``),
    laid out inside the mask image and saved at 1600 dpi.
    """
    jieba.analyse.set_stop_words("./stopwords.txt")
    keywords = []

    with codecs.open("weibo1.txt", 'r', encoding="utf-8") as f:
        for line in f.readlines():
            keywords.extend(jieba.analyse.extract_tags(line, topK=20))
        joined = " ".join(keywords)

        mask_img = imread('./52f90c9a5131c.jpg', flatten=True)
        cloud = WordCloud(
            font_path='msyh.ttc',
            background_color='white',
            mask=mask_img
        ).generate(joined)

        recolored = cloud.recolor(color_func=grey_color_func, random_state=3)
        plt.imshow(recolored, interpolation="bilinear")
        plt.axis('off')
        plt.savefig('./heart2.jpg', dpi=1600)
项目:wordclouds    作者:inmagik    | 项目源码 | 文件源码
def save_cloud(frequencies, output, options={}, color_func=None,canvas_width=0, canvas_height=0):
    """Render ``frequencies`` as a word cloud and save it to ``output``.

    Args:
        frequencies: Passed to ``WordCloud.generate_from_frequencies``.
        output: Destination passed to ``Image.save``.
        options: Overrides for ``WORD_CLOUD_DEFAULTS``; entries whose value
            is ``None`` are dropped. Only read, never modified.
        color_func: Optional colour function applied with ``recolor``.
        canvas_width: Width of an optional canvas the cloud is centred on.
        canvas_height: Height of that canvas.

    Returns:
        Whatever ``Image.save`` returns.
    """
    base_options = copy(WORD_CLOUD_DEFAULTS)
    base_options.update(options)
    clean_options = { x : base_options[x] for x in base_options if base_options[x] is not None}

    wordcloud = WordCloud(**clean_options).generate_from_frequencies(frequencies)

    if color_func:
        wordcloud = wordcloud.recolor(color_func=color_func)

    image = wordcloud.to_image()

    # With non-square options and no explicit canvas, use the configured
    # size as the canvas.
    if clean_options.get("height") != clean_options.get("width") and not canvas_width and not canvas_height:
        canvas_height = clean_options.get("height")
        canvas_width = clean_options.get("width")

    if canvas_width and canvas_height:
        final_image = Image.new(image.mode, (canvas_width, canvas_height), clean_options.get("background_color"))
        # Floor division keeps the offset integral: "/" yields floats on
        # Python 3, which are not valid pixel coordinates for paste().
        offset = ((final_image.size[0] - image.size[0]) // 2, (final_image.size[1] - image.size[1]) // 2)
        final_image.paste(image, offset)
        return final_image.save(output)

    return image.save(output)
项目:cnblogs    作者:hao15239129517    | 项目源码 | 文件源码
def main():
    """Start the title and image worker threads and wait for both.

    Both threads share one queue: ``getTitle`` receives the ``articles``
    collection of the local ``cnblogs`` MongoDB database, ``getImg``
    receives the configured ``WordCloud``. The ``wordcloudimgs`` directory
    is created when missing.
    """
    client = pymongo.MongoClient(host='127.0.0.1', port=27017)
    articles = client['cnblogs']['articles']
    cloud = WordCloud(
        font_path='msyh.ttc',
        background_color='#ccc',
        width=600,
        height=600,
    )
    if not os.path.exists('wordcloudimgs'):
        os.mkdir('wordcloudimgs')

    queue = Queue()
    title_worker = MyThread(getTitle, (queue, articles))
    image_worker = MyThread(getImg, (queue, cloud))

    # The image worker is started (and joined) before the title worker.
    workers = [image_worker, title_worker]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()
项目:CloudMusic-Crawler    作者:GreatV    | 项目源码 | 文件源码
def generate_wordcloud(words_list, mask_path):
    """Display a word cloud of ``words_list`` shaped by the image at ``mask_path``.

    Args:
        words_list: Iterable of strings, joined with single spaces.
        mask_path: Path of the mask image.
    """
    joined = ' '.join(words_list)
    mask = np.array(Image.open(mask_path))

    cloud = WordCloud(
        font_path='data/SourceHanSerifCN-Regular.otf',
        background_color='white',
        max_words=2000,
        mask=mask,
    )
    cloud.generate(joined)

    plt.imshow(cloud, interpolation='bilinear')
    plt.axis("off")
    plt.show()
项目:LizardDance    作者:guerbai    | 项目源码 | 文件源码
def generate_ciyun_pic():
    """Show a word cloud of the lyric file belonging to the global ``singer``.

    Reads ``./<singer>lyric.txt``, removes two substrings, segments the
    rest with ``jieba`` in full mode and lays the words out inside
    ``./mask.jpg``.
    """
    import matplotlib.pyplot as plt
    from wordcloud import WordCloud
    import jieba
    from cv2 import imread

    # Context manager so the file handle is closed as soon as it is read.
    with open('./{}lyric.txt'.format(singer), 'r') as lyric_file:
        lyric_text = lyric_file.read()
    # NOTE(review): both '??' literals look mis-encoded; confirm the
    # intended substrings against the upstream source.
    lyric_text = lyric_text.replace('??', '').replace('??', '')

    wordlist_after_jieba = jieba.cut(lyric_text, cut_all = True)
    wl_space_split = " ".join(wordlist_after_jieba)

    mask_img = imread('./mask.jpg')# , flatten=True)
    my_wordcloud = WordCloud(
            font_path='msyh.ttc',
            background_color='white',
            mask=mask_img
            ).generate(wl_space_split)

    plt.imshow(my_wordcloud)
    plt.axis("off")
    plt.show()
项目:lianjia    作者:learrn    | 项目源码 | 文件源码
def title_word_cloud():
    """
    Show a word cloud built from the ``title`` column of ``rent_ave.csv``.

    All titles are concatenated, cleaned once with ``jieba_clear_text``,
    printed and rendered.
    """
    wc = WordCloud(background_color='white',
                   stopwords=STOPWORDS,
                   max_words=1000,
                   font_path='C:/Python27/Lib/site-packages/matplotlib/mpl-data/fonts/ttf/simhei.ttf',
                   max_font_size=50,
                   random_state=30,
                   )
    with open('rent_ave.csv') as csvfile:
        reader = [each for each in csv.DictReader(csvfile)]

    # Every title is followed by one space, the last one included.
    text = ''.join(row[u'title'] + ' ' for row in reader)

    # Clean the text once and reuse it for both the printout and the cloud.
    cleaned = jieba_clear_text(text)
    # Single-argument print(...) works as a Python 2 statement and as the
    # Python 3 function.
    print(cleaned)
    wc.generate(cleaned)
    plt.imshow(wc)
    plt.axis('off')
    plt.show()
项目:newsclouds-engine    作者:inmagik    | 项目源码 | 文件源码
def save_cloud(frequencies, output, options={}, color_func=None,canvas_width=0, canvas_height=0):
    """Render ``frequencies`` as a word cloud and save it to ``output``.

    ``options`` overrides ``WORD_CLOUD_DEFAULTS``; entries whose value is
    ``None`` are dropped before ``WordCloud`` is built. When a canvas size
    is given, or the configured width and height differ and no canvas size
    was passed, the cloud is centred on a new image of that size.

    Returns:
        Whatever ``Image.save`` returns.
    """
    merged = copy(WORD_CLOUD_DEFAULTS)
    merged.update(options)
    active = {name: merged[name] for name in merged if merged[name] is not None}

    cloud = WordCloud(**active).generate_from_frequencies(frequencies)
    if color_func:
        cloud = cloud.recolor(color_func=color_func)
    image = cloud.to_image()

    height = active.get("height")
    width = active.get("width")
    no_canvas_given = not canvas_width and not canvas_height
    if height != width and no_canvas_given:
        canvas_height, canvas_width = height, width

    if not (canvas_width and canvas_height):
        return image.save(output)

    canvas = Image.new(image.mode, (canvas_width, canvas_height), active.get("background_color"))
    # Centre the cloud; int() keeps the paste coordinates integral.
    left = int((canvas.size[0] - image.size[0]) / 2)
    top = int((canvas.size[1] - image.size[1]) / 2)
    canvas.paste(image, (left, top))
    return canvas.save(output)
项目:InterestingCrawler    作者:Maicius    | 项目源码 | 文件源码
def drawWordCloud(word_text, filename):
    """Show a word cloud of ``word_text`` coloured from ``hello.jpg`` and save it.

    Args:
        word_text: Text the cloud is generated from.
        filename: Path the finished cloud is written to.
    """
    mask = imread('hello.jpg')
    options = dict(
        background_color='white',
        mask=mask,  # the same image later provides the word colours
        max_words=2000,
        stopwords=STOPWORDS,
        font_path='/System/Library/Fonts/Hiragino Sans GB W6.ttc',
        max_font_size=50,
        random_state=30,
        scale=1,
    )
    cloud = WordCloud(**options).generate(word_text)
    cloud.recolor(color_func=ImageColorGenerator(mask))

    # Display first, then write the image file.
    plt.imshow(cloud)
    plt.axis("off")
    plt.show()
    cloud.to_file(filename=filename)
    print()
项目:InterestingCrawler    作者:Maicius    | 项目源码 | 文件源码
def drawWordCloud(word_text, filename):
    """Show a word cloud of ``word_text`` coloured from ``bike.jpg`` and save it.

    Args:
        word_text: Text the cloud is generated from.
        filename: Path the finished cloud is written to.
    """
    mask = imread('bike.jpg')
    options = dict(
        background_color='white',
        mask=mask,  # the same image later provides the word colours
        max_words=2000,
        stopwords=STOPWORDS,
        font_path='/System/Library/Fonts/Hiragino Sans GB W6.ttc',
        max_font_size=50,
        random_state=30,
        scale=1.3,
    )
    cloud = WordCloud(**options).generate(word_text)
    cloud.recolor(color_func=ImageColorGenerator(mask))

    # Display first, then write the image file.
    plt.imshow(cloud)
    plt.axis("off")
    plt.show()
    cloud.to_file(filename=filename)
    print()
项目:webcrawling    作者:etilelab    | 项目源码 | 文件源码
def analyze(content):
    """Show a word cloud of the 100 most common nouns in ``content``.

    ``content`` is converted with ``str()`` and its nouns are extracted
    with the module-level tagger ``t``. Nouns listed in ``trash`` are
    discarded before counting.
    """
    nouns = t.nouns(str(content))

    # NOTE(review): these literals (and the Text name below) look
    # mis-encoded; restore the intended words from the upstream source.
    trash = ["??","????","??","??","??","??","?????"]
    # Build a new list rather than calling nouns.remove() while iterating
    # over nouns, which skips the element right after every removal.
    nouns = [noun for noun in nouns if noun not in trash]

    ko = nltk.Text(nouns, name="??")

    # The 100 most common tokens, as a word -> count mapping.
    ranking = ko.vocab().most_common(100)
    tmpData = dict(ranking)

    wordcloud = WordCloud(font_path="/Library/Fonts/AppleGothic.ttf",relative_scaling=0.2,background_color="white",).generate_from_frequencies(tmpData)

    plt.figure(figsize=(16,8))
    plt.imshow(wordcloud)
    plt.axis("off")
    plt.show()




# ??? ??(??? ????? ???? ???? ? ?????? ??? ??)
项目:eea.corpus    作者:eea    | 项目源码 | 文件源码
def wordcloud_visualization(corpus, topics, num_docs=None, min_df=0.1,
                            ngrams=1, weighting='tf', max_df=0.7, mds='pcoa',
                            *args, **kwargs):
    """Render one word cloud per topic category and return the images.

    A model is built with ``build_model`` and summarised with ``prepare``;
    for every ``Category`` in the resulting ``topic_info`` the 20 terms
    with the highest ``Total`` are drawn.

    ``*args`` and ``**kwargs`` are accepted but not used in this body.

    Returns:
        A list of ``(label, data)`` tuples, where ``data`` is what was read
        back from the buffer ``plt.savefig`` wrote to.
    """
    font = pkg_resources.resource_filename(__name__,
                                           "fonts/ZillaSlab-Medium.ttf")
    # Prints the resolved font path.
    print(font)
    model, doc_term_matrix, vectorizer = build_model(
        corpus, topics, num_docs, ngrams, weighting, min_df, max_df
    )
    prep_data = prepare(model.model, doc_term_matrix, vectorizer, mds=mds)
    ti = prep_data.topic_info
    topic_labels = ti.groupby(['Category']).groups.keys()

    # The current figure is cleared once, before the loop, not per topic.
    plt.clf()
    # Rebinds the ``topics`` parameter: from here on it is the result list.
    topics = []
    for label in topic_labels:
        # NOTE(review): savefig writes image data; confirm which StringIO
        # is imported and that it accepts what savefig writes.
        out = StringIO()
        # Top 20 rows of this category, ordered by descending 'Total'.
        df = ti[ti.Category == label].sort_values(by='Total',
                                                     ascending=False)[:20]
        # Term -> Total mapping built from the [Term, Total] row pairs.
        tf = dict(df[['Term', 'Total']].to_dict('split')['data'])

        wc = wordcloud.WordCloud(font_path=font, width=600, height=300,
                                 background_color='white')
        wc.fit_words(tf)
        plt.imshow(wc)
        plt.axis('off')
        plt.savefig(out)
        # Rewind so read() returns everything savefig wrote.
        out.seek(0)
        topics.append((label, out.read()))

    return topics
    # The string below is unreachable (it follows the return); it appears
    # to be a sample printout of ``topic_info`` kept for reference.
    """
     Category         Freq            Term        Total  loglift  logprob
term
478   Default   738.000000          specie   738.000000   1.0000   1.0000
...       ...          ...             ...          ...      ...      ...
191   Topic10    25.344278           space   145.983738   1.8935  -5.0376
190   Topic10    32.076070           green   193.201661   1.8488  -4.8020
319   Topic10    12.129367          aspect    73.063725   1.8488  -5.7745

"""
项目:glassdoor-analysis    作者:THEdavehogue    | 项目源码 | 文件源码
def plot_topic(self, topic_idx):
    '''
    Plot, save and show a word cloud for one topic.

    The plot title is read interactively. The image is saved as
    ``topic_<topic_idx>.png`` under ``images/positive`` when
    ``self.pro_or_con`` is ``'pro'`` and under ``images/negative``
    otherwise.

    INPUT:
        topic_idx: index of topic from NMF clustering
    '''
    # raw_input is the Python 2 built-in; this snippet targets Python 2.
    title = raw_input('Enter a title for this plot: ')
    num_reviews = self.labels[:, topic_idx].sum()
    word_freq = self.topic_word_frequency(topic_idx)

    cloud = WordCloud(width=2000, height=1000, max_words=150,
                      background_color='white')
    cloud.fit_words(word_freq)

    heading = 'Topic {}: {}\nNumber of Reviews in Topic: {}'.format(
        topic_idx, title, num_reviews)
    fig = plt.figure(figsize=(16, 8))
    ax = fig.add_subplot(111)
    ax.set_title(heading, fontsize=24)
    ax.axis('off')
    ax.imshow(cloud)

    sentiment_dir = 'positive' if self.pro_or_con == 'pro' else 'negative'
    file_name = 'topic_' + str(topic_idx) + '.png'
    plt.savefig(os.path.join('images', sentiment_dir, file_name))
    plt.show()
项目:Jobs-search    作者:Hopetree    | 项目源码 | 文件源码
def get_wc(word_dic,fontname,savename,photoname):
    """Build a masked word cloud from ``word_dic`` and write it to ``savename``.

    Args:
        word_dic: Frequencies passed to ``generate_from_frequencies``.
        fontname: Font file path used for rendering.
        savename: Output image path.
        photoname: Image read with ``imread`` and used as the mask.
    """
    mask = imread(photoname)
    cloud = WordCloud(
        background_color='white',
        mask=mask,
        font_path=fontname,
        max_font_size=150,
    )
    cloud.generate_from_frequencies(word_dic)

    plt.imshow(cloud)
    plt.axis('off')
    cloud.to_file(savename)
    print('get the photo {} !'.format(savename))
项目:stackoverflow_tag_cloud    作者:droyed    | 项目源码 | 文件源码
def tag_cloud(link=22656, lim_num_tags=200, image_dims=(400, 200),
              out_filepath="TagCloud.png"):
    """ Generate tag cloud and save it as an image.

    Parameters
    ----------
    link : same as used for the function taginfo.

    lim_num_tags : same as used for the function taginfo.

    image_dims : tuple of two elements.
        Image dimensions (width, height) of the tag cloud image to be saved.

    out_filepath : string
        Output image filepath.

    Output
    ------
    None. An error message is printed instead of saving when taginfo
    returns None or an empty result.
    """

    width, height = image_dims
    font_path = "fonts/ShortStack-Regular.ttf"
    info = taginfo(link=link, lim_num_tags=lim_num_tags)

    if info is None:
        print("Error : No webpage found!")
        return
    if len(info) == 0:
        print("Error : No tags found!")
        return

    # Tag info was extracted: draw every tag and save the image.
    cloud = WordCloud(font_path=font_path, width=width, height=height,
                      max_words=len(info))
    cloud = cloud.generate_from_frequencies(info)
    cloud.to_image().save(out_filepath)
    print("Tag Cloud Saved as " + out_filepath)
项目:EnglishDiary    作者:jupiny    | 项目源码 | 文件源码
def set_wordcloud_image(words):
    """Return a word cloud image for ``words``, or ``None`` when it is falsy.

    All rendering options are read from the ``settings.WORDCLOUD_*``
    values.
    """
    if not words:
        return None

    cloud = WordCloud(
        background_color=settings.WORDCLOUD_BACKGROUND_COLOR,
        width=settings.WORDCLOUD_WIDTH,
        height=settings.WORDCLOUD_HEIGHT,
        max_words=settings.WORDCLOUD_MAX_WORDS,
        max_font_size=settings.WORDCLOUD_MAX_FRONT_SIZE,
        scale=settings.WORDCLOUD_SCALE,
    )
    return cloud.generate(words).to_image()
项目:wntf    作者:tonybaloney    | 项目源码 | 文件源码
def word_cloud(f):
    """Plot the frequencies ``f`` as a word cloud and save ``out/word_cloud.png``."""
    cloud = WordCloud()
    cloud = cloud.generate_from_frequencies(f)

    # Draw on the current axes and save at 300 dpi.
    plt.imshow(cloud)
    plt.axis("off")
    plt.savefig('out/word_cloud.png', dpi=300, format='png')
项目:rekognition-video-utils    作者:awslabs    | 项目源码 | 文件源码
def generate_wordcloud():
    """Plot the module-level ``label_counts`` as a word cloud saved as ``photo_tags``.

    The counts are sorted by descending count before being handed to
    ``generate_from_frequencies``.
    """
    from operator import itemgetter
    from wordcloud import WordCloud
    import matplotlib.pyplot as plt

    cloud = WordCloud(background_color="white")

    # NOTE(review): this passes a list of (label, count) pairs; confirm the
    # installed wordcloud version accepts that rather than a dict.
    by_count = sorted(label_counts.items(), key=itemgetter(1), reverse=True)
    cloud.generate_from_frequencies(by_count)

    # save image
    plt.imshow(cloud)
    plt.axis("off")
    plt.savefig('photo_tags')
项目:gctag    作者:Fenghuapiao    | 项目源码 | 文件源码
def main():
    """Command-line entry point: build a word cloud and save it as an image.

    The text comes from the ``--input`` file via ``get_txt``; when that
    fails, the page at ``--url`` is used via ``get_html_text``. An optional
    ``--model`` image supplies the mask.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--url', metavar='URL', default=None, help='input the url')
    parser.add_argument('--output', metavar='OUTPUT', default='./wordcloud.jpg', help='input the output_file')
    parser.add_argument('--input', metavar='INPUT_FIEL', default=None, help='input the input_file')
    parser.add_argument('--model', metavar='INPUT_IMAGE_MODEL', default=None, help='input the input_image_model')
    parser.add_argument('--ttf', metavar='INPUT_TTF', default='./font/simhei.ttf', help='input the typeface')
    parser.add_argument('--width', metavar='INPUT_WIDTH', default=1800, type=int, help='input the image width')
    parser.add_argument('--height', metavar='INPUT_HEIGHT', default=1000, type=int, help='input the image height')
    parser.add_argument('--bg', metavar='INPUT_BACKGROUND_COLOR', default='black', help='input the image background_color')
    parser.add_argument('--margin', metavar='INPUT_MARGIN', default=5, type=int, help='input the image margin')
    parser.add_argument('--max_font_size', metavar='INPUT_max_font_size', default=60, type=int, help='input the max_font_size')
    args = parser.parse_args()

    url = args.url
    output_file = args.output
    input_file = args.input
    model_path = args.model
    typeface = args.ttf
    max_font_size = args.max_font_size
    width = args.width
    height = args.height
    background_color = args.bg
    margin = args.margin

    # No mask, or an unreadable one, falls back to an unmasked cloud.
    # "except Exception" (not a bare except) lets Ctrl-C and SystemExit
    # propagate.
    image_mask = None
    if model_path is not None:
        try:
            image_mask = np.array(PIL.Image.open(model_path))
        except Exception:
            image_mask = None

    wordcloud = WordCloud(font_path=typeface, mask=image_mask, max_font_size=max_font_size,
                          background_color=background_color, margin=margin, width=width, height=height)
    try:
        txt_join = get_txt(input_file)
        wordcloud_ = wordcloud.generate(txt_join)
    except Exception:
        # Best-effort fallback: use the web page when the input file
        # cannot be read or rendered.
        html_text = get_html_text(url)
        wordcloud_ = wordcloud.generate(html_text)

    image = wordcloud_.to_image()
    image.save(output_file)
项目:jd_spider    作者:roxylu    | 项目源码 | 文件源码
def show(self):
    """Display a word cloud (at most 40 words) of ``self.seg_text``."""
    options = dict(
        font_path=u'./static/simheittf/simhei.ttf',
        background_color="black",
        max_words=40,
        margin=5,
        width=1000,
        height=800,
    )
    cloud = WordCloud(**options).generate(self.seg_text)

    plt.figure()
    plt.imshow(cloud)
    plt.axis("off")
    plt.show()
项目:PolBotCheck    作者:codeforfrankfurt    | 项目源码 | 文件源码
def save_wordcloud_image(frequencies, filename):
    """Fit a word cloud to ``frequencies`` and save the plot to ``filename``.

    The figure is 12 wide by 16 high and is saved with a black face colour
    and a tight bounding box.
    """
    cloud = WordCloud(width=1024, height=786, min_font_size=1)
    cloud = cloud.fit_words(frequencies)

    figure = plt.figure()
    figure.set_figwidth(12)
    figure.set_figheight(16)
    plt.imshow(cloud)
    plt.axis("off")
    plt.savefig(filename, facecolor='k', bbox_inches='tight')
    print('imaged created')
项目:words_image    作者:flingjie    | 项目源码 | 文件源码
def generate_image(words, image):
    """Build a word cloud masked by ``image`` from the frequencies ``words``.

    Returns:
        A ``(cloud, color_generator)`` tuple: the generated ``WordCloud``
        and an ``ImageColorGenerator`` built from the same image array.
    """
    pixels = np.array(image)
    font_file = os.path.join(CUR_DIR, 'fonts/simhei.ttf')
    cloud = WordCloud(
        font_path=font_file,
        background_color='white',
        max_words=MAX_WORDS,
        mask=pixels,
    )
    cloud.generate_from_frequencies(words)
    return cloud, ImageColorGenerator(pixels)
项目:jaychou    作者:fantasysea    | 项目源码 | 文件源码
def wordcloudplot(txt,name):
    """Save and show a word cloud of ``txt`` masked by ``jay.jpg``.

    The image is written to ``../songs/<name>/<name>.jpg`` before it is
    displayed.
    """
    # Python 2 only (``unicode``): decode the UTF-8 byte string, then
    # re-encode the font path as GB18030.
    font_file = unicode('msyh.ttf', 'utf8').encode('gb18030')
    mask = np.array(PIL.Image.open('jay.jpg'))

    cloud = WordCloud(
        font_path=font_file,
        background_color="white",
        margin=5,
        width=1800,
        height=800,
        mask=mask,
        max_words=2000,
        max_font_size=60,
        random_state=42,
    ).generate(txt)

    cloud.to_file('../songs/'+name+'/'+name+'.jpg')
    plt.imshow(cloud)
    plt.axis("off")
    plt.show()
项目:Medium-crawler-with-data-analyzer    作者:lifei96    | 项目源码 | 文件源码
def get_wordcloud(file_path):
    """Display a horizontal-only, copper-coloured word cloud of a text file."""
    with open(file_path, 'r') as f:
        contents = f.read()

    options = dict(
        max_font_size=200,
        min_font_size=25,
        prefer_horizontal=1,
        background_color='white',
        margin=0,
        relative_scaling=0.5,
        colormap='copper',
        collocations=False,
        width=1600,
        height=800,
    )
    cloud = WordCloud(**options).generate(contents)

    plt.figure()
    plt.imshow(cloud, interpolation="bilinear")
    plt.axis("off")
    plt.show()
项目:csss-minion    作者:henrymzhao    | 项目源码 | 文件源码
def createImage(self, arr, saveName):
    """Write a word cloud of the strings in ``arr`` and return its path.

    The image is saved as ``saveName`` inside ``self.d``/``self.e``; an
    existing file of that name is overwritten.
    """
    # local image gets overwritten each time. will this break if too many requests?
    target = path.join(self.d, self.e, saveName)
    combined = " ".join(arr)
    cloud = WordCloud(max_words=20000, stopwords=self.STOPWORDS)
    cloud.generate(combined).to_file(target)
    return target


        # idk how it subscribes to the event... but it works!
项目:Tweet_cloud    作者:yashasingh    | 项目源码 | 文件源码
def generate(self, title, text):
    """Save a word cloud of ``text`` as ``<title>.png``.

    The figure is saved with a tight bounding box and is not shown.
    """
    cloud = WordCloud(max_font_size=40).generate(text)

    plt.figure()
    plt.imshow(cloud, interpolation='bilinear')
    plt.axis("off")

    target = title + '.png'
    plt.savefig(target,  bbox_inches='tight')
项目:warWolf    作者:wu-yy    | 项目源码 | 文件源码
def draw_wordcloud(file_name):
    """Write a word cloud of the UTF-8 text in ``file_name`` to ``pjl_cloud.jpg``.

    ``template.png`` is used as the mask.
    """
    with codecs.open(file_name,encoding='utf-8') as f:
        comment_text=f.read()
    color_mask=imread('template.png')
    # Markup-related words first, followed by further entries.
    # NOTE(review): the u'??' entries look mis-encoded; restore the
    # intended stop words from the upstream source.
    stopwords = ['png','douban','com','href','https','img','img3','class','source','icon','shire',u'??',u'??',u'??',u'??',u'??',u'??', u'??', u'??', u'??', u'??', u'??', u'??', u'??', u'??', u'??', u'??', u'??', u'??', u'??',
                 u'??', u'??', u'??', u'??']
    font = r'C:\Windows\Fonts\simfang.ttf'
    cloud = WordCloud(
        font_path=font,
        background_color='white',
        max_words=20000,
        max_font_size=200,
        min_font_size=4,
        mask=color_mask,
        stopwords=stopwords,
    )
    rendered = cloud.generate(comment_text)
    rendered.to_file('pjl_cloud.jpg')
项目:warWolf    作者:wu-yy    | 项目源码 | 文件源码
def draw_wordcloud(file_name):
    """Build a word cloud from a UTF-8 text file and save it as ``pjl_cloud.jpg``.

    The image read from ``template.png`` is passed to ``WordCloud`` as its
    ``mask``, and the words in ``stopwords`` are excluded.

    Args:
        file_name: Path of the UTF-8 encoded text file to read.
    """
    with codecs.open(file_name, encoding='utf-8') as source:
        raw_comments = source.read()
    mask_image = imread('template.png')
    # NOTE(review): the u'??' entries look like non-ASCII words that were
    # mis-encoded in this copy of the file; restore them from the upstream source.
    stopwords = [u'??', u'??', u'??', u'??', u'??', u'??', u'??', u'??', u'??', u'??', u'??', u'??', u'??', u'??',
                 u'??', u'??', u'??', u'??']
    font_file = r'C:\Windows\Fonts\simfang.ttf'
    renderer = WordCloud(
        font_path=font_file,
        background_color='white',
        max_words=20000,
        max_font_size=200,
        min_font_size=4,
        mask=mask_image,
        stopwords=stopwords,
    )
    rendered = renderer.generate(raw_comments)
    rendered.to_file('pjl_cloud.jpg')
项目:danmuWordCloud    作者:yutiansut    | 项目源码 | 文件源码
def show(self):
        """Generate a word cloud from ``self.seg_text`` and display it with pyplot."""
        cloud_options = dict(
            font_path=u'./static/simheittf/simhei.ttf',
            background_color="black",
            margin=5,
            width=1800,
            height=800,
        )
        rendered = WordCloud(**cloud_options).generate(self.seg_text)

        plt.figure()
        plt.imshow(rendered)
        plt.axis("off")
        plt.show()
项目:text_analysis    作者:mathlf2015    | 项目源码 | 文件源码
def get_wordclud(file_set):
    """Show a word cloud built from the second column of ten CSV files.

    Every row's second field is segmented with ``jieba.cut``; the segments
    are joined with spaces and passed to ``WordCloud.generate``. The result
    is displayed with pyplot.

    Args:
        file_set: Indexable collection of CSV file paths. Exactly the entries
            at indices 0-9 are read.

    Raises:
        IndexError: If ``file_set`` has fewer than ten entries, or a row has
            fewer than two fields.
    """
    line_set = []
    for j in range(10):
        # The context manager closes each CSV once its rows are consumed, so
        # handles do not stay open until garbage collection.
        with open(file_set[j], 'r') as csv_file:
            for line in csv.reader(csv_file):
                line_set.append(line[1])
    word_list = [" ".join(jieba.cut(sentence)) for sentence in line_set]
    new_text = ' '.join(word_list)
    wordcloud = WordCloud(font_path="C:/Python34/Lib/site-packages/wordcloud/simhei.ttf", background_color="black").generate(new_text)
    plt.imshow(wordcloud)
    plt.axis("off")
    plt.show()
项目:es_email_intel    作者:xujun10110    | 项目源码 | 文件源码
def gen_wordcloud():
    """Build a word cloud image from mail text pulled via ``common_functions``.

    A range query on the ``epoch`` field is assembled (lower bound from
    ``common_functions.queryrange(1)``), the lower-cased mail text is fetched
    and printed, and a word cloud of at most 40 words is written to
    ``/home/pierre/es_email_intel/wordcloud.png``. If generating or saving
    the cloud fails, the image file is left empty instead (best effort).

    Returns:
        None.
    """
    # NOTE(review): ``then`` is concatenated into the query, so queryrange()
    # must return a string -- confirm against common_functions.
    then = common_functions.queryrange(1)

    body = '''{
        "size" : 10000,
        "query": {
            "constant_score": {
                "filter": {
                    "range": {
                        "epoch": {
                            "from": '''+then+'''
                        }
                    }
                }
            }
        }
    }'''

    text = common_functions.pull_mailtext_24hrs(es, es_collection_name, body, keywords_list).lower()

    # Single-argument parenthesised prints produce the same output on
    # Python 2 (print statement) and Python 3 (print function).
    print(text)
    print('')

    wc = WordCloud(background_color="white", max_words=40)
    fileloc = "/home/pierre/es_email_intel/wordcloud.png"
    try:
        wc.generate(text)
        wc.to_file(fileloc)
    except Exception:
        # Best effort: leave an empty file behind. Catching Exception rather
        # than using a bare except lets KeyboardInterrupt/SystemExit propagate.
        # Opening in 'w' mode already truncates the file.
        with open(fileloc, 'w'):
            pass
        print('Except!')
    else:
        print('Finished!')
项目:EduSpider    作者:hlpureboy    | 项目源码 | 文件源码
def drawPic(text,Pic):
    """Draw a word cloud of ``text`` and save it to ``F:/EduSpider/edubug.jpg``.

    Args:
        text: Text passed to ``WordCloud.generate``.
        Pic: Not used by the current implementation.
    """
    cloud = WordCloud(
        font_path="C:/Windows/Fonts/simhei.ttf",
        background_color='white',
    )
    rendered = cloud.generate(text)
    plt.imshow(rendered)
    plt.axis("off")
    plt.savefig("F:/EduSpider/edubug.jpg", dpi=600)
项目:lagou_data_analysis    作者:jasminecjc    | 项目源码 | 文件源码
def get_word_to_cloud(self):
        """Write one word cloud PNG per entry of ``self.file_list``.

        Each file is read from ``../spider/``, upper-cased and segmented with
        ``jieba.cut``. Only tokens present in ``self.dic_list`` are counted,
        and the resulting frequencies are rendered to ``<file>.png`` using
        ``test.jpeg`` as the mask.
        """
        for fname in self.file_list:
            with codecs.open('../spider/' + fname, "r", encoding='utf-8', errors='ignore') as handle:
                # Upper-case the whole text before segmentation.
                content = handle.read().upper()
                tokens = list(jieba.cut(content, HMM=False))
                wordDict = {}
                # Count only the tokens that appear in the configured word list.
                for token in tokens:
                    if token in self.dic_list:
                        wordDict[token] = wordDict.get(token, 0) + 1
            # Image passed to WordCloud as its mask.
            coloring = imread('test.jpeg')
            wc = WordCloud(
                font_path='msyh.ttc',
                mask=coloring,
                background_color="white",
                max_words=50,
                max_font_size=40,
                random_state=42,
            )

            wc.generate_from_frequencies(wordDict)
            # One output image per input file.
            wc.to_file("%s.png" % fname)
#???????
项目:WechatForwardBot    作者:grapeot    | 项目源码 | 文件源码
def __init__(self, fontPath):
        """Connect to MongoDB, configure the word cloud and ensure the image dir exists.

        Args:
            fontPath: Font file path passed to ``WordCloud`` as ``font_path``.
        """
        self.client = MongoClient()
        database = self.client[dbName]
        self.coll = database[collName]
        self.fontPath = fontPath
        cloud_options = dict(font_path=self.fontPath, width=400, height=400, max_words=100)
        self.wordCloud = WordCloud(**cloud_options)
        # Create the image directory on first use.
        image_dir_exists = os.path.exists(self.imgDir)
        if not image_dir_exists:
            os.mkdir(self.imgDir)
        logging.info('GroupTagCloud connected to MongoDB.')
项目:notebook    作者:archever    | 项目源码 | 文件源码
def generate_img(data):
    """Draw a word cloud of ``data`` and save the plot to ``./heart.jpg``.

    The image read from ``./heart-mask.jpg`` is passed to ``WordCloud`` as
    its ``mask``.

    Args:
        data: Text passed to ``WordCloud.generate``.
    """
    # Image passed to WordCloud as its mask.
    mask_img = imread('./heart-mask.jpg')
    wordcloud = WordCloud(
        font_path='/Library/Fonts/Songti.ttc',
        background_color='white',
        mask=mask_img
    ).generate(data)
    plt.imshow(wordcloud)
    plt.axis('off')
    # plt.show()
    # The plot is written to disk rather than shown interactively.
    plt.savefig('./heart.jpg', dpi=600)