当前位置: 代码网 > it编程>编程语言>Java > SpringBoot项目实现通过分词器生成词云

SpringBoot项目实现通过分词器生成词云

2026年05月08日 Java 我要评论
前言公司项目涉及到员工任务管理,需要从员工任务中获取任务信息生成个人词云图,可以把员工任务中较为高频的词语突出展示。一、词云是什么 词云就是对文本中出现频率较高的“关键词”予以视觉上的突出……

前言

公司项目涉及到员工任务管理,需要从员工任务中获取任务信息生成个人词云图,可以把员工任务中较为高频的词语突出展示。

一、词云是什么

词云就是对文本中出现频率较高的“关键词”予以视觉上的突出,形成“关键词云层” 或“关键词渲染”,从而过滤掉大量的文本信息,使浏览网页者只要一眼扫过文本就可以领略文本的主旨。

二、使用步骤

1.引入依赖

<!-- IK Chinese word segmenter -->
<dependency>
    <groupId>cn.shenyanchao.ik-analyzer</groupId>
    <artifactId>ik-analyzer</artifactId>
    <version>9.0.0</version>
</dependency>
<!-- Kumo word-cloud renderer -->
<dependency>
    <groupId>com.kennycason</groupId>
    <artifactId>kumo-core</artifactId>
    <version>1.28</version>
</dependency>
<dependency>
    <groupId>com.kennycason</groupId>
    <artifactId>kumo-tokenizers</artifactId>
    <version>1.28</version>
</dependency>
<!-- Spring Boot web starter -->
<dependency>
    <groupId>org.springframework.boot</groupId>
    <artifactId>spring-boot-starter-web</artifactId>
</dependency>
<dependency>
    <groupId>org.projectlombok</groupId>
    <artifactId>lombok</artifactId>
    <optional>true</optional>
</dependency>

2.application.yml

server:
  port: 8088
# Silence Kumo's word-placement logging while the cloud is built (optional)
logging:
  level:
    com.kennycason.kumo.wordcloud: off

3.controller

import com.chendi.mydemo.utils.IkAnalyzerUtils;
import com.chendi.mydemo.utils.WorkCloudUtil;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.RestController;

import java.util.ArrayList;
import java.util.List;
import java.util.Map;

/**
 * Demo endpoint: segments sample Chinese text with the IK analyzer and
 * renders the resulting word frequencies as a word-cloud PNG on disk.
 */
@RestController
public class TestController {

    @GetMapping("/")
    public void test() {
        List<String> list = new ArrayList<>();
        list.add("爱购物,爱手机,爱电脑,爱上网");
        list.add("爱学习,爱游戏,爱吃饭,爱睡觉");
        list.add("爱上班,爱下班,爱加班,爱翘班");
        list.add("爱上班,爱下班,爱加班,爱翘班");
        list.add("夏天的阳光明媚灿烂,\n" +
                "大自然万物生机盎然。\n" +
                "清晨的微风吹过花丛,\n" +
                "点缀着青草和蓝天。\n" +
                "\n" +
                "蝴蝶翩翩起舞在花间,\n" +
                "蜜蜂忙碌采集甘甜。\n" +
                "鸟儿欢快地歌唱着,\n" +
                "为夏日带来欢欣和欢愉。\n" +
                "\n" +
                "海浪轻拍沙滩起伏,\n" +
                "沙粒细腻温热宜走。\n" +
                "阳光透过水面璀璨,\n" +
                "让海洋如银河般流动。\n" +
                "\n" +
                "夏日的夜晚星空闪耀,\n" +
                "月亮洒下银色光晕。\n" +
                "夏虫的音符演奏着,\n" +
                "营造出夏夜的美妙。\n" +
                "\n" +
                "夏天啊,你是如此迷人,\n" +
                "给人们带来快乐和欢欣。\n" +
                "在你的怀抱里,我们尽情享受,\n" +
                "夏天,你是美丽的季节!");

        // quantity = 0 means "keep every segmented word"
        Map<String, Integer> wordMap = IkAnalyzerUtils.wordCloud(list, 0);
        WorkCloudUtil.generateWriteImage(wordMap);
    }

}

4.分词工具类

import org.wltea.analyzer.core.IKSegmenter;
import org.wltea.analyzer.core.Lexeme;

import java.io.IOException;
import java.io.StringReader;
import java.util.*;

/**
 * Word-segmentation helpers built on the IK analyzer.
 * Turns free text into word -> frequency maps for word-cloud rendering.
 */
public class IkAnalyzerUtils {

    /** Utility class; no instances. */
    private IkAnalyzerUtils() {
    }

    /**
     * Segments the given texts and joins the distinct words into a single
     * space-separated string.
     *
     * @param list     texts to segment
     * @param quantity max number of words to keep; {@code null}/0 keeps all
     * @return words joined by single spaces (empty string for no words)
     */
    public static String wordCloudParsing(List<String> list, Integer quantity) {
        Map<String, Integer> result = wordCloud(list, quantity);
        StringBuilder str = new StringBuilder();
        result.forEach((k, v) -> str.append(' ').append(k));
        return str.toString().trim();
    }

    /**
     * Segments the given texts into a list of {name, value} maps
     * (e.g. for an ECharts word-cloud series), most frequent first.
     *
     * @param list     texts to segment
     * @param quantity max number of words to keep; {@code null}/0 keeps all
     */
    public static List<Map<String, Object>> wordCloudList(List<String> list, Integer quantity) {
        Map<String, Integer> result = wordCloud(list, quantity);
        List<Map<String, Object>> mapList = new LinkedList<>();
        result.forEach((k, v) -> {
            Map<String, Object> map = new HashMap<>(16);
            map.put("name", k);
            map.put("value", v);
            mapList.add(map);
        });
        // wordCloud returns ascending frequency; reverse so the most frequent come first
        Collections.reverse(mapList);
        return mapList;
    }

    /**
     * Segments the given texts and counts word frequencies.
     *
     * @param list     texts to segment (joined with "," before analysis)
     * @param quantity max number of words to keep; {@code null}/0 keeps all
     * @return insertion-ordered map, ascending by frequency
     */
    public static Map<String, Integer> wordCloud(List<String> list, Integer quantity) {
        Map<String, Integer> map = new HashMap<>(16);
        // try-with-resources guarantees the reader is closed even on failure
        try (StringReader reader = new StringReader(String.join(",", list))) {
            IKSegmenter ikSegmenter = new IKSegmenter(reader, true);
            Lexeme lexeme;
            while ((lexeme = ikSegmenter.next()) != null) {
                // count occurrences of each segmented word
                map.merge(lexeme.getLexemeText(), 1, Integer::sum);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        Map<String, Integer> result = new LinkedHashMap<>();
        if (quantity != null && quantity > 0) {
            // BUGFIX: the previous ascending sort + limit kept the LEAST
            // frequent words. Take the top `quantity` by frequency first,
            // then re-sort ascending so both branches share one ordering.
            map.entrySet().stream()
                    .sorted(Map.Entry.<String, Integer>comparingByValue(Comparator.reverseOrder()))
                    .limit(quantity)
                    .sorted(Map.Entry.comparingByValue())
                    .forEachOrdered(item -> result.put(item.getKey(), item.getValue()));
        } else {
            map.entrySet().stream().sorted(Map.Entry.comparingByValue())
                    .forEachOrdered(item -> result.put(item.getKey(), item.getValue()));
        }
        return result;
    }
}

5.词云生成工具类,支持输出文件和字节流

import com.kennycason.kumo.CollisionMode;
import com.kennycason.kumo.WordCloud;
import com.kennycason.kumo.WordFrequency;
import com.kennycason.kumo.bg.CircleBackground;
import com.kennycason.kumo.font.KumoFont;
import com.kennycason.kumo.font.scale.SqrtFontScalar;
import com.kennycason.kumo.palette.ColorPalette;
import lombok.SneakyThrows;

import java.awt.*;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

/**
 * Renders word -> frequency maps as word-cloud images, either to a file
 * or as an in-memory PNG byte stream.
 */
public class WorkCloudUtil {

    /** Default output location kept for backward compatibility. */
    private static final String DEFAULT_OUTPUT_PATH = "d:\\chendi\\cd.png";

    /**
     * Renders the word cloud and returns it as a PNG byte stream.
     *
     * @param wordMap word -> frequency
     * @return PNG bytes, or {@code null} when wordMap is null/empty
     */
    @SneakyThrows
    public static InputStream generateImageStream(Map<String, Integer> wordMap) {
        WordCloud wordCloud = generateWordCloud(wordMap);
        if (wordCloud == null) {
            // nothing to render for an empty map (previously NPE'd here)
            return null;
        }
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        wordCloud.writeToStreamAsPNG(out);
        return new ByteArrayInputStream(out.toByteArray());
    }

    /**
     * Renders the word cloud to the default file path.
     */
    @SneakyThrows
    public static void generateWriteImage(Map<String, Integer> wordMap) {
        generateWriteImage(wordMap, DEFAULT_OUTPUT_PATH);
    }

    /**
     * Renders the word cloud to the given PNG file path.
     *
     * @param wordMap    word -> frequency
     * @param outputPath destination file path
     */
    @SneakyThrows
    public static void generateWriteImage(Map<String, Integer> wordMap, String outputPath) {
        WordCloud wordCloud = generateWordCloud(wordMap);
        if (wordCloud != null) {
            wordCloud.writeToFile(outputPath);
        }
    }

    /**
     * Builds a Kumo {@link WordCloud} from a word -> frequency map.
     *
     * @return the built word cloud, or {@code null} when the map is null/empty
     */
    public static WordCloud generateWordCloud(Map<String, Integer> wordMap) {
        if (wordMap == null || wordMap.isEmpty()) {
            return null;
        }
        // NOTE(review): the original also configured a FrequencyAnalyzer +
        // ChineseWordTokenizer here but never used them (frequencies come in
        // pre-computed); that dead code has been removed.
        final List<WordFrequency> wordFrequencies = new ArrayList<>(wordMap.size());
        for (Map.Entry<String, Integer> entry : wordMap.entrySet()) {
            wordFrequencies.add(new WordFrequency(entry.getKey(), entry.getValue()));
        }
        // font with Chinese glyph support; FontUtil falls back to a system font
        Font font = FontUtil.getFont("/static/fonts/qingniaohuaguangjianmeihei-2.ttf");
        // output image resolution
        final Dimension dimension = new Dimension(400, 400);
        final WordCloud wordCloud = new WordCloud(dimension, CollisionMode.PIXEL_PERFECT);
        wordCloud.setPadding(2);
        wordCloud.setBackgroundColor(Color.WHITE);
        // circular layout with the given radius
        wordCloud.setBackground(new CircleBackground(200));
        // palette: earlier colors are used for higher-frequency words
        wordCloud.setColorPalette(new ColorPalette(new Color(0x4055F1), new Color(0x408DF1), new Color(0x40AAF1), new Color(0x40C5F1), new Color(0x40D3F1), new Color(0xFFFFFF)));
        // font size range (min, max)
        wordCloud.setFontScalar(new SqrtFontScalar(10, 40));
        wordCloud.setKumoFont(new KumoFont(font));
        wordCloud.build(wordFrequencies);
        // to constrain the cloud to an arbitrary shape, use e.g.:
        // wordCloud.setBackground(new PixelBoundryBackground("e:\\星星/star.jpg"));
        return wordCloud;
    }

}

fontutil

import org.springframework.core.io.ClassPathResource;
import org.springframework.util.StringUtils;

import java.awt.*;
import java.io.InputStream;

/**
 * Loads TrueType fonts from the classpath with a safe fallback font,
 * so word-cloud rendering never fails for a missing font file.
 */
public class FontUtil {

    /** Fallback used whenever the requested font cannot be loaded. */
    private static Font fallbackFont() {
        return new Font("微软雅黑", Font.PLAIN, 18);
    }

    /**
     * Creates a Font from the given TTF stream at the requested point size.
     * The stream is always closed (the original leaked it when
     * {@code Font.createFont} threw).
     *
     * @param inputStream TTF data; closed before returning
     * @param fontSize    point size for the derived font
     * @return the derived font, or the fallback font on any failure
     */
    public static Font loadFont(InputStream inputStream, float fontSize) {
        try (InputStream in = inputStream) {
            Font dynamicFont = Font.createFont(Font.TRUETYPE_FONT, in);
            return dynamicFont.deriveFont(fontSize);
        } catch (Exception e) {
            return fallbackFont();
        }
    }

    /**
     * Loads a classpath font at 18pt.
     *
     * @param path classpath location of a TTF file, e.g. "/static/fonts/x.ttf"
     * @return the loaded font, or the fallback when path is empty or unreadable
     */
    public static Font getFont(String path) {
        if (StringUtils.isEmpty(path)) {
            return fallbackFont();
        }
        ClassPathResource classPathResource = new ClassPathResource(path);
        InputStream inputStream;
        try {
            inputStream = classPathResource.getInputStream();
        } catch (Exception e) {
            return fallbackFont();
        }
        return loadFont(inputStream, 18);
    }
}

注意

处理中文需要宿主机有中文字体包、如果宿主机不支持中文,请下载一个中文字体包

本文指定使用的就是qingniaohuaguangjianmeihei-2.ttf字体

百度一下、找不到私信我发你qingniaohuaguangjianmeihei-2.ttf字体包

到此这篇关于springboot项目实现通过分词器生成词云的文章就介绍到这了,更多相关springboot词云内容请搜索代码网以前的文章或继续浏览下面的相关文章希望大家以后多多支持代码网!

(0)

相关文章:

版权声明:本文内容由互联网用户贡献,该文观点仅代表作者本人。本站仅提供信息存储服务,不拥有所有权,不承担相关法律责任。 如发现本站有涉嫌抄袭侵权/违法违规的内容, 请发送邮件至 2386932994@qq.com 举报,一经查实将立刻删除。

发表评论

验证码:
Copyright © 2017-2026  代码网 保留所有权利. 粤ICP备2024248653号
站长QQ:2386932994 | 联系邮箱:2386932994@qq.com