
Resume management code commit.

zhanglfluofei 1 year ago
commit 353e630bd0
90 changed files with 10803 additions and 0 deletions
  1. 52 0
      ruoyi-admin/pom.xml
  2. 174 0
      ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/FileUtils.java
  3. 1338 0
      ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/HanLPUtils.java
  4. 34 0
      ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/PersonalType.java
  5. 153 0
      ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/PersonalUtils.java
  6. 22 0
      ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/ProjectType.java
  7. 74 0
      ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/ProjectUtils.java
  8. 859 0
      ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/ResumeAnalysisUtil.java
  9. 28 0
      ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/ResumeType.java
  10. 180 0
      ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/Similarity.java
  11. 75 0
      ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/classification/Feature.java
  12. 62 0
      ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/classification/Instance.java
  13. 98 0
      ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/classification/NaiveBayesClassifier.java
  14. 171 0
      ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/classification/Variable.java
  15. 17 0
      ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/ISimilarity.java
  16. 60 0
      ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/phrase/PhraseSimilarity.java
  17. 12 0
      ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/sentence/ISentenceSimilarity.java
  18. 118 0
      ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/sentence/editdistance/Block.java
  19. 16 0
      ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/sentence/editdistance/CharEditUnit.java
  20. 49 0
      ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/sentence/editdistance/ChunkEditUnit.java
  21. 20 0
      ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/sentence/editdistance/EditDistance.java
  22. 52 0
      ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/sentence/editdistance/EditUnit.java
  23. 114 0
      ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/sentence/editdistance/GregorEditDistanceSimilarity.java
  24. 112 0
      ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/sentence/editdistance/NewEditDistanceSimilarity.java
  25. 105 0
      ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/sentence/editdistance/Split.java
  26. 63 0
      ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/sentence/editdistance/StandardEditDistanceSimilarity.java
  27. 92 0
      ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/sentence/editdistance/SuperString.java
  28. 52 0
      ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/sentence/editdistance/WordEditUnit.java
  29. 138 0
      ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/sentence/morphology/MorphoSimilarity.java
  30. 179 0
      ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/sentence/morphology/SemanticSimilarity.java
  31. 91 0
      ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/text/CosineSimilarity.java
  32. 77 0
      ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/text/DiceTextSimilarity.java
  33. 78 0
      ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/text/EditDistanceSimilarity.java
  34. 119 0
      ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/text/EuclideanDistanceTextSimilarity.java
  35. 40 0
      ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/text/ITextSimilarity.java
  36. 99 0
      ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/text/JaccardTextSimilarity.java
  37. 165 0
      ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/text/JaroDistanceTextSimilarity.java
  38. 106 0
      ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/text/JaroWinklerDistanceTextSimilarity.java
  39. 114 0
      ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/text/ManhattanDistanceTextSimilarity.java
  40. 203 0
      ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/text/SimHashPlusHammingDistanceTextSimilarity.java
  41. 144 0
      ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/text/TextSimilarity.java
  42. 99 0
      ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/util/AtomicFloat.java
  43. 36 0
      ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/util/DicReader.java
  44. 141 0
      ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/util/EditDistance.java
  45. 66 0
      ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/util/FileUtil.java
  46. 20 0
      ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/util/MathUtil.java
  47. 7 0
      ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/util/SimilarityUtil.java
  48. 367 0
      ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/util/StringUtil.java
  49. 16 0
      ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/util/TraverseEvent.java
  50. 34 0
      ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/util/XmlException.java
  51. 549 0
      ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/util/XmlUtils.java
  52. 70 0
      ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/word/CharBasedSimilarity.java
  53. 11 0
      ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/word/IWordSimilarity.java
  54. 74 0
      ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/word/clin/CilinCode.java
  55. 85 0
      ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/word/clin/CilinDictionary.java
  56. 58 0
      ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/word/clin/CilinSimilarity.java
  57. 68 0
      ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/word/hownet/IHownetMeta.java
  58. 230 0
      ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/word/hownet/concept/Concept.java
  59. 24 0
      ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/word/hownet/concept/ConceptLinkedList.java
  60. 256 0
      ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/word/hownet/concept/ConceptParser.java
  61. 317 0
      ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/word/hownet/concept/ConceptSimilarity.java
  62. 109 0
      ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/word/hownet/sememe/Sememe.java
  63. 84 0
      ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/word/hownet/sememe/SememeParser.java
  64. 140 0
      ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/word/hownet/sememe/SememeSimilarity.java
  65. 72 0
      ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/word/hownet/sememe/SememeType.java
  66. 153 0
      ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/word/pinyin/PinyinDictionary.java
  67. 33 0
      ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/word/pinyin/PinyinSimilarity.java
  68. 72 0
      ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/tendency/word/HownetWordTendency.java
  69. 17 0
      ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/tendency/word/IWordTendency.java
  70. 153 0
      ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/tendency/word/Training.java
  71. 97 0
      ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/tokenizer/Tokenizer.java
  72. 121 0
      ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/tokenizer/Word.java
  73. 154 0
      ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/tokenizer/WordFreqStatistics.java
  74. 54 0
      ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/word2vec/Word2vec.java
  75. 14 0
      ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/word2vec/domain/HiddenNeuron.java
  76. 27 0
      ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/word2vec/domain/Neuron.java
  77. 28 0
      ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/word2vec/domain/WordEntry.java
  78. 64 0
      ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/word2vec/domain/WordNeuron.java
  79. 42 0
      ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/word2vec/vec/Huffman.java
  80. 395 0
      ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/word2vec/vec/Learn.java
  81. 244 0
      ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/word2vec/vec/ModelParser.java
  82. 49 0
      ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/word2vec/vec/VecMap.java
  83. 104 0
      ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/word2vec/vec/WordKmeans.java
  84. BIN
      ruoyi-admin/src/main/lib/hanlp-1.8.3.jar
  85. 41 0
      ruoyi-admin/src/main/resources/hanlp.properties
  86. 13 0
      ruoyi-common/src/main/java/com/ruoyi/common/utils/DateUtils.java
  87. 151 0
      ruoyi-system/src/main/java/com/ruoyi/system/domain/resume/Resume.java
  88. 27 0
      ruoyi-system/src/main/java/com/ruoyi/system/domain/resume/ResumeEducation.java
  89. 29 0
      ruoyi-system/src/main/java/com/ruoyi/system/domain/resume/ResumeProject.java
  90. 33 0
      ruoyi-system/src/main/java/com/ruoyi/system/domain/resume/ResumeWork.java

+ 52 - 0
ruoyi-admin/pom.xml

@@ -66,6 +66,58 @@
             <artifactId>ruoyi-flowable</artifactId>
         </dependency>
 
+        <!--
+			JXL has better support for Chinese;
+			POI is more efficient than JXL and supports more formats;
+			as Excel files grow, POI performance drops sharply while JXL holds up better
+		 -->
+        <dependency>
+            <groupId>org.apache.poi</groupId>
+            <artifactId>poi</artifactId>
+            <version>3.12</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.poi</groupId>
+            <artifactId>poi-ooxml</artifactId>
+            <version>3.12</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.poi</groupId>
+            <artifactId>poi-ooxml-schemas</artifactId>
+            <version>3.12</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.poi</groupId>
+            <artifactId>poi-scratchpad</artifactId>
+            <version>3.12</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.poi</groupId>
+            <artifactId>poi-excelant</artifactId>
+            <version>3.12</version>
+        </dependency>
+        <dependency>
+            <groupId>com.deepoove</groupId>
+            <artifactId>poi-tl</artifactId>
+            <version>1.3.1</version>
+        </dependency>
+        <!-- end of POI dependencies -->
+
+        <!-- google java lib -->
+        <dependency>
+            <groupId>com.google.guava</groupId>
+            <artifactId>guava</artifactId>
+            <version>17.0</version>
+        </dependency>
+
+        <!-- hanlp lib -->
+        <dependency>
+            <groupId>com.hanlp</groupId>
+            <artifactId>hanlp</artifactId>
+            <version>1.8.3</version>
+            <scope>system</scope>
+            <systemPath>${basedir}/src/main/lib/hanlp-1.8.3.jar</systemPath>
+        </dependency>
     </dependencies>
 
     <build>

+ 174 - 0
ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/FileUtils.java

@@ -0,0 +1,174 @@
+package com.ruoyi.utils.resumeAnalysis;
+
+import org.apache.poi.hwpf.extractor.WordExtractor;
+import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
+import org.apache.poi.xwpf.usermodel.XWPFDocument;
+
+import java.io.*;
+import java.util.ArrayList;
+import java.util.Enumeration;
+import java.util.List;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipFile;
+
+import static org.springframework.util.FileCopyUtils.BUFFER_SIZE;
+
+public class FileUtils {
+
+
+    /**
+     * Extract a zip archive.
+     * @param srcFile        source zip file
+     * @param destDirPath    directory to extract into
+     * @return the name of the last extracted entry
+     * @throws RuntimeException if the source file does not exist or extraction fails
+     */
+
+    public static String unZip(File srcFile, String destDirPath) throws RuntimeException {
+        String result = "";
+        long start = System.currentTimeMillis();
+        // Check that the source file exists
+        if (!srcFile.exists()) {
+            throw new RuntimeException(srcFile.getPath() + "所指文件不存在");
+        }
+        // Start extracting
+        ZipFile zipFile = null;
+        try {
+            zipFile = new ZipFile(srcFile);
+            Enumeration<?> entries = zipFile.entries();
+            while (entries.hasMoreElements()) {
+                ZipEntry entry = (ZipEntry) entries.nextElement();
+                result = entry.getName();
+                System.out.println("解压" + entry.getName());
+                // If the entry is a directory, create it
+                if (entry.isDirectory()) {
+                    String dirPath = destDirPath + "/" + entry.getName();
+                    File dir = new File(dirPath);
+                    dir.mkdirs();
+                } else {
+                    // If the entry is a file, create it first and then copy the content over with streams
+                    File targetFile = new File(destDirPath + "/" + entry.getName());
+                    // Make sure the parent directory of this file exists
+                    if(!targetFile.getParentFile().exists()){
+                        targetFile.getParentFile().mkdirs();
+                    }
+                    targetFile.createNewFile();
+                    // Write the zip entry content into this file
+                    InputStream is = zipFile.getInputStream(entry);
+                    FileOutputStream fos = new FileOutputStream(targetFile);
+                    int len;
+                    byte[] buf = new byte[BUFFER_SIZE];
+                    while ((len = is.read(buf)) != -1) {
+                        fos.write(buf, 0, len);
+                    }
+                    // Close the streams in reverse order of opening
+                    fos.close();
+                    is.close();
+                }
+            }
+            long end = System.currentTimeMillis();
+            System.out.println("解压完成,耗时:" + (end - start) +" ms");
+        } catch (Exception e) {
+            throw new RuntimeException("unzip error from ZipUtils", e);
+        } finally {
+            if(zipFile != null){
+                try {
+                    zipFile.close();
+                } catch (IOException e) {
+                    e.printStackTrace();
+                }
+            }
+        }
+        return  result;
+    }
+
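+    /**
+     * Read the file at the given local path into a byte array and delete the file afterwards.
+     * @param url path of the local file to read
+     * @return the file content as a byte array, or null if reading fails
+     */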
+    public static byte[] getImageStream(String url) {
+        byte[] buffer = null;
+        File file = new File(url);
+        FileInputStream fis;
+        try {
+            ByteArrayOutputStream bos = new ByteArrayOutputStream();
+            fis = new FileInputStream(file);
+            byte[] b = new byte[1024];
+            int n;
+            while ((n = fis.read(b)) != -1) {
+                bos.write(b, 0, n);
+            }
+            fis.close();
+            bos.close();
+            buffer = bos.toByteArray();
+            if(file.exists()) {
+                file.delete();
+            }
+        } catch (FileNotFoundException e) {
+            e.printStackTrace();
+        }catch (IOException e) {
+            e.printStackTrace();
+        }
+        return buffer;
+    }
+
+    /**
+     * @Description: read the text of a .doc Word file with POI (HWPF)
+     * @create: 2019-07-27 9:48
+     * @update logs
+     * @throws Exception
+     */
+    public static List<String> readWord(String filePath) throws Exception{
+
+        List<String> linList = new ArrayList<String>();
+        String buffer = "";
+        try {
+            if (filePath.endsWith(".doc")) {
+                InputStream is = new FileInputStream(new File(filePath));
+                WordExtractor ex = new WordExtractor(is);
+                buffer = ex.getText();
+                ex.close();
+
+                if(buffer.length() > 0){
+                    //Split the text on line breaks
+                    String [] arry = buffer.split("\\n");
+                    for (String string : arry) {
+                        linList.add(string.trim());
+                    }
+                }
+            }
+
+            return linList;
+        } catch (Exception e) {
+            System.out.print("error---->"+filePath);
+            e.printStackTrace();
+            return null;
+        }
+    }
+
+
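+    /**
+     * Read the text of a .docx file with POI (XWPF) and return it line by line.
+     * @param filePath path of the .docx file
+     * @return the trimmed lines of the document, or null if reading fails
+     */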
+    public static List<String> readWordDocx(String filePath) throws IOException {
+
+        List<String> linList = new ArrayList<String>();
+        String buffer = "";
+        try {
+            if (filePath.endsWith(".docx")) {
+                InputStream is = new FileInputStream(new File(filePath));
+                XWPFDocument xdoc = new XWPFDocument(is);
+                XWPFWordExtractor ex = new XWPFWordExtractor(xdoc);
+                buffer = ex.getText();
+                ex.close();
+
+                if(buffer.length() > 0){
+                    //Split the text on line breaks
+                    String [] arry = buffer.split("\\n");
+                    for (String string : arry) {
+                        linList.add(string.trim());
+                    }
+                }
+            }
+
+            return linList;
+        } catch (Exception e) {
+            System.out.print("error---->"+filePath);
+            e.printStackTrace();
+            return null;
+        }
+    }
+
+}

File diff suppressed because it is too large
+ 1338 - 0
ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/HanLPUtils.java


+ 34 - 0
ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/PersonalType.java

@@ -0,0 +1,34 @@
+package com.ruoyi.utils.resumeAnalysis;
+
+public enum PersonalType {
+
+    XM("XM"),     // name
+    YX("YX"),     // email
+    SJH("SJH"),   // phone number
+    MZ("MZ"),     // ethnicity
+    XB("XB"),     // gender
+    XL("XL"),     // education level
+    XX("XX"),     // school
+    ZY("ZY"),     // major
+    GZJY("GZJY"), // work experience
+    JZD("JZD"),   // residence
+    GZD("GZD"),   // work location
+    SGTZ("SGTZ"), // height and weight
+    SR("SR"),     // birth date
+    YYNL("YYNL"), // language ability
+    GW("GW"),     // position applied for
+    BYSJ("BYSJ"); // graduation date
+
+    private final String code;
+
+    PersonalType(String code)
+    {
+        this.code = code;
+    }
+
+    public String getCode()
+    {
+        return code;
+    }
+
+}

+ 153 - 0
ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/PersonalUtils.java

@@ -0,0 +1,153 @@
+package com.ruoyi.utils.resumeAnalysis;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Locates the positions of personal-information field labels inside resume text
+ * and maps each label to its PersonalType.
+ */
+public class PersonalUtils {
+
+    /* Name */
+    public static final List<String> fullNameList = Arrays.asList("姓名");
+    /* Email */
+    public static final List<String> mailboxList = Arrays.asList("邮箱","e-mail","电子邮件");
+    /* Phone */
+    public static final List<String> phoneList = Arrays.asList("手机","移动电话","iphone","phone","telephone","手机号","电话","联系电话","联系方式");
+    /* Ethnicity */
+    public static final List<String> nationList = Arrays.asList("民族");
+    /* Gender */
+    public static final List<String> genderList = Arrays.asList("性别");
+    /* Education level */
+    public static final List<String> educationList = Arrays.asList("学历");
+    /* School */
+    public static final List<String> schoolList = Arrays.asList("毕业学校","毕业院校","学校","教育背景");
+    /* Major */
+    public static final List<String> majorList = Arrays.asList("专业");
+    /* Work experience */
+    public static final List<String> workExperienceList = Arrays.asList("工作经验","工作年限","行业经验");
+    /* Residence */
+    public static final List<String> residenceList = Arrays.asList("现居住地","住址","现所在地","所在地区","所在地","现居地","居住地","户籍","户口","籍贯");
+    /* Work location */
+    public static final List<String> workplaceList = Arrays.asList("工作地");
+    /* Height and weight */
+    public static final List<String> bodyWeightList = Arrays.asList("身高体重");
+    /* Birth date */
+    public static final List<String> birthdayList = Arrays.asList("出生年月","出生日期","生日","年龄");
+    /* Language ability */
+    public static final List<String> languageList = Arrays.asList("语言能力","工作语言");
+    /* Position */
+    public static final List<String> postList = Arrays.asList("应聘岗位","岗位");
+    /* Graduation date */
+    public static final List<String> graduationList = Arrays.asList("毕业时间");
+
+
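+    /**
+     * Locate every personal-information label that occurs in the given content.
+     * @param content one line of resume text
+     * @return one map per matched label, as produced by ResumeAnalysisUtil.getSubscript
+     */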
+    public static List<Map<Integer,String>> getPersonalSubscript(String content){
+        List<Map<Integer,String>> personalSubscriptList = new ArrayList<>();
+        //Name
+        Map<Integer,String> fullNameMap = ResumeAnalysisUtil.getSubscript(fullNameList,content);
+        if(fullNameMap!=null&&fullNameMap.size()>0){
+            personalSubscriptList.add(fullNameMap);
+        }
+        //Email
+        Map<Integer,String> mailboxMap = ResumeAnalysisUtil.getSubscript(mailboxList,content.toLowerCase());
+        if(mailboxMap!=null&&mailboxMap.size()>0){
+            personalSubscriptList.add(mailboxMap);
+        }
+        //Phone number
+        Map<Integer,String> phoneMap = ResumeAnalysisUtil.getSubscript(phoneList,content.toLowerCase());
+        if(phoneMap!=null&&phoneMap.size()>0){
+            personalSubscriptList.add(phoneMap);
+        }
+        //Ethnicity
+        Map<Integer,String> nationMap = ResumeAnalysisUtil.getSubscript(nationList,content);
+        if(nationMap!=null&&nationMap.size()>0){
+            personalSubscriptList.add(nationMap);
+        }
+        //Gender
+        Map<Integer,String> genderMap = ResumeAnalysisUtil.getSubscript(genderList,content);
+        if(genderMap!=null&&genderMap.size()>0){
+            personalSubscriptList.add(genderMap);
+        }
+        //Education level
+        Map<Integer,String> educationMap = ResumeAnalysisUtil.getSubscript(educationList,content);
+        if(educationMap!=null&&educationMap.size()>0){
+            personalSubscriptList.add(educationMap);
+        }
+        //School
+        Map<Integer,String> schoolMap = ResumeAnalysisUtil.getSubscript(schoolList,content);
+        if(schoolMap!=null&&schoolMap.size()>0){
+            personalSubscriptList.add(schoolMap);
+        }
+        //Major
+        Map<Integer,String> majorMap = ResumeAnalysisUtil.getSubscript(majorList,content);
+        if(majorMap!=null&&majorMap.size()>0){
+            personalSubscriptList.add(majorMap);
+        }
+        //Work experience
+        Map<Integer,String> workExperienceMap = ResumeAnalysisUtil.getSubscript(workExperienceList,content);
+        if(workExperienceMap!=null&&workExperienceMap.size()>0){
+            personalSubscriptList.add(workExperienceMap);
+        }
+        //Residence
+        Map<Integer,String> residenceMap = ResumeAnalysisUtil.getSubscript(residenceList,content);
+        if(residenceMap!=null&&residenceMap.size()>0){
+            personalSubscriptList.add(residenceMap);
+        }
+        //Work location
+        Map<Integer,String> workplaceMap = ResumeAnalysisUtil.getSubscript(workplaceList,content);
+        if(workplaceMap!=null&&workplaceMap.size()>0){
+            personalSubscriptList.add(workplaceMap);
+        }
+        //Height and weight
+        Map<Integer,String> bodyWeightMap = ResumeAnalysisUtil.getSubscript(bodyWeightList,content);
+        if(bodyWeightMap!=null&&bodyWeightMap.size()>0){
+            personalSubscriptList.add(bodyWeightMap);
+        }
+        //Birth date
+        Map<Integer,String> birthdayMap = ResumeAnalysisUtil.getSubscript(birthdayList,content);
+        if(birthdayMap!=null&&birthdayMap.size()>0){
+            personalSubscriptList.add(birthdayMap);
+        }
+        //Language ability
+        Map<Integer,String> languageMap = ResumeAnalysisUtil.getSubscript(languageList,content);
+        if(languageMap!=null&&languageMap.size()>0){
+            personalSubscriptList.add(languageMap);
+        }
+        //Position
+        Map<Integer,String> postMap = ResumeAnalysisUtil.getSubscript(postList,content);
+        if(postMap!=null&&postMap.size()>0){
+            personalSubscriptList.add(postMap);
+        }
+        //Graduation date
+        Map<Integer,String> graduationMap = ResumeAnalysisUtil.getSubscript(graduationList,content);
+        if(graduationMap!=null&&graduationMap.size()>0){
+            personalSubscriptList.add(graduationMap);
+        }
+        return personalSubscriptList;
+    }
+
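+    /**
+     * Map a matched label to its PersonalType, or null if the label is unknown.
+     */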
+    public static PersonalType toJudgeType(String key){
+        if(ResumeAnalysisUtil.getVerificationType(fullNameList,key))return PersonalType.XM;
+        if(ResumeAnalysisUtil.getVerificationType(mailboxList,key))return PersonalType.YX;
+        if(ResumeAnalysisUtil.getVerificationType(phoneList,key))return PersonalType.SJH;
+        if(ResumeAnalysisUtil.getVerificationType(nationList,key))return PersonalType.MZ;
+        if(ResumeAnalysisUtil.getVerificationType(genderList,key))return PersonalType.XB;
+        if(ResumeAnalysisUtil.getVerificationType(educationList,key))return PersonalType.XL;
+        if(ResumeAnalysisUtil.getVerificationType(schoolList,key))return PersonalType.XX;
+        if(ResumeAnalysisUtil.getVerificationType(majorList,key))return PersonalType.ZY;
+        if(ResumeAnalysisUtil.getVerificationType(workExperienceList,key))return PersonalType.GZJY;
+        if(ResumeAnalysisUtil.getVerificationType(residenceList,key))return PersonalType.JZD;
+        if(ResumeAnalysisUtil.getVerificationType(workplaceList,key))return PersonalType.GZD;
+        if(ResumeAnalysisUtil.getVerificationType(bodyWeightList,key))return PersonalType.SGTZ;
+        if(ResumeAnalysisUtil.getVerificationType(birthdayList,key))return PersonalType.SR;
+        if(ResumeAnalysisUtil.getVerificationType(languageList,key))return PersonalType.YYNL;
+        if(ResumeAnalysisUtil.getVerificationType(postList,key))return PersonalType.GW;
+        if(ResumeAnalysisUtil.getVerificationType(graduationList,key))return PersonalType.BYSJ;
+        return null;
+    }
+
+}

+ 22 - 0
ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/ProjectType.java

@@ -0,0 +1,22 @@
+package com.ruoyi.utils.resumeAnalysis;
+
+public enum ProjectType {
+
+    XMMC("XMMC"), // project name
+    XMJJ("XMJJ"), // project summary
+    XMZZ("XMZZ"), // project responsibilities
+    XMJS("XMJS"), // main technologies
+    KFGJ("KFGJ"), // development tools
+    XMZQ("XMZQ"); // project duration
+
+    private final String code;
+    ProjectType(String code)
+    {
+        this.code = code;
+    }
+
+    public String getCode()
+    {
+        return code;
+    }
+}

+ 74 - 0
ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/ProjectUtils.java

@@ -0,0 +1,74 @@
+package com.ruoyi.utils.resumeAnalysis;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Groups the field labels used when parsing the project-experience section of a resume.
+ */
+public class ProjectUtils {
+
+    /* Project name */
+    public static final List<String> nameList = Arrays.asList("项目:","项目名称","项目一","项目二","项目三","项目四","项目五","项目六","项目七");
+    /* Project summary */
+    public static final List<String> synopsisList = Arrays.asList("项目简介","项目概述","项目描述");
+    /* Project responsibilities */
+    public static final List<String> dutyList = Arrays.asList("项目职责","责任描述","主要职责","职责描述","个人职责");
+    /* Main technologies */
+    public static final List<String> technologyList = Arrays.asList("主要技术","项目技术","基础框架","技术框架","技术描述","使用框架","技术要点","软件环境");
+    /* Development tools */
+    public static final List<String> developList = Arrays.asList("开发环境","使用技术","开发工具");
+    /* Project duration */
+    public static final List<String> cycleList = Arrays.asList("项目周期","开发时间","项目时间","时间");
+
+
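+    /**
+     * Locate every project-related label that occurs in the given content.
+     * @param content one line of resume text
+     * @return one map per matched label, as produced by ResumeAnalysisUtil.getSubscript
+     */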
+    public static List<Map<Integer,String>> getProjectSubscript(String content){
+        List<Map<Integer,String>> projectSubscriptList = new ArrayList<>();
+        /* Project name */
+        Map<Integer,String> nameMap = ResumeAnalysisUtil.getSubscript(nameList,content);
+        if(nameMap!=null&&nameMap.size()>0){
+            projectSubscriptList.add(nameMap);
+        }
+        /* Project summary */
+        Map<Integer,String> synopsisMap = ResumeAnalysisUtil.getSubscript(synopsisList,content);
+        if(synopsisMap!=null&&synopsisMap.size()>0){
+            projectSubscriptList.add(synopsisMap);
+        }
+        /* Project responsibilities */
+        Map<Integer,String> dutyMap = ResumeAnalysisUtil.getSubscript(dutyList,content);
+        if(dutyMap!=null&&dutyMap.size()>0){
+            projectSubscriptList.add(dutyMap);
+        }
+        /* Main technologies */
+        Map<Integer,String> technologyMap = ResumeAnalysisUtil.getSubscript(technologyList,content);
+        if(technologyMap!=null&&technologyMap.size()>0){
+            projectSubscriptList.add(technologyMap);
+        }
+        /* Development tools */
+        Map<Integer,String> developMap = ResumeAnalysisUtil.getSubscript(developList,content);
+        if(developMap!=null&&developMap.size()>0){
+            projectSubscriptList.add(developMap);
+        }
+        /* Project duration */
+        Map<Integer,String> cycleMap = ResumeAnalysisUtil.getSubscript(cycleList,content);
+        if(cycleMap!=null&&cycleMap.size()>0){
+            projectSubscriptList.add(cycleMap);
+        }
+
+        return projectSubscriptList;
+    }
+
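+    /**
+     * Map a matched label to its ProjectType, or null if the label is unknown.
+     */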
+    public static ProjectType toJudgeType(String key){
+        if(ResumeAnalysisUtil.getVerificationType(nameList,key))return ProjectType.XMMC;
+        if(ResumeAnalysisUtil.getVerificationType(synopsisList,key))return ProjectType.XMJJ;
+        if(ResumeAnalysisUtil.getVerificationType(dutyList,key))return ProjectType.XMZZ;
+        if(ResumeAnalysisUtil.getVerificationType(technologyList,key))return ProjectType.XMJS;
+        if(ResumeAnalysisUtil.getVerificationType(developList,key))return ProjectType.KFGJ;
+        if(ResumeAnalysisUtil.getVerificationType(cycleList,key))return ProjectType.XMZQ;
+        return null;
+    }
+
+
+}

+ 859 - 0
ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/ResumeAnalysisUtil.java

@@ -0,0 +1,859 @@
+package com.ruoyi.utils.resumeAnalysis;
+
+import com.ruoyi.common.utils.DateUtils;
+import com.ruoyi.system.domain.resume.Resume;
+import com.ruoyi.system.domain.resume.ResumeEducation;
+import com.ruoyi.system.domain.resume.ResumeProject;
+import com.ruoyi.system.domain.resume.ResumeWork;
+import com.ruoyi.utils.resumeAnalysis.similarity.Similarity;
+import com.ruoyi.utils.resumeAnalysis.similarity.similarity.util.StringUtil;
+import com.hankcs.hanlp.corpus.tag.Nature;
+import com.hankcs.hanlp.seg.common.Term;
+import com.hankcs.hanlp.tokenizer.NLPTokenizer;
+import org.apache.commons.lang3.StringUtils;
+
+import java.util.*;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+public class ResumeAnalysisUtil {
+
+    public static void main(String[] main) throws Exception {
+        long startTime = System.currentTimeMillis();
+//        String Path = "C:\\Users\\Administrator\\Desktop\\张继超\\1669778120747.doc";
+//        String Path = "C:\\Users\\Administrator\\Desktop\\张继超\\1667438649530s.doc";
+//        String Path = "C:\\Users\\Administrator\\Desktop\\张继超\\1669798761043.doc";
+//        String Path = "C:\\Users\\Administrator\\Desktop\\张继超\\1667371161809.doc";
+//        String Path = "C:\\Users\\Administrator\\Desktop\\张继超\\1667371161809.doc";
+//        String Path = "C:\\Users\\Administrator\\Desktop\\张继超\\1671083578748.doc";
+//        String Path = "C:\\Users\\Administrator\\Desktop\\张继超\\1671088704774.doc";
+//        String Path = "C:\\Users\\Administrator\\Desktop\\张继超\\1671088776900.doc";
+//        String Path = "C:\\Users\\Administrator\\Desktop\\张继超\\1671089013938.doc";
+//        String Path = "C:\\Users\\Administrator\\Desktop\\张继超\\1671095791131.doc";
+//        String Path = "C:\\Users\\Administrator\\Desktop\\张继超\\1671095908789.doc";
+//        String Path = "C:\\Users\\Administrator\\Desktop\\张继超\\1671096212499.doc";
+//        String Path = "C:\\Users\\Administrator\\Desktop\\张继超\\1671152868926.doc";
+//        String Path = "C:\\Users\\Administrator\\Desktop\\张继超\\1671156366121.doc";
+        //String Path = "C:\\Users\\Administrator\\Desktop\\王胜利-Java-佛山.doc";
+        String Path = "D:\\我的文件\\me\\张洛飞入职资料\\张洛飞\\张洛飞简历.doc";
+//        String Path = "C:\\Users\\Administrator\\Desktop\\张继超\\1671433184218.doc";
+//        String Path = "C:\\Users\\Administrator\\Desktop\\张继超\\1671436400350.doc";
+//        String Path = "C:\\Users\\Administrator\\Desktop\\张继超\\1671436611530.doc";
+//        String Path = "C:\\Users\\Administrator\\Desktop\\张继超\\1671436736483.doc";
+
+//        String Path = "C:\\Users\\zjc\\Desktop\\1671095908789.doc";
+
+        //Read the Word document content
+        List<String> result = FileUtils.readWord(Path);
+        //Parse the resume
+        Resume resume = toResumeAnalysis(result);
+        System.out.println(resume);
+        long endTime = System.currentTimeMillis();
+        System.out.println("程序运行时间:" + (double) (endTime - startTime) / 1000 + "s");
+    }
+
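+    /**
+     * Parse the extracted document lines into a Resume object:
+     * split the lines into heading sections, then extract personal info,
+     * work experience, projects, education, self-evaluation, skills and certificates.
+     * @param result the lines read from the resume document
+     * @return the populated Resume
+     */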
+    public static Resume toResumeAnalysis(List<String> result){
+
+        //Analyse each line and mark probable section headings
+        Map<String, Map<String,Object>> resultMap = subsection(result);
+        //Section-heading line indexes
+        List<Integer> list = identification(resultMap);
+        //Content of each heading section
+        List<Map<String,String>> paragraphList = paragraph(list,resultMap);
+        //Remaining content outside any section
+        Map<String,String> surplusMap = surplus(result,list,paragraphList);
+
+//        System.out.println(resultMap);
+//        System.out.println(list);
+//        System.out.println(paragraphList);
+//        System.out.println(surplusMap);
+
+        //Resume object holding the basic information
+         Resume resume = new Resume();
+
+        //True until a personal-information section heading has been processed
+        boolean PERSONAL_FLAG = true;
+
+        //Start processing the basic information
+        for (Map<String,String> str:paragraphList) {
+            if(StringUtils.isNotEmpty(str.get("title"))){
+                String title = HanLPUtils.StringFilter(str.get("title"));
+                if(StringUtils.isNotEmpty(title)){
+                    //Sort the map by key
+                    Map<String,String> mapByKey = HanLPUtils.sortMapByKey(str);
+
+                    //Personal / basic information section
+                    if(containsWordsIndexOf(title,ResumeType.PERSONAL)){
+
+                        /**
+                         * Extract the data fields:
+                         * name, phone number, education level, industry experience, school, e-mail, work experience,
+                         * work location, residence, major
+                         */
+                        for (Map.Entry<String, String> entry : mapByKey.entrySet()) {
+                            if(!entry.getKey().equals("title")){
+                                String content = entry.getValue().replaceAll("\\s+", "");
+                                //Find the field labels inside the content
+                                List<Map<Integer,String>> mapList = PersonalUtils.getPersonalSubscript(content);
+                                //Extract the matched label data
+                                Map<Integer,String> resumeMap = toArrangementSort(mapList);
+                                //Split the content by label and classify each piece
+                                Map<String,String> dataList = toSplitContent(resumeMap,content);
+                                for (Map.Entry<String,String> dataMap: dataList.entrySet()) {
+                                    PersonalType personalType = PersonalUtils.toJudgeType(dataMap.getKey());
+                                    if(personalType.getCode().equals("XM"))resume.setUserName(dataMap.getValue());//name
+                                    if(personalType.getCode().equals("YX"))resume.setEmail(dataMap.getValue());//email
+                                    if(personalType.getCode().equals("SJH"))resume.setMobile(dataMap.getValue());//phone number
+                                    if(personalType.getCode().equals("MZ"))resume.setNationality(dataMap.getValue());//ethnicity
+                                    if(personalType.getCode().equals("XB"))resume.setGender(dataMap.getValue());//gender
+                                    if(personalType.getCode().equals("XL"))resume.setDegree(dataMap.getValue());//education level
+                                    if(personalType.getCode().equals("XX"))resume.setGraduateCollege(dataMap.getValue());//school
+                                    if(personalType.getCode().equals("ZY"))resume.setMajor(dataMap.getValue());//major
+                                    if(personalType.getCode().equals("GZJY"))resume.setWorkExperience(dataMap.getValue());//work experience
+                                    if(personalType.getCode().equals("JZD"))resume.setResidence(dataMap.getValue());//residence
+                                    if(personalType.getCode().equals("GZD"))resume.setWorkingPlace(dataMap.getValue());//work location
+//                                    if(personalType.getCode().equals("SGTZ"))resume.setUserName(dataMap.getValue());//height and weight
+                                    if(personalType.getCode().equals("SR"))resume.setBirthDateString(dataMap.getValue());//birth date
+                                    if(personalType.getCode().equals("YYNL"))resume.setLanguageAbility(dataMap.getValue());//language ability
+                                    if(personalType.getCode().equals("GW"))resume.setPosition(dataMap.getValue());//position
+//                                    if(personalType.getCode().equals("BYSJ"))resume.setUserName(dataMap.getValue());//graduation date
+
+                                }
+                            }
+                        }
+                        /*
+                         If there is a personal/basic information section the fields are extracted here; otherwise they are extracted from the remaining content below.
+                         */
+                        PERSONAL_FLAG = false;
+                    }
+                    //Work experience section
+                    if(containsWordsIndexOf(title,ResumeType.WORKEXPERIENCE)){
+                        /**
+                         * Start processing the work-experience entries
+                         */
+                        List<String> dateList = new ArrayList<>();
+                        List<String> companyNameList = new ArrayList<>();
+                        List<String> postionNameList = new ArrayList<>();
+                        List<String> workDetailList = new ArrayList<>();
+                        List<ResumeWork> workList = new ArrayList<>();
+                        //Marker for the entry currently being built (last matched date range)
+                        String flag = "";
+                        for (Map.Entry<String, String> entry : mapByKey.entrySet()) {
+                            if (!entry.getKey().equals("title")) {
+                                String content = entry.getValue().replaceAll("\\s+", "");
+                                //Extract the date range
+
+                                String SR = "^\\d{4}(.|-|\\/|年)\\d{1,2}(~|—|--|–|~|-|\\/)(\\d{4}(.|-|\\/|年)\\d{1,2}|至今)";
+                                Pattern pattern = Pattern.compile(SR); //try to extract date ranges of this form
+                                Matcher matcher = pattern.matcher(content);
+                                while (matcher.find()) {
+                                    dateList.add(matcher.group());
+                                    String dateString = matcher.group();
+                                    content = content.replace(dateString,"");
+                                    flag = dateString;
+                                }
+
+                                if(content.contains("有限公司")){
+                                    String[] resumeWork = content.split("有限公司");
+                                    if(resumeWork != null && resumeWork.length > 0){
+                                        companyNameList.add(resumeWork[0]+"有限公司");
+                                    }
+                                    if(resumeWork.length >= 2){
+                                        postionNameList.add(resumeWork[1]);
+                                    }
+                                }
+                                if(dateList!=null && dateList.size()>0){
+                                    if(dateList.get(dateList.size()-1).equals(flag)){
+                                        if(workDetailList.size() == dateList.size()){
+                                            workDetailList.set(dateList.size()-1,workDetailList.get(dateList.size()-1)+content);
+                                        }else{
+                                            workDetailList.add(content);
+                                        }
+                                    }
+                                }
+                            }
+                        }
+                        if(null != dateList && dateList.size() > 0){
+                            for (int i = 0; i < dateList.size(); i++) {
+                                ResumeWork resumeWork = new ResumeWork();
+                                String[] datestr = dateList.get(i).split("(~|--|–|~|-)");
+                                if(datestr.length>0){
+                                    Date startDate = DateUtils.parseDate(datestr[0]);
+                                    Date endDate = DateUtils.parseDate(datestr[1]);
+                                    if(datestr[0].equals("至今")){
+                                        startDate = new Date();
+                                    }
+                                    if(datestr[1].equals("至今")){
+                                        endDate = new Date();
+                                    }
+                                    resumeWork.setIinductionStartDate(DateUtils.formatDate(startDate,"yyyy-MM"));
+                                    resumeWork.setIinductionEndDate(DateUtils.formatDate(endDate,"yyyy-MM"));
+                                }
+                                if(companyNameList!=null&&companyNameList.size() == dateList.size()){
+                                    resumeWork.setCompanyName(companyNameList.get(i));
+                                }
+                                if(postionNameList!=null&&postionNameList.size() == dateList.size()){
+                                    resumeWork.setPostionName(postionNameList.get(i));
+                                }
+                                if(workDetailList!=null&&workDetailList.size() == dateList.size()){
+                                    resumeWork.setWorkDetail(workDetailList.get(i));
+                                }
+                                workList.add(resumeWork);
+                            }
+                        }
+                        resume.setWorkList(workList);
+                    }
+                    //Project experience section
+                    if(containsWordsIndexOf(title,ResumeType.PROJECTEXPERIENCE)){
+                        List<Map<String,Object>> regularList = new ArrayList<>();
+                        for (Map.Entry<String, String> entry : mapByKey.entrySet()) {
+                            if (!entry.getKey().equals("title")) {
+                                String content = entry.getValue().replaceAll("\\s+", "");
+                                //Find the field labels inside the content
+                                List<Map<Integer,String>> mapList = ProjectUtils.getProjectSubscript(content);
+                                //Extract the matched label data
+                                Map<Integer,String> resumeMap = toArrangementSort(mapList);
+                                //Split the content by label and classify each piece
+                                Map<String,String> dataList = toSplitContent(resumeMap,content);
+                                //Keep the raw content and its index for further splitting into individual projects
+                                Map<String,Object> newMap = new HashMap<>();
+                                newMap.put("dataList",dataList);
+                                newMap.put("content",content);
+                                newMap.put("key",entry.getKey());
+                                regularList.add(newMap);
+                            }
+                        }
+                        //Process the collected data and split it into individual projects
+                        List<ResumeProject> resumeProjectList = toProjectSubsection(regularList);
+                        if(resumeProjectList != null && resumeProjectList.size() > 0){
+                            resume.setProjectList(resumeProjectList);
+                        }
+                    }
+                    //Education section
+                    if(containsWordsIndexOf(title,ResumeType.EDUCATIONALEXPERIENCE)){
+                        //Process the data with word segmentation
+                        //根据分词处理数据
+                        for (Map.Entry<String, String> entry : mapByKey.entrySet()) {
+                            if (!entry.getKey().equals("title")) {
+                                String content = entry.getValue().replaceAll("\\s+", "");
+
+                                List<String> dateList = new ArrayList<>();
+                                String school = "";
+                                String major = "";
+                                //Extract the date range
+                                String SR = "^\\d{4}(.|-|\\/|年)\\d{1,2}(~|—|--|–|~|-|\\/)(\\d{4}(.|-|\\/|年)\\d{1,2}|至今)";
+                                Pattern pattern = Pattern.compile(SR); //try to extract date ranges of this form
+                                Matcher matcher = pattern.matcher(content);
+                                while (matcher.find()) {
+                                    dateList.add(matcher.group());
+                                    String dateString = matcher.group();
+                                    content = content.replace(dateString,"");
+                                }
+
+                                List<Term> termList = NLPTokenizer.segment(content);
+//                                System.out.println(termList);
+                                ResumeEducation resumeEducation = new ResumeEducation();
+                                if(termList!= null && termList.size()>0){
+                                    for (int i = 0; i < termList.size(); i++) {
+                                        Term term  = termList.get(i);
+                                        if (term.nature.equals(Nature.nt)
+                                                ||term.nature.toString().equals("学校")
+                                                ||term.nature.equals(Nature.ntu)
+                                                ||term.nature.equals(Nature.nts)) {
+                                            school += term.word;
+                                        } else if (term.nature.toString().equals("学历")) {
+                                            resumeEducation.setDegree(term.word);
+                                        } else {
+                                            major += term.word;
+                                        }
+                                    }
+                                }
+                                resumeEducation.setSchoolName(school);
+                                resumeEducation.setMajor(major);
+                                for (int i = 0; i < dateList.size(); i++) {
+                                    String[] dateStr = dateList.get(i).split("(~|--|–|~|-)");
+                                    if(dateStr!=null&&dateStr.length>0){
+                                        Date startDate = DateUtils.parseDate(dateStr[0]);
+                                        resumeEducation.setEduStartDate(DateUtils.formatDate(startDate,"yyyy-MM"));
+                                        Date endDate = DateUtils.parseDate(dateStr[1]);
+                                        resumeEducation.setEduEndDate(DateUtils.formatDate(endDate,"yyyy-MM"));
+                                    }
+                                }
+                                eduList.add(resumeEducation);
+                            }
+                        }
+                        if(eduList != null && eduList.size() > 0){
+                            resume.setEduList(eduList);
+                        }
+                    }
+                    //Self-evaluation section
+                    if(containsWordsIndexOf(title,ResumeType.SELFEVALUATION)){
+                        String selfIntroduce = "";
+                        for (Map.Entry<String, String> entry : mapByKey.entrySet()) {
+                            if(!entry.getKey().equals("title")){
+                                selfIntroduce += entry.getValue();
+                            }
+                        }
+                        resume.setSelfIntroduce(selfIntroduce);
+
+                    }
+                    //Professional skills section
+                    if(containsWordsIndexOf(title,ResumeType.PROFESSIONALSKILLS)){
+                        String keyWords = "";
+                        for (Map.Entry<String, String> entry : mapByKey.entrySet()) {
+                            if(!entry.getKey().equals("title")){
+                                keyWords += entry.getValue();
+                            }
+                        }
+                        resume.setKeyWords(keyWords);
+                    }
+                    //Certificates section
+                    if(containsWordsIndexOf(title,ResumeType.CERTIFICATE)){
+                        String certificate = "";
+                        for (Map.Entry<String, String> entry : mapByKey.entrySet()) {
+                            if(!entry.getKey().equals("title")){
+                                certificate += entry.getValue();
+                            }
+                        }
+                        resume.setCertificate(certificate);
+                    }
+                }
+            }
+        }
+        //If no personal-information section was found, extract the fields from the remaining content
+        if(PERSONAL_FLAG){
+            /**
+             * Extract the basic fields from the remaining (unsectioned) content:
+             * name, phone number, education level, industry experience, school, e-mail, work experience,
+             * work location, residence, major
+             */
+            for (Map.Entry<String, String> entry : surplusMap.entrySet()) {
+                if(!entry.getKey().equals("title")){
+                    String content = entry.getValue().replaceAll("\\s+", "");
+                    //Find the field labels inside the content
+                    List<Map<Integer,String>> mapList = PersonalUtils.getPersonalSubscript(content);
+                    //Extract the matched label data
+                    Map<Integer,String> resumeMap = toArrangementSort(mapList);
+                    //Split the content by label and classify each piece
+                    Map<String,String> dataList = toSplitContent(resumeMap,content);
+                    for (Map.Entry<String,String> dataMap: dataList.entrySet()) {
+                        PersonalType personalType = PersonalUtils.toJudgeType(dataMap.getKey());
+                        if(personalType.getCode().equals("XM"))resume.setUserName(dataMap.getValue());//name
+                        if(personalType.getCode().equals("YX"))resume.setEmail(dataMap.getValue());//email
+                        if(personalType.getCode().equals("SJH"))resume.setMobile(dataMap.getValue());//phone number
+                        if(personalType.getCode().equals("MZ"))resume.setNationality(dataMap.getValue());//ethnicity
+                        if(personalType.getCode().equals("XB"))resume.setGender(dataMap.getValue());//gender
+                        if(personalType.getCode().equals("XL"))resume.setDegree(dataMap.getValue());//education level
+                        if(personalType.getCode().equals("XX"))resume.setGraduateCollege(dataMap.getValue());//school
+                        if(personalType.getCode().equals("ZY"))resume.setMajor(dataMap.getValue());//major
+                        if(personalType.getCode().equals("GZJY"))resume.setWorkExperience(dataMap.getValue());//work experience
+                        if(personalType.getCode().equals("JZD"))resume.setResidence(dataMap.getValue());//residence
+                        if(personalType.getCode().equals("GZD"))resume.setWorkingPlace(dataMap.getValue());//work location
+//                                    if(personalType.getCode().equals("SGTZ"))resume.setUserName(dataMap.getValue());//height and weight
+                        if(personalType.getCode().equals("SR"))resume.setBirthDateString(dataMap.getValue());//birth date
+                        if(personalType.getCode().equals("YYNL"))resume.setLanguageAbility(dataMap.getValue());//language ability
+                        if(personalType.getCode().equals("GW"))resume.setPosition(dataMap.getValue());//position
+//                                    if(personalType.getCode().equals("BYSJ"))resume.setUserName(dataMap.getValue());//graduation date
+
+                    }
+                }
+            }
+            //If the name is still empty, take the first line
+            if(StringUtils.isEmpty(resume.getUserName())){
+                /* use the first line as the name */
+                resume.setUserName(surplusMap.get("0"));
+            }
+            //If the phone number is empty, extract it with a regular expression
+            if(StringUtils.isEmpty(resume.getMobile())){
+                /**
+                 * Extract the phone number
+                 */
+                for (Map.Entry<String, String> entry : surplusMap.entrySet()) {
+                    String content = entry.getValue().replaceAll("\\s+", "");
+                    String mobile = HanLPUtils.StringFilter(HanLPUtils.getHanLPTelephone(content));
+                    if(StringUtils.isNotEmpty(mobile)){
+                        resume.setMobile(mobile);
+                    }
+                }
+            }
+            //If the gender is empty, extract it with a regular expression
+            if(StringUtils.isEmpty(resume.getGender())){
+                /** Extract the gender */
+                for (Map.Entry<String, String> entry : surplusMap.entrySet()) {
+                    String content = entry.getValue().replaceAll("\\s+", "");
+                    String gender = HanLPUtils.StringFilter(HanLPUtils.getHanLPSex(content));
+                    if(StringUtils.isNotEmpty(gender)){
+                        resume.setGender(gender);
+                    }
+                }
+            }
+            //If the email is empty, extract it with a regular expression
+            if(StringUtils.isEmpty(resume.getEmail())){
+                /** Extract the email */
+                for (Map.Entry<String, String> entry : surplusMap.entrySet()) {
+                    String content = entry.getValue().replaceAll("\\s+", "");
+                    String email = HanLPUtils.regularAcquisition("^[a-zA-Z0-9_-]+@[a-zA-Z0-9_-]+(\\.[a-zA-Z0-9_-]+)+$",content);
+                    if(StringUtils.isNotEmpty(email)){
+                        resume.setEmail(email);
+                    }
+                }
+            }
+            //If the age is empty, extract it with a regular expression
+            if(StringUtils.isEmpty(resume.getAge())){
+                /** Extract the age */
+                for (Map.Entry<String, String> entry : surplusMap.entrySet()) {
+                    String content = entry.getValue().replaceAll("\\s+", "");
+                    String age = HanLPUtils.regularAcquisition("\\d{1,2}(岁)",content);
+                    if(StringUtils.isNotEmpty(age)){
+                        resume.setAge(age);
+                    }
+                }
+            }
+        }
+        //Strip special characters from the name
+        if(StringUtils.isNotEmpty(resume.getUserName())){
+            resume.setUserName(HanLPUtils.StringFilter(resume.getUserName()));
+        }
+        //Strip special characters from the education level
+        if(StringUtils.isNotEmpty(resume.getDegree())){
+            resume.setDegree(HanLPUtils.StringFilter(resume.getDegree()));
+        }
+        //Remove Chinese characters from the phone number
+        if(StringUtils.isNotEmpty(resume.getMobile())){
+            resume.setMobile(StringUtil.removeStrChinese(resume.getMobile()));
+        }
+        return resume;
+    }
+
+
+    /**
+     * Analyse each document line and flag whether it is a section heading.
+     * @param docContent the lines of the document
+     * @return a map from line index to {coordinate, isTitle, txtContent}
+     */
+    public static Map<String, Map<String,Object>> subsection(List<String> docContent){
+        Map<String, Map<String,Object>> resultMap = new HashMap<>();
+        for (int c = 0; c < docContent.size(); c++) {
+            Map<String, Object> result = new HashMap<String, Object>();
+            String txtContent = docContent.get(c);
+            if (StringUtils.isNotEmpty(txtContent)) {
+                //Line index
+                result.put("coordinate", c);
+                /**
+                 * Compare against the main section headings first
+                 */
+                boolean isTitle = isTitle(txtContent);
+                result.put("isTitle", isTitle);
+                result.put("txtContent",txtContent);
+                resultMap.put(c+"",result);
+            }
+        }
+        return resultMap;
+    }
+
+    /**
+     * Check whether a line of text matches one of the known section headings.
+     * @param str the line to check
+     * @return true if the line is a section heading
+     */
+    public static boolean isTitle(String str){
+        List<String> list = Arrays.asList("项目经验", "项目经历",
+                "工作经验","工作实习经历", "工作经历","工作履历",
+                "教育经历","培训经历","教育培训经历","教育背景",
+                "专业技能","技能","技能专长","技能特长", "掌握技能","职业技能","相关技能","个人技能",
+                "证书","技能证书","求职意向","推荐评语",
+                "个人评价", "自我评价","个人总结","个人特点","个人优势","自我认知","个人简介","个人简历",
+                "联系方式","个人信息","基本信息","个人资料");
+        for (String title:list) {
+            double titleSimilarity = Similarity.phraseSimilarity(title, HanLPUtils.StringFilter(str));
+            if(titleSimilarity >= 1 )return true;
+        }
+        return false;
+    }
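+
+    /*
+     * Illustrative note (not part of the original commit): phraseSimilarity() only
+     * reaches 1.0 when the filtered line is effectively identical to one of the
+     * keywords above, so isTitle("工作经历") is expected to be true while a normal
+     * content line such as "2016-2019 在XX公司担任Java开发" is not. This assumes
+     * HanLPUtils.StringFilter leaves plain Chinese keywords unchanged.
+     */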
+
+    /**
+     * 提取所有大标题所在行的下标,并升序排列
+     * @param resultMap subsection()的返回结果
+     * @return 大标题行下标列表(升序)
+     */
+    public static List<Integer> identification(Map<String, Map<String,Object>> resultMap){
+        List<Integer> list = new ArrayList<>();
+        for (Map.Entry<String, Map<String,Object>> entry : resultMap.entrySet()) {
+            Map<String,Object> map = entry.getValue();
+            if(null != map.get("isTitle") && (boolean)map.get("isTitle")){
+                list.add(Integer.valueOf(entry.getKey()));
+            }
+        }
+        Collections.sort(list);
+        return list;
+    }
+
+    /**
+     * 按大标题下标把正文内容归并成段落
+     * @param list 大标题行下标(升序)
+     * @param resultMap subsection()的返回结果
+     * @return 段落列表,每个段落包含title以及其下的内容行
+     */
+    public static  List<Map<String,String>> paragraph(List<Integer> list,Map<String, Map<String,Object>> resultMap){
+        List<Map<String,String>> paragraphList = new ArrayList<>();
+        for (int i = 0; i < list.size(); i++) {
+            Integer coordinate = list.get(i);
+            Map<String,String> paragraph = new HashMap<>();
+            for (Map.Entry<String, Map<String,Object>> entry : resultMap.entrySet()) {
+                Map<String,Object> map = entry.getValue();
+                if(null != map.get("isTitle")&&(boolean)map.get("isTitle")){
+                    if(coordinate.equals(map.get("coordinate"))){
+                        paragraph.put("title",map.get("txtContent").toString());
+                    }
+                }
+                if(null != map.get("isTitle")&&!(boolean)map.get("isTitle")){
+                    if(i+1 < list.size() &&
+                            coordinate<(Integer) map.get("coordinate")&&
+                            Integer.valueOf(list.get(i+1))>(Integer) map.get("coordinate")){
+                        //获取段落
+                        paragraph.put(entry.getKey(),map.get("txtContent").toString());
+                    } else if (i+1 == list.size()
+                            &&coordinate<(Integer) map.get("coordinate")) {
+                        paragraph.put(entry.getKey(),map.get("txtContent").toString());
+                    }
+                }
+            }
+            paragraphList.add(paragraph);
+        }
+        return paragraphList;
+    }
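+
+    /*
+     * Illustrative usage sketch (not part of the original commit): how subsection(),
+     * identification() and paragraph() are expected to chain together. The sample
+     * resume lines below are made up.
+     */
+    public static void segmentationDemo() {
+        List<String> docContent = Arrays.asList(
+                "个人信息", "张三 男 25岁",
+                "项目经验", "简历解析系统 负责简历字段抽取");
+        //逐行判断是否为大标题
+        Map<String, Map<String, Object>> resultMap = subsection(docContent);
+        //取出标题行下标并升序排列
+        List<Integer> titleIndexes = identification(resultMap);
+        //按标题把正文切成段落
+        List<Map<String, String>> paragraphs = paragraph(titleIndexes, resultMap);
+        System.out.println(paragraphs);
+    }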
+
+    /**
+     * 筛选未归入任何段落的其他信息内容(一般为基本信息)
+     * @param resultMap 按行拆分的原始文档内容
+     * @param list 大标题行下标
+     * @param paragraphList 已切分好的段落列表
+     * @return 行下标 -> 行内容
+     */
+    public static Map<String,String> surplus(List<String> resultMap,List<Integer> list,List<Map<String,String>> paragraphList){
+        Map<String,String> basicsMap = new HashMap<>();
+        for (int i = 0; i < resultMap.size(); i++) {
+            boolean flag = true;
+            //大标题行不属于其他信息
+            for (Integer j:list) {
+                if(j == i){
+                    flag = false;
+                    break;
+                }
+            }
+            //已经归入某个段落的内容行也不属于其他信息
+            if(flag){
+                for (Map<String,String> paragraph: paragraphList) {
+                    for (Map.Entry<String,String> pMap : paragraph.entrySet()) {
+                        if(!pMap.getKey().equals("title")
+                                && i == Integer.valueOf(pMap.getKey())){
+                            flag = false;
+                        }
+                    }
+                }
+            }
+            if(flag){
+                basicsMap.put(i+"",resultMap.get(i));
+            }
+        }
+        return basicsMap;
+    }
+
+    /**
+     * 检查字符串是否包含任意一个关键词
+     * @param inputString 待检查的字符串
+     * @param words 关键词枚举
+     * @return 是否命中
+     */
+    public static boolean containsWordsIndexOf(String inputString, ResumeType words) {
+        boolean found = false;
+        for (String word : words.getCode()) {
+            if (inputString.indexOf(word) != -1) {
+                found = true;
+                break;
+            }
+        }
+        return found;
+    }
+
+    /**
+     * 整理关键词下标:每个小Map只保留下标最小的一条记录
+     * @param mapList 多组 下标 -> 关键词
+     * @return 合并后的 下标 -> 关键词
+     */
+    public static Map<Integer,String> toArrangementSort(List<Map<Integer,String>> mapList){
+        Map<Integer,String> resumeMap = new HashMap<>();
+        if(mapList!=null && mapList.size()>0){
+            for (Map<Integer,String> smallMap: mapList) {
+                if(smallMap.size()==1){
+                    for (Map.Entry<Integer,String> sMap:  smallMap.entrySet()) {
+                        resumeMap.put(sMap.getKey(),sMap.getValue());
+                    }
+                } else if (smallMap.size()>1) {
+                    TreeMap<Integer, String> paramTreeMap = new TreeMap<>(smallMap);
+                    resumeMap.put(paramTreeMap.firstKey(),paramTreeMap.get(paramTreeMap.firstKey()));
+                }
+            }
+        }
+        return resumeMap;
+    }
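+
+    /*
+     * Illustrative note (not part of the original commit): for an input like
+     * [{3=项目名称}, {10=项目简介, 25=项目职责}] the result is {3=项目名称, 10=项目简介},
+     * i.e. only the entry with the smallest subscript of each small map is kept.
+     */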
+
+    /**
+     * 按关键词下标把整段内容切分成 关键词 -> 字段值
+     * @param resumeMap 关键词下标 -> 关键词
+     * @param content 原始内容
+     * @return 关键词 -> 对应的字段值
+     */
+    public static Map<String,String> toSplitContent(Map<Integer,String> resumeMap,String content){
+
+        Map<String,String> subscriptMap = new HashMap<>();
+        //使用TreeMap保证下标与关键词一一对应,并按下标升序排列
+        TreeMap<Integer,String> sortedMap = new TreeMap<>(resumeMap);
+        List<Integer> key = new ArrayList<>(sortedMap.keySet());
+        List<String> values = new ArrayList<>(sortedMap.values());
+        for (int i = 0; i < key.size(); i++) {
+            if(i+1 < key.size()){
+                String str = content.substring(key.get(i),key.get(i+1));
+                //去掉关键词本身与冒号,只保留字段值
+                str = str.replace(values.get(i),"").replace(":","");
+                subscriptMap.put(values.get(i),str);
+            } else {
+                String str = content.substring(key.get(i));
+                str = str.replace(values.get(i),"").replace(":","");
+                subscriptMap.put(values.get(i),str);
+            }
+        }
+        return subscriptMap;
+    }
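+
+    /*
+     * Illustrative usage sketch (not part of the original commit): how getSubscript()
+     * and toSplitContent() cooperate on one flattened line. The sample text is made up.
+     */
+    public static void splitContentDemo() {
+        String content = "姓名:张三性别:男民族:汉";
+        List<String> keywords = Arrays.asList("姓名", "性别", "民族");
+        //关键词 -> 在原文中首次出现的下标
+        Map<Integer, String> subscript = getSubscript(keywords, content);
+        //按下标切分,得到 {姓名=张三, 性别=男, 民族=汉}
+        Map<String, String> fields = toSplitContent(subscript, content);
+        System.out.println(fields);
+    }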
+
+
+    /**
+     * 获取每个关键词在内容中首次出现的下标
+     * @param list 关键词列表
+     * @param content 待查找的内容
+     * @return 下标 -> 关键词
+     */
+    public static Map<Integer,String> getSubscript(List<String> list,String content){
+        Map<Integer,String> map = new HashMap<>();
+        for (String words:list) {
+            if(content.contains(words)){
+                int subscript = StrStr(content,words);
+                if(subscript!=-1){
+                    map.put(subscript,words);
+                }
+            }
+        }
+        return map;
+    }
+
+    /**
+     * 校验内容中是否包含任意一个关键词
+     * @param list 关键词列表
+     * @param content 待校验的内容
+     * @return 是否命中
+     */
+    public static boolean getVerificationType(List<String> list,String content){
+        for (String words:list) {
+            if(content.contains(words)){
+                return true;
+            }
+        }
+        return false;
+    }
+
+    /**
+     * 构建KMP算法的前缀表(部分匹配表)
+     * @param pattern 模式串
+     * @param n 模式串长度
+     * @return 前缀表
+     */
+    private static int[] Build_PrefixTable(String pattern,int n)
+    {
+        if (n == 0) return new int[0];
+        int[] prefix = new int[n];
+        int i = 1, j = 0;
+        while(i < n)
+        {
+            while (j > 0 && pattern.charAt(i) != pattern.charAt(j))
+            {
+                j = prefix[j - 1];
+            }
+            if (pattern.charAt(i) == pattern.charAt(j))
+            {
+                j++;
+            }
+            prefix[i] = j;
+            i++;
+        }
+        return prefix;
+    }
+
+    /**
+     * KMP查找子串在原串中首次出现的位置
+     * @param haystack 原始字符串
+     * @param needle 待查找的子串
+     * @return 首次出现的下标,未找到返回-1
+     */
+    public static int StrStr(String haystack, String needle)
+    {
+        if(needle.isEmpty()) return 0;
+        if(needle.length()>haystack.length()) return -1;
+        int n = needle.length(), m = haystack.length();
+        int[] prefix = Build_PrefixTable(needle,n);
+        for(int i=0,j=0;i<m;i++)
+        {
+            while(j>0 && haystack.charAt(i)!= needle.charAt(j))
+            {
+                j=prefix[j-1];
+            }
+            if(haystack.charAt(i)==needle.charAt(j))
+            {
+                j++;
+            }
+            if(j==n)
+            {
+                return i-n +1;
+            }
+        }
+        return -1;
+    }
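+
+    /*
+     * Illustrative check (not part of the original commit): StrStr() is a KMP search
+     * whose result matches String.indexOf for these inputs.
+     */
+    public static void strStrDemo() {
+        System.out.println(StrStr("姓名:张三", "张三"));   // 3
+        System.out.println(StrStr("姓名:张三", "电话"));   // -1
+    }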
+
+
+    /**
+     * 对项目经历内容分段并按下标排序,组装成项目经历列表
+     * @param list 每个元素包含key(排序下标)、content(原始文本),以及可选的dataList(小标题 -> 对应内容)
+     * @return 解析出的ResumeProject列表
+     */
+    public static List<ResumeProject> toProjectSubsection(List<Map<String,Object>> list){
+        //排序
+        Collections.sort(list, new Comparator<Map<String,Object>>() {
+            @Override
+            public int compare(Map<String,Object> stu1, Map<String,Object> stu2) {
+                return  Integer.valueOf(stu1.get("key").toString()) - Integer.valueOf(stu2.get("key").toString());
+            }
+        });
+        /**
+         * 数据结构定义
+         */
+        //当前小标识
+        String type = null;
+        String sign = "";
+        ResumeProject resumeProject = null;
+        List<ResumeProject> projects = new ArrayList<>();
+        //处理数据分析
+        for (int i = 0; i < list.size(); i++) {
+            Map<String,Object> map = list.get(i);
+            Map<String,String> dataList = (Map<String,String>) map.get("dataList");
+            if(dataList != null && dataList.size() > 0){
+                for (Map.Entry<String,String> dataMap: dataList.entrySet()) {
+                    ProjectType projectType = ProjectUtils.toJudgeType(dataMap.getKey());
+                    type = projectType.getCode();
+                    if(projectType.getCode().equals(ProjectType.XMMC.getCode())){//项目名称
+                        if(resumeProject!=null&&StringUtils.isNotEmpty(resumeProject.getProjectName())){
+                            projects.add(resumeProject);
+                        }
+                        resumeProject = new ResumeProject();
+                        resumeProject.setProjectName(dataMap.getValue());
+                    }
+                    if(projectType.getCode().equals(ProjectType.XMJJ.getCode()))//项目描述
+                        if(resumeProject!=null){
+                            if(StringUtils.isNotEmpty(sign)&&sign.equals(ProjectType.XMJJ.getCode())){
+                                projects.add(resumeProject);
+                                resumeProject = new ResumeProject();
+                                resumeProject.setProjectName(list.get(i-1).get("content").toString());
+                            }else{
+                                resumeProject.setProjectDetail(dataMap.getValue());
+                            }
+                        }else{
+                            if(i!=0){
+                                sign = ProjectType.XMJJ.getCode();
+                                resumeProject = new ResumeProject();
+                                resumeProject.setProjectName(list.get(i-1).get("content").toString());
+                            }
+                        }
+                    if(projectType.getCode().equals(ProjectType.XMZZ.getCode())) //责任描述
+                        if(resumeProject!=null){
+                            if(StringUtils.isNotEmpty(sign)&&sign.equals(ProjectType.XMZZ.getCode())){
+                                projects.add(resumeProject);
+                                resumeProject = new ResumeProject();
+                                resumeProject.setProjectName(list.get(i-1).get("content").toString());
+                            }else{
+                                resumeProject.setPartIn(dataMap.getValue());
+                            }
+                        }else{
+                            if(i!=0){
+                                sign = ProjectType.XMZZ.getCode();
+                                resumeProject = new ResumeProject();
+                                resumeProject.setProjectName(list.get(i-1).get("content").toString());
+                            }
+                        }
+                    if(projectType.getCode().equals(ProjectType.XMJS.getCode()))//软件环境
+                        if(resumeProject!=null){
+                            if(StringUtils.isNotEmpty(sign)&&sign.equals(ProjectType.XMJS.getCode())){
+                                projects.add(resumeProject);
+                                resumeProject = new ResumeProject();
+                                resumeProject.setProjectName(list.get(i-1).get("content").toString());
+                            }else{
+                                resumeProject.setDevelopEnvironment(dataMap.getValue());
+                            }
+                        }else{
+                            if(i!=0){
+                                sign = ProjectType.XMJS.getCode();
+                                resumeProject = new ResumeProject();
+                                resumeProject.setProjectName(list.get(i-1).get("content").toString());
+                            }
+                        }
+                    if(projectType.getCode().equals(ProjectType.KFGJ.getCode()))
+                        if(resumeProject!=null){
+                            if(StringUtils.isNotEmpty(sign)&&sign.equals(ProjectType.KFGJ.getCode())){
+                                projects.add(resumeProject);
+                                resumeProject = new ResumeProject();
+                                resumeProject.setProjectName(list.get(i-1).get("content").toString());
+                            }else{
+                                resumeProject.setDevelopTools(dataMap.getValue());//开发工具
+                            }
+                        }else{
+                            if(i!=0){
+                                sign = ProjectType.KFGJ.getCode();
+                                resumeProject = new ResumeProject();
+                            }
+                        }
+                    //项目周期(XMZQ)暂不单独处理
+                }
+            }else {
+                //处理无标识数据问题
+                if (type != null && resumeProject!=null) {
+                    if (type.equals(ProjectType.XMMC.getCode()))
+                        resumeProject.setProjectName(resumeProject.getProjectName() + map.get("content").toString());
+                    if (type.equals(ProjectType.XMJJ.getCode()))
+                        resumeProject.setProjectDetail(resumeProject.getProjectDetail() + map.get("content").toString());
+                    if (type.equals(ProjectType.XMZZ.getCode()))
+                        resumeProject.setPartIn(resumeProject.getPartIn() + map.get("content").toString());
+                    if (type.equals(ProjectType.XMJS.getCode()))
+                        resumeProject.setDevelopEnvironment(resumeProject.getDevelopEnvironment() + map.get("content").toString());
+                    if (type.equals(ProjectType.KFGJ.getCode()))
+                        resumeProject.setDevelopTools(resumeProject.getDevelopTools() + map.get("content").toString());
+                    //项目周期(XMZQ)暂不单独处理
+                }
+            }
+            if(resumeProject!=null
+                    &&StringUtils.isNotEmpty(resumeProject.getProjectName())
+                    &&resumeProject.getProjectName().length()>50){
+                resumeProject.setProjectName(resumeProject.getProjectName().substring(0, 50));
+            }
+        }
+        //判断最后一个是否存在
+        if(resumeProject!=null&&StringUtils.isNotEmpty(resumeProject.getProjectName())){
+            projects.add(resumeProject);
+        }
+        return projects;
+    }
+
+}

+ 28 - 0
ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/ResumeType.java

@@ -0,0 +1,28 @@
+package com.ruoyi.utils.resumeAnalysis;
+
+public enum ResumeType {
+
+    PERSONAL(new String[]{"个人信息", "个人资料","基本信息","联系方式","个人简历"}),
+    WORKEXPERIENCE(new String[]{"工作经验","工作实习经历", "工作经历","工作履历"}),
+    PROJECTEXPERIENCE(new String[]{"项目经验", "项目经历"}),
+    SELFEVALUATION(new String[]{"个人评价","个人简介","个人特点","自我评价","个人总结","个人优势","自我认知"}),
+    PROFESSIONALSKILLS(new String[]{"专业技能","技能","技能专长","个人技能","技能特长","推荐评语", "掌握技能","职业技能","相关技能"}),
+    CERTIFICATE(new String[]{"证书","技能证书"}),
+    EDUCATIONALEXPERIENCE(new String[]{"教育经历","培训经历","教育培训经历","教育背景"}),
+
+    OTHER(new String[]{"求职意向"});
+
+
+    private final String[] code;
+
+    ResumeType(String[] code)
+    {
+        this.code = code;
+    }
+
+    public String[] getCode()
+    {
+        return code;
+    }
+
+}

+ 180 - 0
ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/Similarity.java

@@ -0,0 +1,180 @@
+package com.ruoyi.utils.resumeAnalysis.similarity;
+
+
+import com.ruoyi.utils.resumeAnalysis.similarity.similarity.phrase.PhraseSimilarity;
+import com.ruoyi.utils.resumeAnalysis.similarity.similarity.sentence.editdistance.EditDistance;
+import com.ruoyi.utils.resumeAnalysis.similarity.similarity.sentence.editdistance.GregorEditDistanceSimilarity;
+import com.ruoyi.utils.resumeAnalysis.similarity.similarity.sentence.editdistance.NewEditDistanceSimilarity;
+import com.ruoyi.utils.resumeAnalysis.similarity.similarity.sentence.editdistance.StandardEditDistanceSimilarity;
+import com.ruoyi.utils.resumeAnalysis.similarity.similarity.sentence.morphology.MorphoSimilarity;
+import com.ruoyi.utils.resumeAnalysis.similarity.similarity.word.CharBasedSimilarity;
+import com.ruoyi.utils.resumeAnalysis.similarity.similarity.word.clin.CilinSimilarity;
+import com.ruoyi.utils.resumeAnalysis.similarity.similarity.word.hownet.concept.ConceptSimilarity;
+import com.ruoyi.utils.resumeAnalysis.similarity.tendency.word.HownetWordTendency;
+
+/**
+ * Similarity 相似度计算工具包
+ *
+ * @author itbluebox
+ */
+public class Similarity {
+
+    public static final class Config {
+        /**
+         * 词林编码路径
+         */
+        public static String CilinPath = "similarity/cilin.db.gz";
+        /**
+         * 拼音词典路径
+         */
+        public static String PinyinPath = "similarity/F02-GB2312-to-PuTongHua-PinYin.txt";
+        /**
+         * concept路径
+         */
+        public static String ConceptPath = "similarity/concept.dat";
+        /**
+         * concept.xml.gz路径
+         */
+        public static String ConceptXmlPath = "similarity/concept.xml.gz";
+        /**
+         * 义原关系的路径
+         */
+        public static String SememePath = "similarity/sememe.dat";
+        /**
+         * 义原数据路径
+         */
+        public static String SememeXmlPath = "similarity/sememe.xml.gz";
+        /**
+         * 词频统计输出路径
+         */
+        public static String StatisticsResultPath = "data/WordFrequencyStatistics-Result.txt";
+
+    }
+
+    private Similarity() {
+    }
+
+
+    /**
+     * 词语相似度
+     * 计算词林编码相似度
+     *
+     * @param word1
+     * @param word2
+     * @return
+     */
+    public static double cilinSimilarity(String word1, String word2) {
+        return CilinSimilarity.getInstance().getSimilarity(word1, word2);
+    }
+
+    /**
+     * 词语相似度
+     * 计算拼音相似度(当前实现直接复用短语相似度PhraseSimilarity)
+     *
+     * @param word1
+     * @param word2
+     * @return
+     */
+    public static double pinyinSimilarity(String word1, String word2) {
+        return PhraseSimilarity.getInstance().getSimilarity(word1, word2);
+    }
+
+    /**
+     * 词语相似度
+     * 计算字面相似度
+     *
+     * @param word1
+     * @param word2
+     * @return
+     */
+    public static double charBasedSimilarity(String word1, String word2) {
+        return CharBasedSimilarity.getInstance().getSimilarity(word1, word2);
+    }
+
+    /**
+     * 词语相似度
+     * 计算语义概念相似度
+     *
+     * @param word1
+     * @param word2
+     * @return
+     */
+    public static double conceptSimilarity(String word1, String word2) {
+        return ConceptSimilarity.getInstance().getSimilarity(word1, word2);
+    }
+
+    /**
+     * 短语相似度
+     *
+     * @param phrase1
+     * @param phrase2
+     * @return
+     */
+    public static double phraseSimilarity(String phrase1, String phrase2) {
+        return PhraseSimilarity.getInstance().getSimilarity(phrase1, phrase2);
+    }
+
+    /**
+     * 句子相似度
+     * 词形和词序结合法
+     *
+     * @param sentence1
+     * @param sentence2
+     * @return
+     */
+    public static double morphoSimilarity(String sentence1, String sentence2) {
+        return MorphoSimilarity.getInstance().getSimilarity(sentence1, sentence2);
+    }
+
+    /**
+     * 句子相似度
+     * 夏天编辑距离法
+     *
+     * @param sentence1
+     * @param sentence2
+     * @return
+     */
+    public static double editDistanceSimilarity(String sentence1, String sentence2) {
+        EditDistance ed = new NewEditDistanceSimilarity();
+        return ed.getSimilarity(sentence1, sentence2);
+    }
+
+    /**
+     * 句子相似度
+     * Gregor编辑距离算法
+     *
+     * @param sentence1
+     * @param sentence2
+     * @return
+     */
+    public static double gregorEditDistanceSimilarity(String sentence1, String sentence2) {
+        EditDistance ed = new GregorEditDistanceSimilarity();
+        return  ed.getSimilarity(sentence1, sentence2);
+    }
+
+    /**
+     * 句子相似度
+     * 标准编辑距离算法
+     *
+     * @param sentence1
+     * @param sentence2
+     * @return
+     */
+    public static double standardEditDistanceSimilarity(String sentence1, String sentence2) {
+        EditDistance ed = new StandardEditDistanceSimilarity();
+        return  ed.getSimilarity(sentence1, sentence2);
+    }
+
+    /**
+     * 词语情感分析
+     *
+     * @param word
+     * @return
+     */
+    public static double tendency(String word) {
+        HownetWordTendency hownetWordTendency = new HownetWordTendency();
+        return hownetWordTendency.getTendency(word);
+    }
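+
+    /*
+     * Illustrative usage sketch (not part of the original commit). It only calls the
+     * public methods declared above; the sample words are made up, and the sentence
+     * level methods assume the dictionary resources configured in Config are on the
+     * classpath.
+     */
+    public static void demo() {
+        //短语相似度:简历大标题识别用的就是这个方法
+        double d1 = phraseSimilarity("工作经历", "工作经验");
+        //句子相似度:词形和词序结合法
+        double d2 = morphoSimilarity("我喜欢编程", "编程是我的爱好");
+        System.out.println(d1 + ", " + d2);
+    }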
+
+}
+

+ 75 - 0
ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/classification/Feature.java

@@ -0,0 +1,75 @@
+package com.ruoyi.utils.resumeAnalysis.similarity.classification;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * 文档的特征
+ */
+public class Feature {
+
+    /**
+     * 每个关键词在不同类别中出现的文档数量
+     */
+    private Map<String, Integer> docCountMap = new HashMap<>();
+    /**
+     * 特征名称
+     */
+    private String name;
+
+    public String getName() {
+        return name;
+    }
+
+    public void setName(String name) {
+        this.name = name;
+    }
+
+    public void incDocCount(String category) {
+        if (docCountMap.containsKey(category)) {
+            docCountMap.put(category, docCountMap.get(category) + 1);
+        } else {
+            docCountMap.put(category, 1);
+        }
+    }
+
+    public int getDocCount(String category) {
+        if (docCountMap.containsKey(category)) {
+            return docCountMap.get(category);
+        } else {
+            return 0;
+        }
+    }
+
+    public void write(DataOutput out) throws IOException {
+        out.writeUTF(name == null ? "" : name);
+
+        out.writeInt(docCountMap.size());
+        for (String category : docCountMap.keySet()) {
+            out.writeUTF(category);
+            out.writeInt(docCountMap.get(category));
+        }
+    }
+
+    public void readFields(DataInput in) throws IOException {
+        this.name = in.readUTF();
+
+        docCountMap = new HashMap<>();
+        int size = in.readInt();
+        for (int i = 0; i < size; i++) {
+            String category = in.readUTF();
+            int docCount = in.readInt();
+            docCountMap.put(category, docCount);
+        }
+    }
+
+    public static Feature read(DataInput in) throws IOException {
+        Feature f = new Feature();
+        f.readFields(in);
+        return f;
+    }
+
+}

+ 62 - 0
ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/classification/Instance.java

@@ -0,0 +1,62 @@
+package com.ruoyi.utils.resumeAnalysis.similarity.classification;
+
+import com.ruoyi.utils.resumeAnalysis.similarity.tokenizer.Tokenizer;
+import com.ruoyi.utils.resumeAnalysis.similarity.tokenizer.Word;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.*;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+/**
+ * 代表一个文档实例
+ */
+public class Instance {
+
+    private static Logger logger = LoggerFactory.getLogger(Instance.class);
+
+    /**
+     * 文档类别
+     */
+    private String category;
+    /**
+     * 文档内容
+     */
+    private final Set<String> bag = new HashSet<>();
+
+    public Instance(String category, File f, String encoding) {
+        this.category = category;
+        String line = null;
+
+        try (BufferedReader in = new BufferedReader(new InputStreamReader(new FileInputStream(f), encoding))) {
+            while ((line = in.readLine()) != null) {
+                List<Word> words = Tokenizer.segment(line);
+                bag.addAll(words
+                        .stream()
+                        .filter(w -> w.getPos().endsWith("adj") || w.getPos().startsWith("n") || w.getPos().startsWith("v"))
+                        .map(Word::getName)
+                        .collect(Collectors.toList())
+                );
+            }
+        } catch (IOException e) {
+            logger.error("current file:{},current line:{}", f.getAbsolutePath(), line);
+            e.printStackTrace();
+        }
+    }
+
+    public String getCategory() {
+        return category;
+    }
+
+    public void setCategory(String category) {
+        this.category = category;
+    }
+
+    public Set<String> getWords() {
+        return bag;
+    }
+
+}

+ 98 - 0
ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/classification/NaiveBayesClassifier.java

@@ -0,0 +1,98 @@
+package com.ruoyi.utils.resumeAnalysis.similarity.classification;
+
+import java.io.*;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.Map;
+
+public class NaiveBayesClassifier {
+
+    /**
+     * 记录每个类别下出现的文档数量, 用于计算P(C)使用
+     */
+    Variable VARIABLE = new Variable();
+
+    /**
+     * 词语在所有类别中的总数量
+     */
+    Map<String, Integer> TERM_TOTAL_COUNT = new HashMap<>();
+
+    /**
+     * 训练一篇文档
+     *
+     * @param doc
+     */
+    public void training(Instance doc) {
+        VARIABLE.addInstance(doc);
+    }
+
+    /**
+     * 保存训练结果
+     *
+     * @param file 模型文件
+     * @throws IOException
+     */
+    void save(File file) throws IOException {
+        //使用try-with-resources确保流被关闭
+        try (DataOutputStream out = new DataOutputStream(new FileOutputStream(file))) {
+            VARIABLE.write(out);
+        }
+    }
+
+    public void load(File file) throws IOException {
+        try (DataInputStream in = new DataInputStream(new FileInputStream(file))) {
+            VARIABLE = Variable.read(in);
+        }
+    }
+
+    /**
+     * 计算P(C)
+     *
+     * @param category
+     * @return
+     */
+    public double getCategoryProbability(String category) {
+        return Math.log(VARIABLE.getDocCount(category) * 1.0f / VARIABLE.getDocCount());
+    }
+
+    /**
+     * 计算P(feature|category),返回的是取对数后的数值
+     *
+     * @param feature
+     * @param category
+     * @return
+     */
+    public double getFeatureProbability(String feature, String category) {
+        int m = VARIABLE.getFeatureCount();
+        return Math.log((VARIABLE.getDocCount(feature, category) + 1.0) / (VARIABLE.getDocCount(category) + m));
+    }
+
+    /**
+     * 计算给定实例文档属于指定类别的概率
+     *
+     * @param category
+     * @param doc
+     * @return 返回的是取对数后的数值
+     */
+    public double getProbability(String category, Instance doc) {
+        double result = getCategoryProbability(category);
+        for (String feature : doc.getWords()) {
+            if (VARIABLE.containFeature(feature)) {
+                result += getFeatureProbability(feature, category);
+            }
+        }
+        return result;
+    }
+
+    public String getCategory(Instance doc) {
+        Collection<String> categories = VARIABLE.getCategories();
+        double best = Double.NEGATIVE_INFINITY;
+        String bestName = null;
+        for (String c : categories) {
+            double current = getProbability(c, doc);
+            //			System.out.println(c + ":" + current);
+            if (best < current) {
+                best = current;
+                bestName = c;
+            }
+        }
+        return bestName;
+    }
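+
+    /*
+     * Illustrative usage sketch (not part of the original commit): a minimal train /
+     * save / load / classify round trip. The file paths and category names are
+     * assumptions for the example only.
+     */
+    public static void demo() throws IOException {
+        NaiveBayesClassifier classifier = new NaiveBayesClassifier();
+        //每个Instance由 类别 + 文本文件 构成
+        classifier.training(new Instance("简历", new File("data/train/resume/1.txt"), "UTF-8"));
+        classifier.training(new Instance("其他", new File("data/train/other/1.txt"), "UTF-8"));
+        //训练结果可以持久化后再加载
+        classifier.save(new File("data/bayes.model"));
+        classifier.load(new File("data/bayes.model"));
+        //预测未知文档的类别
+        String category = classifier.getCategory(new Instance("未知", new File("data/unknown.txt"), "UTF-8"));
+        System.out.println(category);
+    }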
+
+}

+ 171 - 0
ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/classification/Variable.java

@@ -0,0 +1,171 @@
+package com.ruoyi.utils.resumeAnalysis.similarity.classification;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * 分类的类别
+ */
+public class Variable {
+
+    /**
+     * 类别信息
+     */
+    Map<String, CategoryInfo> categoryMap = new HashMap<>();
+
+    Map<String, Feature> features = new HashMap<>();
+
+    /**
+     * 所有文档的数量
+     */
+    private int docCount = 0;
+
+    public void write(DataOutput out) throws IOException {
+        //保存文档总数
+        out.writeInt(docCount);
+
+        //写入类别总数
+        out.writeInt(categoryMap.size());
+        for (String category : categoryMap.keySet()) {
+            out.writeUTF(category);
+            categoryMap.get(category).write(out);
+        }
+
+        //写入Feature总数
+        out.writeInt(features.size());
+        for (String key : features.keySet()) {
+            out.writeUTF(key);
+            features.get(key).write(out);
+        }
+    }
+
+    public void readFields(DataInput in) throws IOException {
+        this.docCount = in.readInt();
+
+        int size = in.readInt();
+        categoryMap = new HashMap<>();
+        for (int i = 0; i < size; i++) {
+            String category = in.readUTF();
+            CategoryInfo info = CategoryInfo.read(in);
+            categoryMap.put(category, info);
+        }
+
+        size = in.readInt();
+        features = new HashMap<>();
+        for (int i = 0; i < size; i++) {
+            String word = in.readUTF();
+            Feature feature = Feature.read(in);
+            features.put(word, feature);
+        }
+    }
+
+    public static Variable read(DataInput in) throws IOException {
+        Variable v = new Variable();
+        v.readFields(in);
+        return v;
+    }
+
+    public Collection<String> getCategories() {
+        return categoryMap.keySet();
+    }
+
+    public int getFeatureCount() {
+        return features.size();
+    }
+
+    public boolean containFeature(String feature) {
+        return features.containsKey(feature);
+    }
+
+    public void incDocCount() {
+        this.docCount++;
+    }
+
+    public int getDocCount() {
+        return this.docCount;
+    }
+
+    /**
+     * 获取指定类别下的文档数量
+     *
+     * @param category
+     * @return
+     */
+    public int getDocCount(String category) {
+        return categoryMap.get(category).getDocCount();
+    }
+
+    /**
+     * 获取feature在指定类别下的文档出现数量
+     *
+     * @param feature
+     * @param category
+     * @return
+     */
+    public int getDocCount(String feature, String category) {
+        Feature f = features.get(feature);
+        if (f != null) {
+            return f.getDocCount(category);
+        }
+        return 0;
+    }
+
+    public void addInstance(Instance instance) {
+        incDocCount();
+        CategoryInfo info;
+        if (categoryMap.containsKey(instance.getCategory())) {
+            info = categoryMap.get(instance.getCategory());
+        } else {
+            info = new CategoryInfo();
+        }
+        info.incDocCount();
+        categoryMap.put(instance.getCategory(), info);
+
+        for (String word : instance.getWords()) {
+            Feature feature = features.get(word);
+
+            if (feature == null)
+                feature = new Feature();
+
+            feature.setName(word);
+            feature.incDocCount(instance.getCategory());
+
+            features.put(word, feature);
+        }
+    }
+
+    public static class CategoryInfo {
+        private int docCount;
+
+        public int getDocCount() {
+            return docCount;
+        }
+
+        public void incDocCount() {
+            this.docCount++;
+        }
+
+        public void setDocCount(int docCount) {
+            this.docCount = docCount;
+        }
+
+        public void write(DataOutput out) throws IOException {
+            out.writeInt(docCount);
+        }
+
+        public void readFields(DataInput in) throws IOException {
+            this.docCount = in.readInt();
+        }
+
+        public static CategoryInfo read(DataInput in) throws IOException {
+            CategoryInfo c = new CategoryInfo();
+            c.readFields(in);
+            return c;
+        }
+    }
+
+}

+ 17 - 0
ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/ISimilarity.java

@@ -0,0 +1,17 @@
+package com.ruoyi.utils.resumeAnalysis.similarity.similarity;
+
+/**
+ * 计算相似度
+ *
+ * @author itbluebox
+ */
+public interface ISimilarity {
+    /**
+     * 计算相似度
+     *
+     * @param word1 词语1
+     * @param word2 词语2
+     * @return 相似度值
+     */
+    double getSimilarity(String word1, String word2);
+}

+ 60 - 0
ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/phrase/PhraseSimilarity.java

@@ -0,0 +1,60 @@
+package com.ruoyi.utils.resumeAnalysis.similarity.similarity.phrase;
+
+import com.ruoyi.utils.resumeAnalysis.similarity.similarity.ISimilarity;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * 一种简单的短语相似度计算方法
+ *
+ * @author itbluebox
+ */
+public class PhraseSimilarity implements ISimilarity {
+    private static PhraseSimilarity instance = null;
+
+    public static PhraseSimilarity getInstance() {
+        if (instance == null) {
+            instance = new PhraseSimilarity();
+        }
+        return instance;
+    }
+    @Override
+    public double getSimilarity(String phrase1, String phrase2) {
+        //任一短语为空时直接返回0,避免除零得到NaN
+        if (phrase1 == null || phrase2 == null || phrase1.isEmpty() || phrase2.isEmpty()) {
+            return 0.0;
+        }
+        return (getSC(phrase1, phrase2) + getSC(phrase2, phrase1)) / 2.0;
+    }
+
+    private List<Integer> getC(String first, String second, int pos) {
+        List<Integer> results = new ArrayList<Integer>();
+        char ch = first.charAt(pos);
+        for (int i = 0; i < second.length(); i++) {
+            if (ch == second.charAt(i)) {
+                results.add(i);
+            }
+        }
+        return results;
+    }
+
+    private int getDistance(String first, String second, int pos) {
+        int d = second.length();
+        for (int k : getC(first, second, pos)) {
+            int value = Math.abs(k - pos);
+            if (d > value) {
+                d = value;
+            }
+        }
+        return d;
+    }
+
+    private double getCC(String first, String second, int pos) {
+        return (second.length() - getDistance(first, second, pos)) * 1.0 / second.length();
+    }
+
+    private double getSC(String first, String second) {
+        double total = 0.0;
+        for (int i = 0; i < first.length(); i++) {
+            total = total + getCC(first, second, i);
+        }
+        return total / first.length();
+    }
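+
+    /*
+     * Worked example (illustrative, not part of the original commit):
+     * for getSimilarity("工作经历", "工作经验") each of "工"、"作"、"经" sits at the same
+     * position in the other phrase (distance 0, CC = 1), while "历" / "验" do not occur
+     * at all (CC = 0), so SC = 3/4 in both directions and the similarity is 0.75.
+     * Two identical phrases score 1.0, which is why callers use a ">= 1" threshold
+     * for exact title matching.
+     */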
+}

+ 12 - 0
ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/sentence/ISentenceSimilarity.java

@@ -0,0 +1,12 @@
+package com.ruoyi.utils.resumeAnalysis.similarity.similarity.sentence;
+
+import com.ruoyi.utils.resumeAnalysis.similarity.similarity.ISimilarity;
+
+/**
+ * 句子相似度接口
+ *
+ * @author itbluebox
+ */
+public interface ISentenceSimilarity extends ISimilarity {
+
+}

+ 118 - 0
ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/sentence/editdistance/Block.java

@@ -0,0 +1,118 @@
+package com.ruoyi.utils.resumeAnalysis.similarity.similarity.sentence.editdistance;
+
+public class Block<T> {
+
+    private int globalPosition;
+    /**
+     * 块的内容
+     */
+    private SuperString<T> data;
+    /**
+     * 前后指针
+     */
+    private Block<T> prev, next;
+    /**
+     * 是否已经进行划分
+     */
+    private boolean divideFlag = false;
+
+    public Block(SuperString<T> string) {
+        this.data = string;
+        this.globalPosition = 0;
+    }
+
+    public Block(SuperString<T> string, int globalBegin) {
+        this.data = string;
+        this.globalPosition = globalBegin;
+    }
+
+    public int getGlobalPosition() {
+        return globalPosition;
+    }
+
+    public void setGlobalPosition(int globalPosition) {
+        this.globalPosition = globalPosition;
+    }
+
+    public SuperString<T> getData() {
+        return data;
+    }
+
+    public void setData(SuperString<T> data) {
+        this.data = data;
+    }
+
+    public Block<T> getPrev() {
+        return prev;
+    }
+
+    public void setPrev(Block<T> prev) {
+        this.prev = prev;
+    }
+
+    public Block<T> getNext() {
+        return next;
+    }
+
+    public void setNext(Block<T> next) {
+        this.next = next;
+    }
+
+    public boolean isDivideFlag() {
+        return divideFlag;
+    }
+
+    public void setDivideFlag(boolean divideFlag) {
+        this.divideFlag = divideFlag;
+    }
+
+    public void divide(int start, int length) {
+        if (start == 0 && length == data.length()) {
+            this.divideFlag = true;
+            return;
+        } else if (start == 0) {
+            //前面为已经分割的标记,后面应该为未分割的标记
+            Block<T> tail = new Block<T>(data.substring(length), globalPosition + length);
+            this.setDivideFlag(true);
+            this.setData(data.substring(0, length));
+            tail.next = this.next;
+            if (tail.next != null)
+                tail.next.prev = tail;
+            this.next = tail;
+            tail.prev = this;
+        } else if (start + length == data.length()) {
+            //后面为已经分割的标记,前面应该为未分割的标记
+            Block<T> head = new Block<T>(data.substring(0, start), globalPosition);
+
+            this.setDivideFlag(true);
+            this.setData(data.substring(start));
+            this.setGlobalPosition(globalPosition + start);
+
+            head.prev = this.prev;
+            if (head.prev != null)
+                head.prev.next = head;
+            head.next = this;
+            this.prev = head;
+        } else {
+            //中间为已经分割的标记,前面和后面应该为未分割的标记
+            Block<T> head = new Block<T>(data.substring(0, start), globalPosition);
+            Block<T> tail = new Block<T>(data.substring(start + length), globalPosition + start + length);
+
+            this.setDivideFlag(true);
+            this.setData(data.substring(start, start + length));
+            this.setGlobalPosition(globalPosition + start);
+
+            head.prev = this.prev;
+            if (head.prev != null)
+                head.prev.next = head;
+            head.next = this;
+            this.prev = head;
+
+            tail.next = this.next;
+            if (tail.next != null)
+                tail.next.prev = tail;
+            this.next = tail;
+            tail.prev = this;
+        }
+    }
+
+}

+ 16 - 0
ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/sentence/editdistance/CharEditUnit.java

@@ -0,0 +1,16 @@
+package com.ruoyi.utils.resumeAnalysis.similarity.similarity.sentence.editdistance;
+
+public class CharEditUnit extends EditUnit {
+
+	private String content = "";
+
+	public CharEditUnit(Character ch) {
+		content = ch.toString();
+	}
+
+	@Override
+	public String getUnitString() {
+		return content;
+	}
+
+}

+ 49 - 0
ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/sentence/editdistance/ChunkEditUnit.java

@@ -0,0 +1,49 @@
+package com.ruoyi.utils.resumeAnalysis.similarity.similarity.sentence.editdistance;
+
+public class ChunkEditUnit extends EditUnit {
+
+	private SuperString<? extends EditUnit> chunk = null;
+
+	public ChunkEditUnit(SuperString<? extends EditUnit> chunk) {
+		this.chunk = chunk;
+	}
+
+	@Override
+	public String getUnitString() {
+		return chunk.toString();
+	}
+
+	/**
+	 * 根据两个块之间的标准编辑距离获取替换代价
+	 */
+	@Override
+	public double getSubstitutionCost(EditUnit otherUnit) {
+		if (!(otherUnit instanceof ChunkEditUnit))
+			return chunk.length();
+		if (equals(otherUnit))
+			return 0.0;
+
+		ChunkEditUnit other = (ChunkEditUnit) otherUnit;
+		return new StandardEditDistanceSimilarity().getEditDistance(chunk, other.chunk);
+	}
+
+	/**
+	 * 获取删除代价,标准算法的默认值为1.0, 此处也设为1.0
+	 * 具体的编辑单元可以通过覆盖该方法设置不同的删除代价
+	 * @return 删除代价
+	 */
+	@Override
+	public double getDeletionCost() {
+		return chunk.length();
+	}
+
+	/**
+	 * 获取插入代价,标准算法的默认值为1.0.
+	 * 具体的编辑单元可以通过覆盖该方法设置不同的插入代价
+	 */
+	@Override
+	public double getInsertionCost() {
+		return chunk.length();
+	}
+
+}

+ 20 - 0
ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/sentence/editdistance/EditDistance.java

@@ -0,0 +1,20 @@
+package com.ruoyi.utils.resumeAnalysis.similarity.similarity.sentence.editdistance;
+
+import com.ruoyi.utils.resumeAnalysis.similarity.similarity.ISimilarity;
+
+/**
+ * 编辑距离
+ *
+ * @author itbluebox
+ */
+public abstract class EditDistance implements ISimilarity {
+    @Override
+    public double getSimilarity(String str1, String str2) {
+        SuperString<WordEditUnit> S = SuperString.createWordSuperString(str1);
+        SuperString<WordEditUnit> T = SuperString.createWordSuperString(str2);
+        //return 1 - (getEditDistance(S, T)) / (Math.max(S.length(), T.length()));
+        return 1.0 / (getEditDistance(S, T) + 1);
+    }
+
+    public abstract double getEditDistance(SuperString<? extends EditUnit> S, SuperString<? extends EditUnit> T);
+}

+ 52 - 0
ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/sentence/editdistance/EditUnit.java

@@ -0,0 +1,52 @@
+package com.ruoyi.utils.resumeAnalysis.similarity.similarity.sentence.editdistance;
+
+/**
+ * 编辑单元
+ * @author itbluebox
+ */
+public abstract class EditUnit {
+
+	/**
+	 * 获取编辑单元的内部字符串
+	 * @return
+	 */
+	public abstract String getUnitString();
+
+	/**
+	 * 获取替换代价,默认替换代价当替换单元的内容相同时为0,
+	 * 不同时为1
+	 */
+	public double getSubstitutionCost(EditUnit other) {
+		return this.equals(other) ? 0 : 1;
+	}
+
+	/**
+	 * 获取删除代价,标准算法的默认值为1.0, 此处也设为1.0
+	 * 具体的编辑单元可以通过覆盖该方法设置不同的删除代价
+	 * @return 删除代价
+	 */
+	public double getDeletionCost() {
+		return 1.0;
+	}
+
+	/**
+	 * 获取插入代价,标准算法的默认值为1.0.
+	 * 具体的编辑单元可以通过覆盖该方法设置不同的插入代价
+	 */
+	public double getInsertionCost() {
+		return 1.0;
+	}
+
+	@Override
+	public boolean equals(Object other) {
+		if (!(other instanceof EditUnit))
+			return false;
+		return getUnitString().equals(((EditUnit) other).getUnitString());
+	}
+
+	@Override
+	public String toString() {
+		return getUnitString();
+	}
+
+}

+ 114 - 0
ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/sentence/editdistance/GregorEditDistanceSimilarity.java

@@ -0,0 +1,114 @@
+package com.ruoyi.utils.resumeAnalysis.similarity.similarity.sentence.editdistance;
+
+/**
+ * 由Gregor提出的考虑块交换(Block Transposition)的编辑距离改进算法,
+ * 时间复杂度为O(m^3 * n^3)
+ * 具体实现请参考Gregor Leusch, Nicola Ueffing的文章《A Novel String-to-String Distance Measure With
+ * Application to Machine Translation Evaluation》
+ * 问题:<br/>
+ * 替换代价的取值会影响句子相似度计算的直观结果,例如“什么是计算机病毒”与“电脑病毒是什么”,
+ * 直觉上的编辑距离应该是2,即“什么是计算机病毒”首先变为“计算机病毒什么是”,再变为“计算机病毒是什么”,
+ * 编辑代价为2;但实际上,当由“什么是计算机病毒”变为“计算机病毒什么是”后,由于“什么是”与“是什么”的替换代价只有0.2,
+ * 因而不再进行块交换,故总的编辑距离为1.2
+ */
+public class GregorEditDistanceSimilarity extends EditDistance {
+
+    /**
+     * 块交换代价
+     */
+    public static double swapCost = 0.5;
+
+    private SuperString<? extends EditUnit> S, T;
+
+    /**
+     * 存放字符串从S(i0-i1)到T(j0-j1)的中间运算结果,避免多次运算,提高运算效率
+     */
+    private double[][][][] QArray;
+
+    @Override
+    public double getEditDistance(SuperString<? extends EditUnit> S, SuperString<? extends EditUnit> T) {
+        this.S = S;
+        this.T = T;
+        QArray = new double[S.length()][S.length()][T.length()][T.length()];
+        for (int i = 0; i < S.length(); i++) {
+            for (int i2 = 0; i2 < S.length(); i2++)
+                for (int j = 0; j < T.length(); j++)
+                    for (int j2 = 0; j2 < T.length(); j2++) {
+                        QArray[i][i2][j][j2] = Double.MAX_VALUE;
+                    }
+        }
+
+        return Q(0, S.length() - 1, 0, T.length() - 1);
+    }
+
+    private double Q(int i0, int i1, int j0, int j1) {
+        double cost = 0;
+
+        if (i1 < i0) {
+            for (int j = j0; j <= j1; j++) {
+                cost += T.elementAt(j).getInsertionCost();
+            }
+            return cost;
+        } else if (j1 < j0) {
+            for (int i = i0; i <= i1; i++) {
+                cost += S.elementAt(i).getDeletionCost();
+            }
+            return cost;
+        } else if (i1 == i0 && j1 == j0) {
+            cost = S.elementAt(i0).getSubstitutionCost(T.elementAt(j0));
+            QArray[i0][i1][j0][j1] = cost;
+            return cost;
+        } else if (i1 == i0) {
+            double minSubstituteValue = 1.0;
+            int minPosJ = j0;
+            for (int j = j0; j <= j1; j++) {
+                double subsitituteValue = S.elementAt(i0).getSubstitutionCost(T.elementAt(j));
+                if (minSubstituteValue > subsitituteValue) {
+                    minSubstituteValue = subsitituteValue;
+                    minPosJ = j;
+                }
+            }
+            for (int j = j0; j <= j1; j++) {
+                if (j == minPosJ) {
+                    cost += minSubstituteValue;
+                } else {
+                    cost += T.elementAt(j).getInsertionCost();
+                }
+            }
+        } else if (j1 == j0) {
+            double minSubstituteValue = 1.0;
+            int minPosI = i0;
+            for (int i = i0; i <= i1; i++) {
+                double subsitituteValue = S.elementAt(i).getSubstitutionCost(T.elementAt(j0));
+                if (minSubstituteValue > subsitituteValue) {
+                    minSubstituteValue = subsitituteValue;
+                    minPosI = i;
+                }
+            }
+            for (int i = i0; i <= i1; i++) {
+                if (i == minPosI) {
+                    cost += minSubstituteValue;
+                } else {
+                    cost += S.elementAt(i).getDeletionCost();
+                }
+            }
+        } else {
+            if (QArray[i0][i1][j0][j1] < Double.MAX_VALUE) {
+                return QArray[i0][i1][j0][j1];
+            }
+            for (int i = i0; i < i1; i++) {
+                for (int j = j0; j < j1; j++) {
+                    double c = Math.min(Q(i0, i, j0, j) + Q(i + 1, i1, j + 1, j1),
+                            Q(i0, i, j + 1, j1) + Q(i + 1, i1, j0, j) + swapCost);
+                    if (c < QArray[i0][i1][j0][j1]) {
+                        QArray[i0][i1][j0][j1] = c;
+                    }
+                }
+            }
+            return QArray[i0][i1][j0][j1];
+        }
+        QArray[i0][i1][j0][j1] = cost;
+        return cost;
+    }
+
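+    /*
+     * Illustrative check (not part of the original commit): at character level one
+     * block swap turns "abcd" into "cdab", so this algorithm returns swapCost = 0.5,
+     * while the standard edit distance of the same pair is 4.
+     */
+    public static void demo() {
+        SuperString<CharEditUnit> s = SuperString.createCharSuperString("abcd");
+        SuperString<CharEditUnit> t = SuperString.createCharSuperString("cdab");
+        System.out.println(new GregorEditDistanceSimilarity().getEditDistance(s, t));   // 0.5
+        System.out.println(new StandardEditDistanceSimilarity().getEditDistance(s, t)); // 4.0
+    }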
+}

+ 112 - 0
ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/sentence/editdistance/NewEditDistanceSimilarity.java

@@ -0,0 +1,112 @@
+package com.ruoyi.utils.resumeAnalysis.similarity.similarity.sentence.editdistance;
+
+/**
+ * 夏天提出的新的支持非相邻块交换的编辑距离算法
+ * @author xiatian
+ */
+public class NewEditDistanceSimilarity extends EditDistance {
+
+    /**
+     * 块交换代价
+     */
+    private final double swapCost = 1.0;
+
+    private SuperString<? extends EditUnit> S, T;
+    private double[][][][] QArray;
+
+    @Override
+    @SuppressWarnings("unchecked")
+    public double getEditDistance(SuperString<? extends EditUnit> S1, SuperString<? extends EditUnit> T1) {
+        Object[] array = Split.split(S1, T1);
+        this.S = (SuperString<? extends EditUnit>) array[0];
+        this.T = (SuperString<? extends EditUnit>) array[1];
+        QArray = new double[S.length() + 1][S.length() + 1][T.length() + 1][T.length() + 1];
+        for (int i = 0; i <= S.length(); i++) {
+            for (int i2 = 0; i2 <= S.length(); i2++)
+                for (int j = 0; j <= T.length(); j++)
+                    for (int j2 = 0; j2 <= T.length(); j2++) {
+                        QArray[i][i2][j][j2] = Double.MAX_VALUE;
+                    }
+        }
+        return Q(0, S.length() - 1, 0, T.length() - 1);
+    }
+
+    private double Q(int i1, int im, int j1, int jn) {
+        if (QArray[i1][im][j1][jn] < Double.MAX_VALUE) {
+            return QArray[i1][im][j1][jn];
+        }
+        double cost = 0;
+        if (im < i1) {
+            for (int j = j1; j <= jn; j++) {
+                cost += T.elementAt(j).getInsertionCost();
+            }
+        } else if (jn < j1) {
+            for (int i = i1; i <= im; i++) {
+                cost += S.elementAt(i).getDeletionCost();
+            }
+        } else if (im == i1 && jn == j1) {
+            cost = S.elementAt(i1).getSubstitutionCost(T.elementAt(j1));
+        } else if (i1 == im) {
+            double minSubValue = S.elementAt(i1).getSubstitutionCost(T.elementAt(j1));
+            int minPosJ = j1;
+            for (int j = j1 + 1; j <= jn; j++) {
+                double subValue = S.elementAt(i1).getSubstitutionCost(T.elementAt(j));
+                if (minSubValue > subValue) {
+                    minSubValue = subValue;
+                    minPosJ = j;
+                }
+            }
+            for (int j = j1; j <= jn; j++) {
+                if (j == minPosJ) {
+                    cost += minSubValue;
+                } else {
+                    cost += T.elementAt(j).getInsertionCost();
+                }
+            }
+        } else if (j1 == jn) {
+            int minPosI = i1;
+            double minSubValue = S.elementAt(i1).getSubstitutionCost(T.elementAt(j1));
+            for (int i = i1 + 1; i <= im; i++) {
+                double subValue = S.elementAt(i).getSubstitutionCost(T.elementAt(j1));
+                if (minSubValue > subValue) {
+                    minSubValue = subValue;
+                    minPosI = i;
+                }
+            }
+            for (int i = i1; i <= im; i++) {
+                if (i == minPosI) {
+                    cost += minSubValue;
+                } else {
+                    cost += S.elementAt(i).getDeletionCost();
+                }
+            }
+        } else {
+            cost = QArray[i1][im][j1][jn];
+            loop:
+            for (int i = i1; i < im; i++) {
+                //block X divide to 3 parts.
+                for (int LX = 0; LX <= im - i; LX++) {
+                    //process Y sentence
+                    for (int j = j1; j < jn; j++) {
+                        //if(cost<=swapCost)break;
+                        for (int LY = 0; LY <= jn - j; LY++) {
+                            // 不交换的代价
+                            double cost1 = Q(i1, i, j1, j) + Q(i + 1, i + LX, j + 1, j + LY)
+                                    + Q(i + LX + 1, im, j + LY + 1, jn);
+                            // 交互代价
+                            double cost2 = Q(i1, i, j + LY + 1, jn) + Q(i + 1, i + LX, j + 1, j + LY)
+                                    + Q(i + LX + 1, im, j1, j) + swapCost;
+                            cost = Math.min(Math.min(cost1, cost2), cost);
+                            if (cost == 0)
+                                break loop;
+                        }
+                    }
+                }
+            }
+        }
+
+        QArray[i1][im][j1][jn] = cost;
+        return cost;
+    }
+
+}

+ 105 - 0
ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/sentence/editdistance/Split.java

@@ -0,0 +1,105 @@
+package com.ruoyi.utils.resumeAnalysis.similarity.similarity.sentence.editdistance;
+
+import java.util.ArrayList;
+import java.util.List;
+
+public class Split {
+
+    @SuppressWarnings({"unchecked", "rawtypes"})
+    public static Object[] split(SuperString<? extends EditUnit> X, SuperString<? extends EditUnit> Y) {
+        Block<? extends EditUnit> LX = new Block(X);
+        Block<? extends EditUnit> LY = new Block(Y);
+        split(LX, LY);
+        while (LY.getPrev() != null) {
+            LY = LY.getPrev();
+        }
+        while (LX.getPrev() != null) {
+            LX = LX.getPrev();
+        }
+        List<ChunkEditUnit> first = new ArrayList<>();
+        List<ChunkEditUnit> second = new ArrayList<>();
+        while (LX != null) {
+            first.add(new ChunkEditUnit(LX.getData()));
+            LX = LX.getNext();
+        }
+
+        while (LY != null) {
+            second.add(new ChunkEditUnit(LY.getData()));
+            LY = LY.getNext();
+        }
+        SuperString<ChunkEditUnit> s1 = new SuperString<>(first);
+        SuperString<ChunkEditUnit> s2 = new SuperString<>(second);
+        Object[] obj = new Object[]{s1, s2};
+        return obj;
+    }
+
+    private static void split(Block<?> bx, Block<?> LY) {
+        LCS maxLCS = null;
+        Block<?> by = LY;
+        while (by.getPrev() != null) {
+            by = by.getPrev();
+        }
+        Block<?> maxMatchedBy = by;
+        while (by != null) {
+            if (by.isDivideFlag()) {
+                by = by.getNext();
+                continue;
+            }
+
+            LCS lcs = LCS.parse(bx.getData(), by.getData());
+            if (maxLCS == null || maxLCS.length < lcs.length) {
+                maxLCS = lcs;
+                maxMatchedBy = by;
+            }
+
+            by = by.getNext();
+        }
+
+        if (maxLCS != null && maxLCS.length > 0) {
+            bx.divide(maxLCS.x_pos, maxLCS.length);
+            maxMatchedBy.divide(maxLCS.y_pos, maxLCS.length);
+        }
+
+        if (bx.getPrev() != null && !bx.getPrev().isDivideFlag()) {
+            split(bx.getPrev(), LY);
+        }
+
+        if (bx.getNext() != null && !bx.getNext().isDivideFlag()) {
+            split(bx.getNext(), LY);
+        }
+    }
+
+    /**
+     * Longest Common Substring(最长公共子串)
+     *
+     * @author Gavin
+     */
+    public static class LCS {
+        public int length = 0; //LCS匹配的最长结果
+        public int x_pos = 0; //LCS匹配的X的位置
+        public int y_pos = 0; //LCS匹配的Y的位置
+
+        public static LCS parse(SuperString<?> X, SuperString<?> Y) {
+            LCS lcs = new LCS();
+            for (int start = 0; start < X.length(); start++) {
+                for (int end = start + 1; end <= X.length(); end++) {
+                    SuperString<?> tempX = X.substring(start, end);
+
+                    int pos = Y.indexOf(tempX);
+                    if (pos >= 0 && tempX.length() > lcs.length) {
+                        lcs.length = tempX.length();
+                        lcs.x_pos = start;
+                        lcs.y_pos = pos;
+                    }
+                }
+            }
+            return lcs;
+        }
+
+        @Override
+        public String toString() {
+            return "length=" + length + ", x_pos=" + x_pos + ", y_pos=" + y_pos;
+        }
+    }
+
+}

+ 63 - 0
ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/sentence/editdistance/StandardEditDistanceSimilarity.java

@@ -0,0 +1,63 @@
+package com.ruoyi.utils.resumeAnalysis.similarity.similarity.sentence.editdistance;
+
+/**
+ * 基于编辑距离的汉语句子相似度计算
+ *
+ * @author itbluebox
+ */
+public class StandardEditDistanceSimilarity extends EditDistance {
+
+    /**
+     * 获取两个串的编辑距离
+     */
+    @Override
+    public double getEditDistance(SuperString<? extends EditUnit> X, SuperString<? extends EditUnit> Y) {
+        double[][] D; //编辑矩阵
+
+        int m = X.length(); //字符串X的长度
+        int n = Y.length(); //字符串Y的长度
+        //char ch_x_i;       //字符串X的第i个词
+        //char ch_y_j;       //字符串Y的第j个词
+
+        if (m == 0) {
+            double distance = 0.0;
+            for (int j = 0; j < n; j++) {
+                distance += Y.elementAt(j).getInsertionCost();
+            }
+            return distance;
+        } else if (n == 0) {
+            double distance = 0.0;
+            for (int i = 0; i < m; i++) {
+                distance += X.elementAt(i).getDeletionCost();
+            }
+            return distance;
+        }
+
+        D = new double[n + 1][m + 1];
+        D[0][0] = 0.0; //第一个初始化为0
+
+        // 初始化D[0][j]
+        for (int j = 1; j <= m; j++) {
+            D[0][j] = D[0][j - 1] + X.elementAt(j - 1).getDeletionCost();
+        }
+
+        // 初始化D[i][0]
+        for (int i = 1; i <= n; i++) {
+            D[i][0] = D[i - 1][0] + Y.elementAt(i - 1).getInsertionCost();
+        }
+
+        for (int i = 1; i <= m; i++) {
+            EditUnit unit_x_i = X.elementAt(i - 1);
+            for (int j = 1; j <= n; j++) {
+                EditUnit unit_y_j = Y.elementAt(j - 1);
+                double cost = unit_x_i.getSubstitutionCost(unit_y_j);
+                D[j][i] = Math.min(D[j - 1][i] + unit_y_j.getInsertionCost(),
+                        D[j][i - 1] + unit_x_i.getDeletionCost());
+                D[j][i] = Math.min(D[j][i], D[j - 1][i - 1] + cost);
+            }
+        }
+
+        return D[n][m];
+    }
+
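+    /*
+     * 用法示例(仅作演示):按字符粒度计算编辑距离;具体数值取决于
+     * EditUnit/CharEditUnit定义的插入、删除、替换代价(若均为1,kitten 到 sitting 的距离为3.0)。
+     */
+    public static void main(String[] args) {
+        StandardEditDistanceSimilarity similarity = new StandardEditDistanceSimilarity();
+        double distance = similarity.getEditDistance(
+                SuperString.createCharSuperString("kitten"),
+                SuperString.createCharSuperString("sitting"));
+        System.out.println("editDistance = " + distance);
+    }
+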
+}

+ 92 - 0
ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/sentence/editdistance/SuperString.java

@@ -0,0 +1,92 @@
+package com.ruoyi.utils.resumeAnalysis.similarity.similarity.sentence.editdistance;
+
+
+
+import com.ruoyi.utils.resumeAnalysis.similarity.tokenizer.Tokenizer;
+import com.ruoyi.utils.resumeAnalysis.similarity.tokenizer.Word;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * 超级字符串,可以存放指定的数据类型
+ */
+public class SuperString<T> {
+
+    private List<T> contents = new ArrayList<>();
+
+    public SuperString(List<T> contents) {
+        this.contents = contents;
+    }
+
+    public static SuperString<CharEditUnit> createCharSuperString(String str) {
+        List<CharEditUnit> list = new ArrayList<>(str.length());
+        for (int i = 0; i < str.length(); i++) {
+            list.add(new CharEditUnit(str.charAt(i)));
+        }
+        return new SuperString<>(list);
+    }
+
+    public static SuperString<WordEditUnit> createWordSuperString(String sentence) {
+        List<Word> wordList = Tokenizer.segment(sentence);
+        List<WordEditUnit> unitList = new ArrayList<>(wordList.size());
+        for (int i = 0; i < wordList.size(); i++) {
+            unitList.add(new WordEditUnit(wordList.get(i)));
+        }
+        return new SuperString<>(unitList);
+    }
+
+    public T elementAt(int pos) {
+        if (pos < 0 || pos >= contents.size()) {
+            throw new ArrayIndexOutOfBoundsException("下标越界: " + pos);
+        }
+        return contents.get(pos);
+    }
+
+    public int indexOf(SuperString<?> substring) {
+        for (int i = 0; i < length(); i++) {
+            if (i + substring.length() > length()) {
+                return -1;
+            }
+            int j = 0;
+            while (j < substring.length() && elementAt(i + j).equals(substring.elementAt(j))) {
+                j++;
+            }
+            if (j == substring.length()) {
+                return i;
+            }
+        }
+        return -1;
+    }
+
+    public SuperString<T> substring(int fromIndex, int toIndex) {
+        return new SuperString<>(contents.subList(fromIndex, toIndex));
+    }
+
+    public SuperString<T> substring(int fromIndex) {
+        return new SuperString<>(contents.subList(fromIndex, contents.size()));
+    }
+
+    public int length() {
+        return contents.size();
+    }
+
+    @Override
+    public boolean equals(Object other) {
+        // 防止空指针,且只与SuperString比较
+        return (other instanceof SuperString) && toString().equals(other.toString());
+    }
+
+    @Override
+    public String toString() {
+        StringBuilder sb = new StringBuilder();
+        for (int i = 0; i < length(); i++) {
+            sb.append(elementAt(i));
+        }
+        return sb.toString();
+    }
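+
+    /*
+     * 用法示例(仅作演示):演示字符级构造、indexOf与substring;
+     * 假设CharEditUnit的equals按字符比较、toString输出对应字符。
+     */
+    public static void main(String[] args) {
+        SuperString<CharEditUnit> x = SuperString.createCharSuperString("编辑距离相似度");
+        SuperString<CharEditUnit> y = SuperString.createCharSuperString("相似度");
+        System.out.println(x.length());     // 7
+        System.out.println(x.indexOf(y));   // 在上述假设下为4
+        System.out.println(x.substring(4)); // 在上述假设下输出"相似度"
+    }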
+}

+ 52 - 0
ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/sentence/editdistance/WordEditUnit.java

@@ -0,0 +1,52 @@
+package com.ruoyi.utils.resumeAnalysis.similarity.similarity.sentence.editdistance;
+
+
+import com.ruoyi.utils.resumeAnalysis.similarity.similarity.word.hownet.concept.ConceptSimilarity;
+import com.ruoyi.utils.resumeAnalysis.similarity.tokenizer.Word;
+
+public class WordEditUnit extends EditUnit {
+
+    private Word word = null;
+
+    public WordEditUnit(Word word) {
+        this.word = word;
+    }
+
+    @Override
+    public String getUnitString() {
+        return word.getName();
+    }
+
+    /**
+     * 根据相似度获取替换代价
+     */
+    @Override
+    public double getSubstitutionCost(EditUnit otherUnit) {
+        if (!(otherUnit instanceof WordEditUnit))
+            return 1.0;
+        if (equals(otherUnit))
+            return 0.0;
+
+        WordEditUnit other = (WordEditUnit) otherUnit;
+        // 词性不同,直接返回1.0
+        if (word.getPos() != other.word.getPos()) {
+            return 1.0;
+        }
+        return 1 - ConceptSimilarity.getInstance().getSimilarity(getUnitString(), other.getUnitString());
+    }
+
+    @Override
+    public boolean equals(Object other) {
+        if (!(other instanceof WordEditUnit))
+            return false;
+        WordEditUnit otherUnit = (WordEditUnit) other;
+        Word otherWord = otherUnit.word;
+        // 词性不同,直接判定为不相等
+        if (word.getPos() != otherWord.getPos()) {
+            return false;
+        }
+        double sim = ConceptSimilarity.getInstance().getSimilarity(getUnitString(), otherUnit.getUnitString());
+        return sim > 0.85;
+    }
+
+}

+ 138 - 0
ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/sentence/morphology/MorphoSimilarity.java

@@ -0,0 +1,138 @@
+package com.ruoyi.utils.resumeAnalysis.similarity.similarity.sentence.morphology;
+
+import com.ruoyi.utils.resumeAnalysis.similarity.similarity.sentence.ISentenceSimilarity;
+import com.ruoyi.utils.resumeAnalysis.similarity.similarity.word.IWordSimilarity;
+import com.ruoyi.utils.resumeAnalysis.similarity.similarity.word.hownet.concept.ConceptSimilarity;
+import com.ruoyi.utils.resumeAnalysis.similarity.tokenizer.Tokenizer;
+import com.ruoyi.utils.resumeAnalysis.similarity.tokenizer.Word;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * 基于词形和词序的句子相似度计算算法,考虑了语义因素
+ *
+ * @author itbluebox
+ */
+public class MorphoSimilarity implements ISentenceSimilarity {
+    private static Logger logger = LoggerFactory.getLogger(MorphoSimilarity.class);
+
+    // 词形相似度占总相似度比重
+    private final double LAMBDA1 = 1.0;
+    // 词序相似度占比
+    private final double LAMBDA2 = 0.0;
+    private IWordSimilarity wordSimilarity;
+    private static final String FILTER_CHARS = "  ,。;?《》()|!,.;?<>|_^…!";
+    private static MorphoSimilarity instance;
+
+    public static MorphoSimilarity getInstance() {
+        if (instance == null) {
+            instance = new MorphoSimilarity();
+        }
+        return instance;
+    }
+
+    private MorphoSimilarity() {
+        this.wordSimilarity = ConceptSimilarity.getInstance();
+        logger.debug("used hownet word similarity.");
+    }
+
+    private String[] filter(String[] words) {
+        List<String> results = new ArrayList<>();
+        for (String s : words) {
+            if (!FILTER_CHARS.contains(s)) {
+                results.add(s.toLowerCase());
+            }
+        }
+
+        return results.toArray(new String[results.size()]);
+    }
+
+    @Override
+    public double getSimilarity(String sentence1, String sentence2) {
+        String[] list1 = filter(segment(sentence1));
+        String[] list2 = filter(segment(sentence2));
+        // 避免与成员变量wordSimilarity重名
+        double occurrenceSimilarity = getOccurrenceSimilarity(list1, list2);
+        double orderSimilarity = getOrderSimilarity(list1, list2);
+        return LAMBDA1 * occurrenceSimilarity + LAMBDA2 * orderSimilarity;
+    }
+
+    /**
+     * 获取两个集合的词形相似度, 同时获取相对于第一个句子中的词语顺序,第二个句子词语的顺序变化次数
+     *
+     * @param list1
+     * @param list2
+     * @return
+     */
+    private double getOccurrenceSimilarity(String[] list1, String[] list2) {
+        int max = list1.length > list2.length ? list1.length : list2.length;
+        if (max == 0) {
+            return 0;
+        }
+
+        //首先计算出所有可能的组合
+        double[][] scores = new double[max][max];
+        for (int i = 0; i < list1.length; i++) {
+            for (int j = 0; j < list2.length; j++) {
+                scores[i][j] = wordSimilarity.getSimilarity(list1[i], list2[j]);
+            }
+        }
+
+        double total_score = 0;
+
+        //从scores[][]中挑选出最大的一个相似度,然后减去该元素,进一步求剩余元素中的最大相似度
+        while (scores.length > 0) {
+            double max_score = 0;
+            int max_row = 0;
+            int max_col = 0;
+
+            //先挑出相似度最大的一对:<row, column, max_score>
+            for (int i = 0; i < scores.length; i++) {
+                for (int j = 0; j < scores.length; j++) {
+                    if (max_score < scores[i][j]) {
+                        max_row = i;
+                        max_col = j;
+                        max_score = scores[i][j];
+                    }
+                }
+            }
+
+            //从数组中去除最大的相似度,继续挑选
+            double[][] tmp_scores = new double[scores.length - 1][scores.length - 1];
+            for (int i = 0; i < scores.length; i++) {
+                if (i == max_row)
+                    continue;
+                for (int j = 0; j < scores.length; j++) {
+                    if (j == max_col)
+                        continue;
+                    int tmp_i = max_row > i ? i : i - 1;
+                    int tmp_j = max_col > j ? j : j - 1;
+                    tmp_scores[tmp_i][tmp_j] = scores[i][j];
+                }
+            }
+            total_score += max_score;
+            scores = tmp_scores;
+        }
+
+        return (2 * total_score) / (list1.length + list2.length);
+    }
+
+    /**
+     * 获取两个集合的词序相似度
+     * 说明:当前为占位实现,恒返回0.0;由于LAMBDA2为0,不影响最终结果
+     */
+    private double getOrderSimilarity(String[] list1, String[] list2) {
+        return 0.0;
+    }
+
+    public String[] segment(String sentence) {
+        List<Word> list = Tokenizer.segment(sentence);
+        String[] results = new String[list.size()];
+        for (int i = 0; i < list.size(); i++) {
+            results[i] = list.get(i).getName();
+        }
+        return results;
+    }
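+
+    /*
+     * 用法示例(仅作演示):依赖ConceptSimilarity加载的知网(HowNet)概念数据,
+     * 数据缺失时getInstance()可能失败。
+     */
+    public static void main(String[] args) {
+        MorphoSimilarity similarity = MorphoSimilarity.getInstance();
+        String s1 = "负责简历解析模块的开发";
+        String s2 = "负责简历解析功能的设计与开发";
+        System.out.println(similarity.getSimilarity(s1, s2));
+    }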
+}

+ 179 - 0
ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/sentence/morphology/SemanticSimilarity.java

@@ -0,0 +1,179 @@
+package com.ruoyi.utils.resumeAnalysis.similarity.similarity.sentence.morphology;
+
+import com.ruoyi.utils.resumeAnalysis.similarity.similarity.sentence.ISentenceSimilarity;
+import com.ruoyi.utils.resumeAnalysis.similarity.similarity.word.IWordSimilarity;
+import com.ruoyi.utils.resumeAnalysis.similarity.similarity.word.hownet.concept.ConceptSimilarity;
+import com.ruoyi.utils.resumeAnalysis.similarity.tokenizer.Tokenizer;
+import com.ruoyi.utils.resumeAnalysis.similarity.tokenizer.Word;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * 基于词形和词序的句子相似度计算算法
+ * 在考虑语义时,无法直接获取OnceWS(A, B),
+ * 为此,将两个句子的词语匹配对中相似度大于某一阈值的词语对记为相同词语,以此计算词序相似度。
+ *
+ * @author itbluebox
+ */
+public class SemanticSimilarity implements ISentenceSimilarity {
+    /**
+     * 词形相似度占总相似度的比重
+     */
+    private final double LAMBDA1 = 0.8;
+    /**
+     * 词序相似度占总相似度的比重
+     */
+    private final double LAMBDA2 = 0.2;
+    /**
+     * 如果两个词语的相似度大于了该阈值, 则作为相同词语,计算词序相似度
+     */
+    private final double GAMMA = 0.6;
+    /**
+     * 词语相似度的计算
+     */
+    private IWordSimilarity wordSimilarity = null;
+
+    private static String FILTER_CHARS = "  ,。;?《》()|!,.;?<>|_^…!";
+
+    private static SemanticSimilarity instance = null;
+
+    public static SemanticSimilarity getInstance() {
+        if (instance == null) {
+            instance = new SemanticSimilarity();
+        }
+        return instance;
+    }
+
+    private SemanticSimilarity() {
+        this.wordSimilarity = ConceptSimilarity.getInstance();
+    }
+
+    /**
+     * 滤掉词串中的空格、标点符号
+     *
+     * @param words
+     * @return
+     */
+    private String[] filter(String[] words) {
+        List<String> results = new ArrayList<String>();
+        for (String w : words) {
+            if (!FILTER_CHARS.contains(w)) {
+                results.add(w.toLowerCase());
+            }
+        }
+
+        return results.toArray(new String[results.size()]);
+    }
+
+    /**
+     * 计算两个句子的相似度
+     */
+    @Override
+    public double getSimilarity(String sentence1, String sentence2) {
+        String[] list1 = filter(segment(sentence1));
+        String[] list2 = filter(segment(sentence2));
+
+        return calculate(list1, list2);
+    }
+
+    /**
+     * 获取两个集合的词形相似度, 同时获取相对于第一个句子中的词语顺序,第二个句子词语的顺序变化次数
+     *
+     * @param list1
+     * @param list2
+     * @return
+     */
+    public double calculate(String[] list1, String[] list2) {
+        if (list1.length == 0 || list2.length == 0) {
+            return 0;
+        }
+
+        //首先计算出所有可能的组合
+        double[][] scores = new double[list1.length][list2.length];
+
+        //代表第1个句子对应位置是否已经被使用, 默认为未使用,即false
+        boolean[] firstFlags = new boolean[list1.length];
+
+        //代表第2个句子对应位置是否已经被使用, 默认为未使用,即false
+        boolean[] secondFlags = new boolean[list2.length];
+
+        //PSecond的定义参见书中5.4.3节, 为避免无必要的初始化数组,
+        //数组中0值表示在第一个句子中没有对应的相似词语,大于0的值
+        //则表示在第一个句子中的位置(从1开始编号了)
+        int[] PSecond = new int[list2.length];
+
+        for (int i = 0; i < list1.length; i++) {
+            //firstFlags[i] = false;
+            for (int j = 0; j < list2.length; j++) {
+                scores[i][j] = wordSimilarity.getSimilarity(list1[i], list2[j]);
+            }
+        }
+        double total_score = 0;
+        //从scores[][]中挑选出最大的一个相似度,然后减去该元素(通过Flags数组表示),进一步求剩余元素中的最大相似度
+        while (true) {
+            double max_score = 0;
+            int max_row = -1;
+            int max_col = -1;
+
+            //先挑出相似度最大的一对:<row, column, max_score>
+            for (int i = 0; i < scores.length; i++) {
+                if (firstFlags[i])
+                    continue;
+                // 内层循环的上界应为list2的长度(scores的列数),而非行数
+                for (int j = 0; j < scores[i].length; j++) {
+                    if (secondFlags[j])
+                        continue;
+
+                    if (max_score < scores[i][j]) {
+                        max_row = i;
+                        max_col = j;
+                        max_score = scores[i][j];
+                    }
+                }
+            }
+
+            if (max_row >= 0) {
+                total_score += max_score;
+                firstFlags[max_row] = true;
+                secondFlags[max_col] = true;
+                if (max_score >= GAMMA) {
+                    PSecond[max_col] = max_row + 1;
+                }
+            } else {
+                break;
+            }
+        }
+        double wordSim = (2 * total_score) / (list1.length + list2.length);
+        int previous = 0;
+        int revOrdCount = 0;
+        int onceWSSize = 0;
+        for (int i = 0; i < PSecond.length; i++) {
+            if (PSecond[i] > 0) {
+                onceWSSize++;
+                if (previous > 0 && (previous > PSecond[i])) {
+                    revOrdCount++;
+                }
+                previous = PSecond[i];
+            }
+        }
+        double ordSim = 0;
+        if (onceWSSize == 1) {
+            ordSim = 1;
+        } else if (onceWSSize == 0) {
+            ordSim = 0;
+        } else {
+            ordSim = 1.0 - revOrdCount * 1.0 / (onceWSSize - 1);
+        }
+        System.out.println("wordSim ==> " + wordSim + ", ordSim ==> " + ordSim);
+        return LAMBDA1 * wordSim + LAMBDA2 * ordSim;
+    }
+
+    public String[] segment(String sentence) {
+        List<Word> list = Tokenizer.segment(sentence);
+        String[] results = new String[list.size()];
+        for (int i = 0; i < list.size(); i++) {
+            results[i] = list.get(i).getName();
+        }
+        return results;
+    }
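+
+    /*
+     * 用法示例(仅作演示):与MorphoSimilarity一样依赖ConceptSimilarity的知网概念数据。
+     */
+    public static void main(String[] args) {
+        SemanticSimilarity similarity = SemanticSimilarity.getInstance();
+        System.out.println(similarity.getSimilarity("负责简历解析模块的开发", "负责简历解析功能的设计与开发"));
+    }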
+}

+ 91 - 0
ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/text/CosineSimilarity.java

@@ -0,0 +1,91 @@
+package com.ruoyi.utils.resumeAnalysis.similarity.similarity.text;
+
+
+
+import com.ruoyi.utils.resumeAnalysis.similarity.similarity.util.AtomicFloat;
+import com.ruoyi.utils.resumeAnalysis.similarity.tokenizer.Word;
+
+import java.math.BigDecimal;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+/**
+ * 余弦相似度计算
+ * 判定方式:余弦相似度,通过计算两个向量的夹角余弦值来评估他们的相似度
+ * 余弦夹角原理:
+ * 向量a=(x1,y1),向量b=(x2,y2)
+ * similarity=a.b/|a|*|b|
+ * a.b=x1x2+y1y2
+ * |a|=根号[(x1)^2+(y1)^2],|b|=根号[(x2)^2+(y2)^2]
+ *
+ * @author itbluebox
+ */
+public class CosineSimilarity extends TextSimilarity {
+    /**
+     * 文本相似度计算
+     * 判定方式:余弦相似度,通过计算两个向量的夹角余弦值来评估他们的相似度
+     * 余弦夹角原理:
+     * 向量a=(x1,y1),向量b=(x2,y2)
+     * similarity=a.b/(|a|*|b|)
+     * a.b=x1x2+y1y2
+     * |a|=根号[(x1)^2+(y1)^2],|b|=根号[(x2)^2+(y2)^2]
+     *
+     * @param words1 词列表1
+     * @param words2 词列表2
+     * @return 分值
+     */
+    @Override
+    public double getSimilarityImpl(List<Word> words1, List<Word> words2) {
+        // 词频标注词的权重
+        taggingWeightByFrequency(words1, words2);
+        // 权重容器
+        Map<String, Float> weightMap1 = getFastSearchMap(words1);
+        Map<String, Float> weightMap2 = getFastSearchMap(words2);
+        Set<Word> words = new HashSet<>();
+        words.addAll(words1);
+        words.addAll(words2);
+        AtomicFloat ab = new AtomicFloat();// a.b
+        AtomicFloat aa = new AtomicFloat();// |a|的平方
+        AtomicFloat bb = new AtomicFloat();// |b|的平方
+        // 计算
+        words.parallelStream()
+                .forEach(word -> {
+                    Float x1 = weightMap1.get(word.getName());
+                    Float x2 = weightMap2.get(word.getName());
+                    if (x1 != null && x2 != null) {
+                        //x1x2
+                        float oneOfTheDimension = x1 * x2;
+                        //+
+                        ab.addAndGet(oneOfTheDimension);
+                    }
+                    if (x1 != null) {
+                        //(x1)^2
+                        float oneOfTheDimension = x1 * x1;
+                        //+
+                        aa.addAndGet(oneOfTheDimension);
+                    }
+                    if (x2 != null) {
+                        //(x2)^2
+                        float oneOfTheDimension = x2 * x2;
+                        //+
+                        bb.addAndGet(oneOfTheDimension);
+                    }
+                });
+        //|a|
+        double aaa = Math.sqrt(aa.doubleValue());
+        //|b|
+        double bbb = Math.sqrt(bb.doubleValue());
+        //使用BigDecimal保证精确计算浮点数
+        //|a|*|b|
+        //double aabb = aaa * bbb;
+        BigDecimal aabb = BigDecimal.valueOf(aaa).multiply(BigDecimal.valueOf(bbb));
+        //similarity=a.b/(|a|*|b|)
+        //double cos = ab.get() / aabb.doubleValue();
+        double cos = BigDecimal.valueOf(ab.get()).divide(aabb, 9, BigDecimal.ROUND_HALF_UP).doubleValue();
+        return cos;
+    }
+
+
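+    /*
+     * 用法示例(仅作演示):仿照同包其它*TextSimilarity类的main方法,
+     * 假设Tokenizer能对示例文本正常分词。
+     */
+    public static void main(String[] args) {
+        String text1 = "我爱购物";
+        String text2 = "我爱读书";
+        String text3 = "他是黑客";
+        TextSimilarity textSimilarity = new CosineSimilarity();
+        System.out.println(text1 + " 和 " + text2 + " 的相似度分值:" + textSimilarity.getSimilarity(text1, text2));
+        System.out.println(text1 + " 和 " + text3 + " 的相似度分值:" + textSimilarity.getSimilarity(text1, text3));
+    }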
+}

+ 77 - 0
ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/text/DiceTextSimilarity.java

@@ -0,0 +1,77 @@
+package com.ruoyi.utils.resumeAnalysis.similarity.similarity.text;
+
+import com.ruoyi.utils.resumeAnalysis.similarity.tokenizer.Word;
+
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+/**
+ * 文本相似度计算
+ * 判定方式:Sorensen–Dice系数(Sorensen–Dice coefficient),通过计算两个集合交集的大小的2倍除以两个集合的大小之和来评估他们的相似度
+ * 算法步骤描述:
+ * 1、分词
+ * 2、求交集(去重),计算交集的不重复词的个数 intersectionSize
+ * 3、两个集合的大小分别为 set1Size 和 set2Size
+ * 4、相似度分值 = 2*intersectionSize/(set1Size+set2Size)
+ * 完整计算公式:
+ * double score = 2*intersectionSize/(set1Size+set2Size);
+ *
+ * @author 杨尚川
+ */
+public class DiceTextSimilarity extends TextSimilarity {
+    /**
+     * 计算相似度分值
+     *
+     * @param words1 词列表1
+     * @param words2 词列表2
+     * @return 相似度分值
+     */
+    @Override
+    protected double getSimilarityImpl(List<Word> words1, List<Word> words2) {
+        if (words1.isEmpty() && words2.isEmpty()) {
+            return 1.0;
+        }
+        //转变为不重复的集合
+        Set<Word> words1Set = new HashSet<>(words1);
+        Set<Word> words2Set = new HashSet<>(words2);
+        // 两个集合的大小
+        int set1Size = words1Set.size();
+        int set2Size = words2Set.size();
+
+        // 求交集(去重),计算交集的不重复词的个数
+        words1Set.retainAll(words2Set);
+        int intersectionSize = words1Set.size();
+
+        //相似度分值
+        double score = 2 * intersectionSize / (double) (set1Size + set2Size);
+        if (LOGGER.isDebugEnabled()) {
+            LOGGER.debug("集合1:" + words1);
+            LOGGER.debug("集合2:" + words2);
+            LOGGER.debug("集合1的大小:" + set1Size);
+            LOGGER.debug("集合2的大小:" + set2Size);
+            LOGGER.debug("交集的大小:" + intersectionSize);
+            LOGGER.debug("相似度分值=2*" + intersectionSize + "/(double)(" + set1Size + "+" + set2Size + ")=" + score);
+        }
+        return score;
+    }
+
+    public static void main(String[] args) {
+        String text1 = "我爱购物";
+        String text2 = "我爱读书";
+        String text3 = "他是黑客";
+        TextSimilarity textSimilarity = new DiceTextSimilarity();
+        double score1pk1 = textSimilarity.getSimilarity(text1, text1);
+        double score1pk2 = textSimilarity.getSimilarity(text1, text2);
+        double score1pk3 = textSimilarity.getSimilarity(text1, text3);
+        double score2pk2 = textSimilarity.getSimilarity(text2, text2);
+        double score2pk3 = textSimilarity.getSimilarity(text2, text3);
+        double score3pk3 = textSimilarity.getSimilarity(text3, text3);
+        System.out.println(text1 + " 和 " + text1 + " 的相似度分值:" + score1pk1);
+        System.out.println(text1 + " 和 " + text2 + " 的相似度分值:" + score1pk2);
+        System.out.println(text1 + " 和 " + text3 + " 的相似度分值:" + score1pk3);
+        System.out.println(text2 + " 和 " + text2 + " 的相似度分值:" + score2pk2);
+        System.out.println(text2 + " 和 " + text3 + " 的相似度分值:" + score2pk3);
+        System.out.println(text3 + " 和 " + text3 + " 的相似度分值:" + score3pk3);
+    }
+}

+ 78 - 0
ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/text/EditDistanceSimilarity.java

@@ -0,0 +1,78 @@
+package com.ruoyi.utils.resumeAnalysis.similarity.similarity.text;
+
+import com.ruoyi.utils.resumeAnalysis.similarity.tokenizer.Word;
+
+import java.util.List;
+
+/**
+ * 编辑距离(Edit Distance)相似度计算
+ * 文本相似度计算
+ * 指两个字串之间,由一个转成另一个所需的最少编辑操作次数
+ * 允许的编辑操作包括将一个字符替换成另一个字符,增加一个字符,删除一个字符
+ * 例如将kitten一字转成sitting:
+ * sitten (k→s)将一个字符k替换成另一个字符s
+ * sittin (e→i)将一个字符e替换成另一个字符i
+ * sitting (→g)增加一个字符g
+ * 因为这个算法是俄罗斯科学家Vladimir Levenshtein在1965年提出
+ * 所以编辑距离(Edit Distance)又称Levenshtein距离
+ *
+ * @author itbluebox
+ */
+public class EditDistanceSimilarity extends TextSimilarity {
+    /**
+     * 计算相似度分值
+     *
+     * @param words1 词列表1
+     * @param words2 词列表2
+     * @return 相似度分值
+     */
+    @Override
+    protected double getSimilarityImpl(List<Word> words1, List<Word> words2) {
+        //文本1
+        StringBuilder text1 = new StringBuilder();
+        words1.forEach(word -> text1.append(word.getName()));
+        //文本2
+        StringBuilder text2 = new StringBuilder();
+        words2.forEach(word -> text2.append(word.getName()));
+        int maxTextLength = Math.max(text1.length(), text2.length());
+        if (maxTextLength == 0) {
+            //两个空字符串
+            return 1.0;
+        }
+        //计算文本1和文本2的编辑距离
+        int editDistance = editDistance(text1.toString(), text2.toString());
+        double score = (1 - editDistance / (double) maxTextLength);
+        if (LOGGER.isDebugEnabled()) {
+            LOGGER.debug("文本1:" + text1.toString());
+            LOGGER.debug("文本2:" + text2.toString());
+            LOGGER.debug("文本1和文本2的编辑距离:" + editDistance);
+            LOGGER.debug("文本1和文本2的最大长度:" + maxTextLength);
+            LOGGER.debug("文本1和文本2的相似度分值:1 - " + editDistance + " / (double)" + maxTextLength + "=" + score);
+        }
+        return score;
+    }
+
+    private int editDistance(String text1, String text2) {
+        int[] costs = new int[text2.length() + 1];
+        for (int i = 0; i <= text1.length(); i++) {
+            int previousValue = i;
+            for (int j = 0; j <= text2.length(); j++) {
+                if (i == 0) {
+                    costs[j] = j;
+                } else if (j > 0) {
+                    int useValue = costs[j - 1];
+                    if (text1.charAt(i - 1) != text2.charAt(j - 1)) {
+                        useValue = Math.min(Math.min(useValue, previousValue), costs[j]) + 1;
+                    }
+                    costs[j - 1] = previousValue;
+                    previousValue = useValue;
+
+                }
+            }
+            if (i > 0) {
+                costs[text2.length()] = previousValue;
+            }
+        }
+        return costs[text2.length()];
+    }
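+
+    /*
+     * 用法示例(仅作演示):仿照同包其它*TextSimilarity类的main方法,
+     * 假设Tokenizer能对示例文本正常分词。
+     */
+    public static void main(String[] args) {
+        TextSimilarity textSimilarity = new EditDistanceSimilarity();
+        String text1 = "我爱购物";
+        String text2 = "我爱读书";
+        System.out.println(text1 + " 和 " + text2 + " 的相似度分值:" + textSimilarity.getSimilarity(text1, text2));
+    }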
+}

+ 119 - 0
ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/text/EuclideanDistanceTextSimilarity.java

@@ -0,0 +1,119 @@
+/**
+ * APDPlat - Application Product Development Platform
+ * Copyright (c) 2013, 杨尚川, yang-shangchuan@qq.com
+ * <p>
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ * <p>
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * <p>
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+package com.ruoyi.utils.resumeAnalysis.similarity.similarity.text;
+
+
+import com.ruoyi.utils.resumeAnalysis.similarity.similarity.util.AtomicFloat;
+import com.ruoyi.utils.resumeAnalysis.similarity.tokenizer.Word;
+
+import java.math.BigDecimal;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+/**
+ * 文本相似度计算
+ * 判定方式:欧几里得距离(Euclidean Distance),通过计算两点间的距离来评估他们的相似度
+ * 欧几里得距离原理:
+ * 设A(x1, y1),B(x2, y2)是平面上任意两点
+ * 两点间的距离dist(A,B)=sqrt((x1-x2)^2+(y1-y2)^2)
+ *
+ * @author 杨尚川
+ */
+public class EuclideanDistanceTextSimilarity extends TextSimilarity {
+    /**
+     * 判定相似度的方式:欧几里得距离
+     * 欧几里得距离原理:
+     * 设A(x1, y1),B(x2, y2)是平面上任意两点
+     * 两点间的距离dist(A,B)=sqrt((x1-x2)^2+(y1-y2)^2)
+     *
+     * @param words1 词列表1
+     * @param words2 词列表2
+     * @return 相似度分值
+     */
+    @Override
+    protected double getSimilarityImpl(List<Word> words1, List<Word> words2) {
+        //用词频来标注词的权重
+        taggingWeightByFrequency(words1, words2);
+        //构造权重快速搜索容器
+        Map<String, Float> weights1 = getFastSearchMap(words1);
+        Map<String, Float> weights2 = getFastSearchMap(words2);
+        //所有的不重复词
+        Set<Word> words = new HashSet<>();
+        words.addAll(words1);
+        words.addAll(words2);
+        //向量的维度为words的大小,每一个维度的权重是词频
+        //(x1-x2)^2+(y1-y2)^2
+        AtomicFloat ab = new AtomicFloat();
+        //计算
+        words.parallelStream()
+                .forEach(word -> {
+                    Float x1 = weights1.get(word.getName());
+                    Float x2 = weights2.get(word.getName());
+                    if (x1 == null) {
+                        x1 = 0f;
+                    }
+                    if (x2 == null) {
+                        x2 = 0f;
+                    }
+                    //(x1-x2)
+                    float oneOfTheDimension = x1 - x2;
+                    //(x1-x2)^2
+                    //+
+                    ab.addAndGet(oneOfTheDimension * oneOfTheDimension);
+                });
+
+        //distance=sqrt((x1-x2)^2+(y1-y2)^2)
+        double euclideanDistance = Math.sqrt(ab.get());
+        double score = 0;
+        if (euclideanDistance == 0) {
+            //距离为0,表示完全相同
+            score = 1.0;
+        } else {
+            //使用BigDecimal保证精确计算浮点数
+            //score = 1 / (euclideanDistance+1);
+            score = BigDecimal.valueOf(1).divide(BigDecimal.valueOf(euclideanDistance + 1), 9, BigDecimal.ROUND_HALF_UP).doubleValue();
+        }
+        if (LOGGER.isDebugEnabled()) {
+            LOGGER.debug("文本1和文本2的欧几里得距离:" + euclideanDistance);
+            LOGGER.debug("文本1和文本2的相似度分值:1 / (" + euclideanDistance + "+1)=" + score);
+        }
+        return score;
+    }
+
+    public static void main(String[] args) {
+        String text1 = "我爱购物";
+        String text2 = "我爱读书";
+        String text3 = "他是黑客";
+        TextSimilarity textSimilarity = new EuclideanDistanceTextSimilarity();
+        double score1pk1 = textSimilarity.getSimilarity(text1, text1);
+        double score1pk2 = textSimilarity.getSimilarity(text1, text2);
+        double score1pk3 = textSimilarity.getSimilarity(text1, text3);
+        double score2pk2 = textSimilarity.getSimilarity(text2, text2);
+        double score2pk3 = textSimilarity.getSimilarity(text2, text3);
+        double score3pk3 = textSimilarity.getSimilarity(text3, text3);
+        System.out.println(text1 + " 和 " + text1 + " 的相似度分值:" + score1pk1);
+        System.out.println(text1 + " 和 " + text2 + " 的相似度分值:" + score1pk2);
+        System.out.println(text1 + " 和 " + text3 + " 的相似度分值:" + score1pk3);
+        System.out.println(text2 + " 和 " + text2 + " 的相似度分值:" + score2pk2);
+        System.out.println(text2 + " 和 " + text3 + " 的相似度分值:" + score2pk3);
+        System.out.println(text3 + " 和 " + text3 + " 的相似度分值:" + score3pk3);
+    }
+}

+ 40 - 0
ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/text/ITextSimilarity.java

@@ -0,0 +1,40 @@
+package com.ruoyi.utils.resumeAnalysis.similarity.similarity.text;
+
+import com.ruoyi.utils.resumeAnalysis.similarity.similarity.ISimilarity;
+import com.ruoyi.utils.resumeAnalysis.similarity.tokenizer.Word;
+
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+
+/**
+ * @author itbluebox
+ */
+public interface ITextSimilarity extends ISimilarity {
+    // 阈值
+    float thresholdRate = 0.5f;
+
+    /**
+     * 词列表1和词列表2的相似度分值
+     *
+     * @param words1 词列表1
+     * @param words2 词列表2
+     * @return 相似度分值
+     */
+    double getSimilarity(List<Word> words1, List<Word> words2);
+
+    default double getSimilarity(Map<Word, Float> weights1, Map<Word, Float> weights2) {
+        List<List<Word>> words = Arrays.asList(weights1, weights2).parallelStream()
+                .map(weights -> weights.keySet().parallelStream()
+                        .map(word -> {
+                            word.setWeight(weights.get(word));
+                            return word;
+                        })
+                        .collect(Collectors.toList())).collect(Collectors.toList());
+
+        return getSimilarity(words.get(0), words.get(1));
+    }
+
+
+}

+ 99 - 0
ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/text/JaccardTextSimilarity.java

@@ -0,0 +1,99 @@
+/**
+ * APDPlat - Application Product Development Platform
+ * Copyright (c) 2013, 杨尚川, yang-shangchuan@qq.com
+ * <p>
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ * <p>
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * <p>
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+package com.ruoyi.utils.resumeAnalysis.similarity.similarity.text;
+
+
+import com.ruoyi.utils.resumeAnalysis.similarity.tokenizer.Word;
+
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.concurrent.ConcurrentSkipListSet;
+
+/**
+ * 文本相似度计算
+ * 判定方式:Jaccard相似性系数(Jaccard similarity coefficient) ,通过计算两个集合交集的大小除以并集的大小来评估他们的相似度
+ * 算法步骤描述:
+ * 1、分词
+ * 2、求交集(去重),计算交集的不重复词的个数 intersectionSize
+ * 3、求并集(去重),计算并集的不重复词的个数 unionSize
+ * 4、2中的值除以3中的值 intersectionSize/(double)unionSize
+ * 完整计算公式:
+ * double score = intersectionSize/(double)unionSize;
+ * @author 杨尚川
+ */
+public class JaccardTextSimilarity extends TextSimilarity {
+    /**
+     * 判定相似度的方式:Jaccard相似性系数
+     * @param words1 词列表1
+     * @param words2 词列表2
+     * @return 相似度分值
+     */
+    @Override
+    protected double getSimilarityImpl(List<Word> words1, List<Word> words2) {
+        if (words1.isEmpty() && words2.isEmpty()) {
+            return 1.0;
+        }
+        //HashSet的contains性能要大于ArrayList的contains
+        Set<Word> words2Set = new HashSet<>();
+        words2Set.addAll(words2);
+        //求交集
+        Set<String> intersectionSet = new ConcurrentSkipListSet<>();
+        words1.parallelStream().forEach(word -> {
+            if (words2Set.contains(word)) {
+                intersectionSet.add(word.getName());
+            }
+        });
+        //交集的大小
+        int intersectionSize = intersectionSet.size();
+        //求并集
+        Set<String> unionSet = new HashSet<>();
+        words1.forEach(word -> unionSet.add(word.getName()));
+        words2.forEach(word -> unionSet.add(word.getName()));
+        //并集的大小
+        int unionSize = unionSet.size();
+        //相似度分值
+        double score = intersectionSize / (double) unionSize;
+        if (LOGGER.isDebugEnabled()) {
+            LOGGER.debug("交集的大小:" + intersectionSize);
+            LOGGER.debug("并集的大小:" + unionSize);
+            LOGGER.debug("相似度分值=" + intersectionSize + "/(double)" + unionSize + "=" + score);
+        }
+        return score;
+    }
+
+    public static void main(String[] args) {
+        String text1 = "我爱购物";
+        String text2 = "我爱读书";
+        String text3 = "他是黑客";
+        TextSimilarity textSimilarity = new JaccardTextSimilarity();
+        double score1pk1 = textSimilarity.getSimilarity(text1, text1);
+        double score1pk2 = textSimilarity.getSimilarity(text1, text2);
+        double score1pk3 = textSimilarity.getSimilarity(text1, text3);
+        double score2pk2 = textSimilarity.getSimilarity(text2, text2);
+        double score2pk3 = textSimilarity.getSimilarity(text2, text3);
+        double score3pk3 = textSimilarity.getSimilarity(text3, text3);
+        System.out.println(text1 + " 和 " + text1 + " 的相似度分值:" + score1pk1);
+        System.out.println(text1 + " 和 " + text2 + " 的相似度分值:" + score1pk2);
+        System.out.println(text1 + " 和 " + text3 + " 的相似度分值:" + score1pk3);
+        System.out.println(text2 + " 和 " + text2 + " 的相似度分值:" + score2pk2);
+        System.out.println(text2 + " 和 " + text3 + " 的相似度分值:" + score2pk3);
+        System.out.println(text3 + " 和 " + text3 + " 的相似度分值:" + score3pk3);
+    }
+}

+ 165 - 0
ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/text/JaroDistanceTextSimilarity.java

@@ -0,0 +1,165 @@
+/**
+ * APDPlat - Application Product Development Platform
+ * Copyright (c) 2013, 杨尚川, yang-shangchuan@qq.com
+ * <p>
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ * <p>
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * <p>
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+package com.ruoyi.utils.resumeAnalysis.similarity.similarity.text;
+
+
+import com.ruoyi.utils.resumeAnalysis.similarity.tokenizer.Word;
+
+import java.util.List;
+
+/**
+ * 文本相似度计算
+ * 判定方式:Jaro距离(Jaro Distance),编辑距离的一种类型
+ * 这里需要注意的是Jaro距离也就是相似度分值
+ *
+ * @author 杨尚川
+ */
+public class JaroDistanceTextSimilarity extends TextSimilarity {
+    protected String shorterText = null;
+    protected String longerText = null;
+
+    /**
+     * 计算相似度分值
+     *
+     * @param words1 词列表1
+     * @param words2 词列表2
+     * @return 相似度分值
+     */
+    @Override
+    protected double getSimilarityImpl(List<Word> words1, List<Word> words2) {
+        //文本1
+        StringBuilder text1 = new StringBuilder();
+        words1.forEach(word -> text1.append(word.getName()));
+        //文本2
+        StringBuilder text2 = new StringBuilder();
+        words2.forEach(word -> text2.append(word.getName()));
+        //计算文本1和文本2的Jaro距离
+        //Jaro距离也就是相似度分值
+        double score = jaroDistance(text1.toString(), text2.toString());
+        if (LOGGER.isDebugEnabled()) {
+            LOGGER.debug("文本1:" + text1.toString());
+            LOGGER.debug("文本2:" + text2.toString());
+            LOGGER.debug("文本1和文本2的相似度分值:" + score);
+        }
+        return score;
+    }
+
+    private double jaroDistance(String text1, String text2) {
+        //假设文本1长度更短
+        shorterText = text1.toLowerCase();
+        longerText = text2.toLowerCase();
+        //如果假设不成立则交换变量的值
+        if (shorterText.length() > longerText.length()) {
+            String temp = shorterText;
+            shorterText = longerText;
+            longerText = temp;
+        }
+        //字符交集窗口大小(注:标准Jaro距离按较长串长度计算窗口,这里按较短串;
+        //当较短串过短使窗口为负时,下面的边界处理会使其退化为无匹配)
+        int windowLength = (shorterText.length() / 2) - 1;
+        //求字符交集,m1可能会不等于m2
+        String m1 = getCharacterConjunction(shorterText, longerText, windowLength);
+        String m2 = getCharacterConjunction(longerText, shorterText, windowLength);
+        //一种或两种情况没有字符交集,完全不相关,相似度分值为0
+        if (m1.length() == 0 || m2.length() == 0) {
+            return 0.0;
+        }
+        //交集字符个数不相等,完全不相关,相似度分值为0
+        if (m1.length() != m2.length()) {
+            return 0.0;
+        }
+        //m is the number of matching characters
+        //m1.length() == m2.length()
+        int m = m1.length();
+        //两段文本为了保持相等需要的换位次数
+        int transpositions = transpositions(m1, m2);
+        //换位次数除以2
+        //t is half the number of transpositions
+        int t = transpositions / 2;
+
+        //计算距离(这里的距离也就是相似度分值了)
+        double distance = (m / (double) shorterText.length() +
+                m / (double) longerText.length() +
+                (m - t) / (double) m) / 3.0;
+        return distance;
+    }
+
+    /**
+     * 获取两段文本的共有字符即字符交集
+     *
+     * @param text1        文本1
+     * @param text2        文本2
+     * @param windowLength 字符交集窗口大小
+     * @return 字符交集
+     */
+    private String getCharacterConjunction(String text1, String text2, int windowLength) {
+        StringBuilder conjunction = new StringBuilder();
+        StringBuilder target = new StringBuilder(text2);
+        int len1 = text1.length();
+        for (int i = 0; i < len1; i++) {
+            char source = text1.charAt(i);
+            boolean found = false;
+            int start = Math.max(0, i - windowLength);
+            int end = Math.min(i + windowLength, text2.length());
+            for (int j = start; !found && j < end; j++) {
+                if (source == target.charAt(j)) {
+                    found = true;
+                    conjunction.append(source);
+                    target.setCharAt(j, '*');
+                }
+            }
+        }
+        return conjunction.toString();
+    }
+
+    /**
+     * 计算两段文本为了保持相等需要的换位次数
+     *
+     * @param text1 文本1
+     * @param text2 文本2
+     * @return 换位次数
+     */
+    private int transpositions(String text1, String text2) {
+        int transpositions = 0;
+        for (int i = 0; i < text1.length(); i++) {
+            if (text1.charAt(i) != text2.charAt(i)) {
+                transpositions++;
+            }
+        }
+        return transpositions;
+    }
+
+    public static void main(String[] args) {
+        String text1 = "我爱购物";
+        String text2 = "我爱读书";
+        String text3 = "他是黑客";
+        TextSimilarity textSimilarity = new JaroDistanceTextSimilarity();
+        double score1pk1 = textSimilarity.getSimilarity(text1, text1);
+        double score1pk2 = textSimilarity.getSimilarity(text1, text2);
+        double score1pk3 = textSimilarity.getSimilarity(text1, text3);
+        double score2pk2 = textSimilarity.getSimilarity(text2, text2);
+        double score2pk3 = textSimilarity.getSimilarity(text2, text3);
+        double score3pk3 = textSimilarity.getSimilarity(text3, text3);
+        System.out.println(text1 + " 和 " + text1 + " 的相似度分值:" + score1pk1);
+        System.out.println(text1 + " 和 " + text2 + " 的相似度分值:" + score1pk2);
+        System.out.println(text1 + " 和 " + text3 + " 的相似度分值:" + score1pk3);
+        System.out.println(text2 + " 和 " + text2 + " 的相似度分值:" + score2pk2);
+        System.out.println(text2 + " 和 " + text3 + " 的相似度分值:" + score2pk3);
+        System.out.println(text3 + " 和 " + text3 + " 的相似度分值:" + score3pk3);
+    }
+}

+ 106 - 0
ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/text/JaroWinklerDistanceTextSimilarity.java

@@ -0,0 +1,106 @@
+/**
+ * APDPlat - Application Product Development Platform
+ * Copyright (c) 2013, 杨尚川, yang-shangchuan@qq.com
+ * <p>
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ * <p>
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * <p>
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+package com.ruoyi.utils.resumeAnalysis.similarity.similarity.text;
+
+
+
+import com.ruoyi.utils.resumeAnalysis.similarity.tokenizer.Word;
+
+import java.util.List;
+
+/**
+ * 文本相似度计算
+ * 判定方式:Jaro–Winkler距离(Jaro–Winkler Distance),Jaro的扩展
+ * 由William E. Winkler提出,最适合计算短文本如人名的相似度
+ * 这里需要注意的是Jaro–Winkler距离也就是相似度分值
+ * @author 杨尚川
+ */
+public class JaroWinklerDistanceTextSimilarity extends JaroDistanceTextSimilarity {
+    private static final double DEFAULT_SCALING_FACTOR = 0.1;
+    private static final int MAXIMUM_CHARACTERS = 4;
+    private double scalingFactor;
+
+    public JaroWinklerDistanceTextSimilarity() {
+        this.scalingFactor = DEFAULT_SCALING_FACTOR;
+    }
+
+    /**
+     * scalingFactor的值介于闭区间[0, 0.25]
+     * @param scalingFactor
+     */
+    public JaroWinklerDistanceTextSimilarity(double scalingFactor) {
+        if (scalingFactor > 0.25) {
+            scalingFactor = 0.25;
+        }
+        if (scalingFactor < 0) {
+            scalingFactor = 0;
+        }
+        this.scalingFactor = scalingFactor;
+    }
+
+    /**
+     * 计算相似度分值
+     * @param words1 词列表1
+     * @param words2 词列表2
+     * @return 相似度分值
+     */
+    @Override
+    protected double getSimilarityImpl(List<Word> words1, List<Word> words2) {
+        // 直接调用父类JaroDistanceTextSimilarity的实现;若调用getSimilarity会再次分发回本方法,造成无限递归
+        double score = super.getSimilarityImpl(words1, words2);
+
+        score += (scalingFactor * commonPrefixLength() * (1.0 - score));
+
+        return score;
+    }
+
+    /**
+     * 判断两段文本的共同前缀的字符个数,共同前缀的字符个数如果大于4则按4处理
+     * @return 整数闭区间[0, 4]
+     */
+    private int commonPrefixLength() {
+        // shorterText和longerText已经在父类JaroDistanceTextSimilarity中准备好了
+        // 这里直接用就可以
+        int result = 0;
+        int len = shorterText.length();
+        for (int i = 0; i < len; i++) {
+            if (shorterText.charAt(i) != longerText.charAt(i)) {
+                break;
+            }
+            result++;
+            if (result >= MAXIMUM_CHARACTERS) {
+                // 最多4个字符即可
+                return MAXIMUM_CHARACTERS;
+            }
+        }
+        return result;
+    }
+
+    public static void main(String[] args) {
+        String text1 = "我爱购物";
+        String text2 = "我爱读书";
+        String text3 = "他是黑客";
+        TextSimilarity textSimilarity = new JaroWinklerDistanceTextSimilarity();
+        double score1pk1 = textSimilarity.getSimilarity(text1, text1);
+        double score1pk2 = textSimilarity.getSimilarity(text1, text2);
+        double score1pk3 = textSimilarity.getSimilarity(text1, text3);
+        System.out.println(text1 + " 和 " + text1 + " 的相似度分值:" + score1pk1);
+        System.out.println(text1 + " 和 " + text2 + " 的相似度分值:" + score1pk2);
+        System.out.println(text1 + " 和 " + text3 + " 的相似度分值:" + score1pk3);
+    }
+}

+ 114 - 0
ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/text/ManhattanDistanceTextSimilarity.java

@@ -0,0 +1,114 @@
+/**
+ * APDPlat - Application Product Development Platform
+ * Copyright (c) 2013, 杨尚川, yang-shangchuan@qq.com
+ * <p>
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ * <p>
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * <p>
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+package com.ruoyi.utils.resumeAnalysis.similarity.similarity.text;
+
+
+import com.ruoyi.utils.resumeAnalysis.similarity.similarity.util.AtomicFloat;
+import com.ruoyi.utils.resumeAnalysis.similarity.tokenizer.Word;
+
+import java.math.BigDecimal;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+/**
+ * 文本相似度计算
+ * 判定方式:曼哈顿距离(Manhattan Distance),通过计算两个点在标准坐标系上的绝对轴距总和来评估他们的相似度
+ * 曼哈顿距离原理:
+ * 设A(x1, y1),B(x2, y2)是平面上任意两点
+ * 两点间的距离dist(A,B)=|x1-x2|+|y1-y2|
+ * @author 杨尚川
+ */
+public class ManhattanDistanceTextSimilarity extends TextSimilarity {
+    /**
+     * 判定相似度的方式:曼哈顿距离
+     * 曼哈顿距离原理:
+     * 设A(x1, y1),B(x2, y2)是平面上任意两点
+     * 两点间的距离dist(A,B)=|x1-x2|+|y1-y2|
+     * @param words1 词列表1
+     * @param words2 词列表2
+     * @return 相似度分值
+     */
+    @Override
+    protected double getSimilarityImpl(List<Word> words1, List<Word> words2) {
+        //用词频来标注词的权重
+        taggingWeightByFrequency(words1, words2);
+        //构造权重快速搜索容器
+        Map<String, Float> weights1 = getFastSearchMap(words1);
+        Map<String, Float> weights2 = getFastSearchMap(words2);
+        //所有的不重复词
+        Set<Word> words = new HashSet<>();
+        words.addAll(words1);
+        words.addAll(words2);
+        //向量的维度为words的大小,每一个维度的权重是词频
+        //manhattanDistance=|x1-x2|+|y1-y2|
+        AtomicFloat manhattanDistance = new AtomicFloat();
+        //计算
+        words
+                .parallelStream()
+                .forEach(word -> {
+                    Float x1 = weights1.get(word.getName());
+                    Float x2 = weights2.get(word.getName());
+                    if (x1 == null) {
+                        x1 = 0f;
+                    }
+                    if (x2 == null) {
+                        x2 = 0f;
+                    }
+                    //|x1-x2|
+                    float oneOfTheDimension = Math.abs(x1 - x2);
+                    //+
+                    manhattanDistance.addAndGet(oneOfTheDimension);
+                });
+        double score = 0;
+        if (manhattanDistance.get() == 0) {
+            //距离为0,表示完全相同
+            score = 1.0;
+        } else {
+            //使用BigDecimal保证精确计算浮点数
+            //score = 1 / (double)(manhattanDistance.get()+1);
+            score = BigDecimal.valueOf(1).divide(BigDecimal.valueOf(manhattanDistance.get() + 1), 9, BigDecimal.ROUND_HALF_UP).doubleValue();
+        }
+        if (LOGGER.isDebugEnabled()) {
+            LOGGER.debug("文本1和文本2的曼哈顿距离:" + manhattanDistance.get());
+            LOGGER.debug("文本1和文本2的相似度分值:1 / (double)(" + manhattanDistance.get() + "+1)=" + score);
+        }
+        return score;
+    }
+
+    public static void main(String[] args) {
+        String text1 = "我爱购物";
+        String text2 = "我爱读书";
+        String text3 = "他是黑客";
+        TextSimilarity textSimilarity = new ManhattanDistanceTextSimilarity();
+        double score1pk1 = textSimilarity.getSimilarity(text1, text1);
+        double score1pk2 = textSimilarity.getSimilarity(text1, text2);
+        double score1pk3 = textSimilarity.getSimilarity(text1, text3);
+        double score2pk2 = textSimilarity.getSimilarity(text2, text2);
+        double score2pk3 = textSimilarity.getSimilarity(text2, text3);
+        double score3pk3 = textSimilarity.getSimilarity(text3, text3);
+        System.out.println(text1 + " 和 " + text1 + " 的相似度分值:" + score1pk1);
+        System.out.println(text1 + " 和 " + text2 + " 的相似度分值:" + score1pk2);
+        System.out.println(text1 + " 和 " + text3 + " 的相似度分值:" + score1pk3);
+        System.out.println(text2 + " 和 " + text2 + " 的相似度分值:" + score2pk2);
+        System.out.println(text2 + " 和 " + text3 + " 的相似度分值:" + score2pk3);
+        System.out.println(text3 + " 和 " + text3 + " 的相似度分值:" + score3pk3);
+    }
+}

+ 203 - 0
ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/text/SimHashPlusHammingDistanceTextSimilarity.java

@@ -0,0 +1,203 @@
+/**
+ * APDPlat - Application Product Development Platform
+ * Copyright (c) 2013, 杨尚川, yang-shangchuan@qq.com
+ * <p>
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ * <p>
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * <p>
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+package com.ruoyi.utils.resumeAnalysis.similarity.similarity.text;
+
+import com.ruoyi.utils.resumeAnalysis.similarity.tokenizer.Word;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.math.BigInteger;
+import java.util.List;
+
+/**
+ * 文本相似度计算
+ * 判定方式:SimHash + 汉明距离(Hamming Distance)
+ * 先使用SimHash把不同长度的文本映射为等长文本,然后再计算等长文本的汉明距离
+ * <p>
+ * simhash和普通hash最大的不同在于:
+ * 普通hash对 仅有一个字节不同的文本 会映射成 两个完全不同的哈希结果
+ * simhash对 相似的文本 会映射成 相似的哈希结果
+ * <p>
+ * 汉明距离是以美国数学家Richard Wesley Hamming的名字命名的
+ * 两个等长字符串之间的汉明距离是两个字符串相应位置的不同字符的个数
+ * 换句话说,它就是将一个字符串变换成另外一个字符串所需要替换的字符个数
+ * <p>
+ * 比如:
+ * 1011101 与 1001001 之间的汉明距离是 2
+ * 2143896 与 2233796 之间的汉明距离是 3
+ * toned 与 roses 之间的汉明距离是 3
+ *
+ * @author 杨尚川
+ */
+public class SimHashPlusHammingDistanceTextSimilarity extends TextSimilarity {
+    private static final Logger LOGGER = LoggerFactory.getLogger(SimHashPlusHammingDistanceTextSimilarity.class);
+
+    private int hashBitCount = 128;
+
+    public SimHashPlusHammingDistanceTextSimilarity() {
+    }
+
+    public SimHashPlusHammingDistanceTextSimilarity(int hashBitCount) {
+        this.hashBitCount = hashBitCount;
+    }
+
+    public int getHashBitCount() {
+        return hashBitCount;
+    }
+
+    public void setHashBitCount(int hashBitCount) {
+        this.hashBitCount = hashBitCount;
+    }
+
+    /**
+     * 计算相似度分值
+     *
+     * @param words1 词列表1
+     * @param words2 词列表2
+     * @return 相似度分值
+     */
+    @Override
+    protected double getSimilarityImpl(List<Word> words1, List<Word> words2) {
+        //用词频来标注词的权重
+        taggingWeightByFrequency(words1, words2);
+        //计算SimHash
+        String simHash1 = simHash(words1);
+        String simHash2 = simHash(words2);
+        //计算SimHash值之间的汉明距离
+        int hammingDistance = hammingDistance(simHash1, simHash2);
+        if (hammingDistance == -1) {
+            LOGGER.error("文本1:" + words1.toString());
+            LOGGER.error("文本2:" + words2.toString());
+            LOGGER.error("文本1SimHash值:" + simHash1);
+            LOGGER.error("文本2SimHash值:" + simHash2);
+            LOGGER.error("文本1和文本2的SimHash值长度不相等,不能计算汉明距离");
+            return 0.0;
+        }
+        int maxDistance = simHash1.length();
+        double score = (1 - hammingDistance / (double) maxDistance);
+        if (LOGGER.isDebugEnabled()) {
+            LOGGER.debug("文本1:" + words1.toString());
+            LOGGER.debug("文本2:" + words2.toString());
+            LOGGER.debug("文本1SimHash值:" + simHash1);
+            LOGGER.debug("文本2SimHash值:" + simHash2);
+            LOGGER.debug("hashBitCount:" + hashBitCount);
+            LOGGER.debug("SimHash值之间的汉明距离:" + hammingDistance);
+            LOGGER.debug("文本1和文本2的相似度分值:1 - " + hammingDistance + " / (double)" + maxDistance + "=" + score);
+        }
+        return score;
+    }
+
+    /**
+     * 计算词列表的SimHash值
+     *
+     * @param words 词列表
+     * @return SimHash值
+     */
+    private String simHash(List<Word> words) {
+        float[] hashBit = new float[hashBitCount];
+        words.forEach(word -> {
+            float weight = word.getWeight() == null ? 1 : word.getWeight();
+            BigInteger hash = hash(word.getName());
+            for (int i = 0; i < hashBitCount; i++) {
+                BigInteger bitMask = new BigInteger("1").shiftLeft(i);
+                if (hash.and(bitMask).signum() != 0) {
+                    hashBit[i] += weight;
+                } else {
+                    hashBit[i] -= weight;
+                }
+            }
+        });
+        StringBuffer fingerprint = new StringBuffer();
+        for (int i = 0; i < hashBitCount; i++) {
+            if (hashBit[i] >= 0) {
+                fingerprint.append("1");
+            } else {
+                fingerprint.append("0");
+            }
+        }
+        return fingerprint.toString();
+    }
+
+    /**
+     * 计算词的哈希值
+     *
+     * @param word 词
+     * @return 哈希值
+     */
+    private BigInteger hash(String word) {
+        if (word == null || word.length() == 0) {
+            return new BigInteger("0");
+        }
+        char[] charArray = word.toCharArray();
+        BigInteger x = BigInteger.valueOf(((long) charArray[0]) << 7);
+        BigInteger m = new BigInteger("1000003");
+        BigInteger mask = new BigInteger("2").pow(hashBitCount).subtract(new BigInteger("1"));
+        long sum = 0;
+        for (char c : charArray) {
+            sum += c;
+        }
+        x = x.multiply(m).xor(BigInteger.valueOf(sum)).and(mask);
+        x = x.xor(new BigInteger(String.valueOf(word.length())));
+        if (x.equals(new BigInteger("-1"))) {
+            x = new BigInteger("-2");
+        }
+        return x;
+    }
+
+    /**
+     * 计算等长的SimHash值的汉明距离
+     * 如不能比较距离(比较的两段文本长度不相等),则返回-1
+     *
+     * @param simHash1 SimHash值1
+     * @param simHash2 SimHash值2
+     * @return 汉明距离
+     */
+    private int hammingDistance(String simHash1, String simHash2) {
+        if (simHash1.length() != simHash2.length()) {
+            return -1;
+        }
+        int distance = 0;
+        int len = simHash1.length();
+        for (int i = 0; i < len; i++) {
+            if (simHash1.charAt(i) != simHash2.charAt(i)) {
+                distance++;
+            }
+        }
+        return distance;
+    }
+
+    public static void main(String[] args) throws Exception {
+        String text1 = "我爱购物";
+        String text2 = "我爱读书";
+        String text3 = "他是黑客";
+        TextSimilarity textSimilarity = new SimHashPlusHammingDistanceTextSimilarity();
+        double score1pk1 = textSimilarity.getSimilarity(text1, text1);
+        double score1pk2 = textSimilarity.getSimilarity(text1, text2);
+        double score1pk3 = textSimilarity.getSimilarity(text1, text3);
+        double score2pk2 = textSimilarity.getSimilarity(text2, text2);
+        double score2pk3 = textSimilarity.getSimilarity(text2, text3);
+        double score3pk3 = textSimilarity.getSimilarity(text3, text3);
+        System.out.println(text1 + " 和 " + text1 + " 的相似度分值:" + score1pk1);
+        System.out.println(text1 + " 和 " + text2 + " 的相似度分值:" + score1pk2);
+        System.out.println(text1 + " 和 " + text3 + " 的相似度分值:" + score1pk3);
+        System.out.println(text2 + " 和 " + text2 + " 的相似度分值:" + score2pk2);
+        System.out.println(text2 + " 和 " + text3 + " 的相似度分值:" + score2pk3);
+        System.out.println(text3 + " 和 " + text3 + " 的相似度分值:" + score3pk3);
+    }
+}
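A standalone sketch of the scoring rule applied above, score = 1 - hammingDistance / (double) maxDistance; the 16-bit fingerprints below are made up purely for the arithmetic, real fingerprints use hashBitCount bits:

    public class HammingScoreSketch {
        public static void main(String[] args) {
            String simHash1 = "1011010010110100";
            String simHash2 = "1011000010110110";
            int distance = 0;
            for (int i = 0; i < simHash1.length(); i++) {
                if (simHash1.charAt(i) != simHash2.charAt(i)) {
                    distance++;
                }
            }
            double score = 1 - distance / (double) simHash1.length();
            System.out.println(distance + " differing bits -> score " + score); // 2 differing bits -> score 0.875
        }
    }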

+ 144 - 0
ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/text/TextSimilarity.java

@@ -0,0 +1,144 @@
+package com.ruoyi.utils.resumeAnalysis.similarity.similarity.text;
+
+import com.ruoyi.utils.resumeAnalysis.similarity.similarity.util.StringUtil;
+import com.ruoyi.utils.resumeAnalysis.similarity.tokenizer.Tokenizer;
+import com.ruoyi.utils.resumeAnalysis.similarity.tokenizer.Word;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.atomic.AtomicInteger;
+
+/**
+ * 文本相似度
+ *
+ * @author itbluebox
+ */
+public abstract class TextSimilarity implements ITextSimilarity {
+
+    protected static final Logger LOGGER = LoggerFactory.getLogger(TextSimilarity.class);
+
+    @Override
+    public double getSimilarity(String text1, String text2) {
+        if (LOGGER.isDebugEnabled()) {
+            LOGGER.debug("text1:" + text1);
+            LOGGER.debug("text2:" + text2);
+        }
+        if (StringUtil.isBlank(text1) && StringUtil.isBlank(text2)) {
+            return 1.0;
+        }
+        if (StringUtil.isBlank(text1) || StringUtil.isBlank(text2)) {
+            return 0.0;
+        }
+        if (text1.equalsIgnoreCase(text2)) {
+            return 1.0;
+        }
+        List<Word> words1 = Tokenizer.segment(text1);
+        List<Word> words2 = Tokenizer.segment(text2);
+        return getSimilarity(words1, words2);
+    }
+
+    @Override
+    public double getSimilarity(List<Word> words1, List<Word> words2) {
+        if (StringUtil.isBlank(words1) && StringUtil.isBlank(words2)) {
+            return 1.0;
+        }
+        if (StringUtil.isBlank(words1) || StringUtil.isBlank(words2)) {
+            return 0.0;
+        }
+        if (LOGGER.isDebugEnabled()) {
+            LOGGER.debug("词列表1:");
+            LOGGER.debug("\t" + words1);
+            LOGGER.debug("词列表2:");
+            LOGGER.debug("\t" + words2);
+        }
+
+        double score = getSimilarityImpl(words1, words2);
+        if (LOGGER.isDebugEnabled()) {
+            LOGGER.debug("score:" + score);
+        }
+        score = (int) (score * 1000000 + 0.5) / (double) 1000000;
+        if (LOGGER.isDebugEnabled())
+            LOGGER.debug("分值,四舍五入:" + score);
+
+        return score;
+    }
+
+    protected abstract double getSimilarityImpl(List<Word> words1, List<Word> words2);
+
+    protected void taggingWeightByFrequency(List<Word> words1, List<Word> words2) {
+        if (words1.get(0).getWeight() != null || words2.get(0).getWeight() != null) {
+            return;
+        }
+        Map<String, AtomicInteger> frequency1 = getFrequency(words1);
+        Map<String, AtomicInteger> frequency2 = getFrequency(words2);
+        //输出词频统计信息
+        if (LOGGER.isDebugEnabled()) {
+            LOGGER.debug("词频统计1:\n{}", getWordsFrequencyString(frequency1));
+            LOGGER.debug("词频统计2:\n{}", getWordsFrequencyString(frequency2));
+        }
+        // 标注权重
+        words1.parallelStream().forEach(word -> word.setWeight(frequency1.get(word.getName()).floatValue()));
+        words2.parallelStream().forEach(word -> word.setWeight(frequency2.get(word.getName()).floatValue()));
+    }
+
+    /**
+     * 统计词频
+     *
+     * @param words 词列表
+     * @return 词频统计图
+     */
+    private Map<String, AtomicInteger> getFrequency(List<Word> words) {
+        Map<String, AtomicInteger> freq = new HashMap<>();
+        words.forEach(i -> freq.computeIfAbsent(i.getName(), k -> new AtomicInteger()).incrementAndGet());
+        return freq;
+    }
+
+    /**
+     * 词频统计信息
+     *
+     * @param frequency 词频
+     * @return
+     */
+    private String getWordsFrequencyString(Map<String, AtomicInteger> frequency) {
+        StringBuilder str = new StringBuilder();
+        if (frequency != null && !frequency.isEmpty()) {
+            AtomicInteger integer = new AtomicInteger();
+            frequency.entrySet()
+                    .stream()
+                    .sorted((a, b) -> b.getValue().get() - a.getValue().get())
+                    .forEach(i -> str.append("\t")
+                            .append(integer.incrementAndGet())
+                            .append("、")
+                            .append(i.getKey())
+                            .append("=")
+                            .append(i.getValue())
+                            .append("\n")
+                    );
+        }
+        if (str.length() > 0) {
+            str.setLength(str.length() - 1);
+        }
+        return str.toString();
+    }
+
+    /**
+     * 构造权重快速搜索容器
+     *
+     * @param words
+     * @return
+     */
+    protected Map<String, Float> getFastSearchMap(List<Word> words) {
+        Map<String, Float> weightMap = new ConcurrentHashMap<>();
+        if (words == null) return weightMap;
+        words.parallelStream().forEach(i -> {
+            if (i.getWeight() != null) {
+                weightMap.put(i.getName(), i.getWeight());
+            } else {
+                LOGGER.error("no word weight info:" + i.getName());
+            }
+        });
+        return weightMap;
+    }
+}
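TextSimilarity is a template: blank checks, segmentation, the weighting helpers and six-decimal rounding live in the base class, and the actual metric goes into getSimilarityImpl. A hypothetical minimal subclass, a Jaccard overlap of surface forms that assumes only the Word.getName() accessor used above, could look like this:

    import com.ruoyi.utils.resumeAnalysis.similarity.similarity.text.TextSimilarity;
    import com.ruoyi.utils.resumeAnalysis.similarity.tokenizer.Word;

    import java.util.HashSet;
    import java.util.List;
    import java.util.Set;

    public class JaccardTextSimilarity extends TextSimilarity {
        @Override
        protected double getSimilarityImpl(List<Word> words1, List<Word> words2) {
            Set<String> set1 = new HashSet<>();
            words1.forEach(w -> set1.add(w.getName()));
            Set<String> set2 = new HashSet<>();
            words2.forEach(w -> set2.add(w.getName()));
            Set<String> union = new HashSet<>(set1);
            union.addAll(set2);
            set1.retainAll(set2); // set1 now holds the intersection
            return union.isEmpty() ? 0.0 : set1.size() / (double) union.size();
        }
    }

The base class then applies its own rounding before returning the score to callers.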

+ 99 - 0
ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/util/AtomicFloat.java

@@ -0,0 +1,99 @@
+package com.ruoyi.utils.resumeAnalysis.similarity.similarity.util;
+
+import java.util.concurrent.atomic.AtomicInteger;
+
+/**
+ * jdk没有AtomicFloat,写一个
+ *
+ * @author itbluebox
+ */
+public class AtomicFloat extends Number {
+    private AtomicInteger bits;
+
+    public AtomicFloat() {
+        this(0f);
+    }
+
+    public AtomicFloat(float initialValue) {
+        bits = new AtomicInteger(Float.floatToIntBits(initialValue));
+    }
+
+    public final float addAndGet(float delta) {
+        float expect;
+        float update;
+        do {
+            expect = get();
+            update = expect + delta;
+        } while (!this.compareAndSet(expect, update));
+
+        return update;
+    }
+
+    public final float getAndAdd(float delta) {
+        float expect;
+        float update;
+        do {
+            expect = get();
+            update = expect + delta;
+        } while (!this.compareAndSet(expect, update));
+
+        return expect;
+    }
+
+    public final float getAndDecrement() {
+        return getAndAdd(-1);
+    }
+
+    public final float decrementAndGet() {
+        return addAndGet(-1);
+    }
+
+    public final float getAndIncrement() {
+        return getAndAdd(1);
+    }
+
+    public final float incrementAndGet() {
+        return addAndGet(1);
+    }
+
+    public final float getAndSet(float newValue) {
+        float expect;
+        do {
+            expect = get();
+        } while (!this.compareAndSet(expect, newValue));
+
+        return expect;
+    }
+
+    public final boolean compareAndSet(float expect, float update) {
+        return bits.compareAndSet(Float.floatToIntBits(expect), Float.floatToIntBits(update));
+    }
+
+    public final void set(float newValue) {
+        bits.set(Float.floatToIntBits(newValue));
+    }
+
+    public final float get() {
+        return Float.intBitsToFloat(bits.get());
+    }
+
+    public float floatValue() {
+        return get();
+    }
+
+    public double doubleValue() {
+        return (double) floatValue();
+    }
+
+    public int intValue() {
+        return (int) get();
+    }
+
+    public long longValue() {
+        return (long) get();
+    }
+
+    public String toString() {
+        return Float.toString(get());
+    }
+}
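A small usage sketch of the CAS loop above: two threads accumulate weights lock-free, and because 0.5f increments are exactly representable the result is deterministic.

    import com.ruoyi.utils.resumeAnalysis.similarity.similarity.util.AtomicFloat;

    public class AtomicFloatSketch {
        public static void main(String[] args) throws InterruptedException {
            AtomicFloat total = new AtomicFloat();
            Runnable task = () -> {
                for (int i = 0; i < 1000; i++) {
                    total.addAndGet(0.5f); // retried compare-and-set on the underlying int bits
                }
            };
            Thread t1 = new Thread(task);
            Thread t2 = new Thread(task);
            t1.start();
            t2.start();
            t1.join();
            t2.join();
            System.out.println(total.get()); // 1000.0
        }
    }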

+ 36 - 0
ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/util/DicReader.java

@@ -0,0 +1,36 @@
+package com.ruoyi.utils.resumeAnalysis.similarity.similarity.util;
+
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.BufferedReader;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.UnsupportedEncodingException;
+
+/**
+ * 加载词典用的类
+ * 
+ * @author ansj
+ */
+public class DicReader {
+
+	private static final Logger LOGGER = LoggerFactory.getLogger(DicReader.class);
+	public static BufferedReader getReader(String name) {
+		// maven工程修改词典加载方式
+		InputStream in = DicReader.class.getResourceAsStream("/" + name);
+		try {
+			return new BufferedReader(new InputStreamReader(in, "UTF-8"));
+		} catch (UnsupportedEncodingException e) {
+			LOGGER.warn("不支持的编码", e);
+		}
+		return null;
+	}
+
+	public static InputStream getInputStream(String name) {
+		// maven工程修改词典加载方式
+		InputStream in = DicReader.class.getResourceAsStream("/" + name);
+		return in;
+	}
+}

+ 141 - 0
ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/util/EditDistance.java

@@ -0,0 +1,141 @@
+package com.ruoyi.utils.resumeAnalysis.similarity.similarity.util;
+
+/**
+ * This class computes the edit distance between two strings using dynamic
+ * programming. The dynamic programming part is in the method
+ * printEditDistance().
+ */
+public class EditDistance {
+
+    /**
+     * 获取删除代价
+     *
+     * @return
+     */
+    public int getDeletionCost() {
+        return 1;
+    }
+
+    /**
+     * 获取插入代价
+     *
+     * @return
+     */
+    public int getInsertionCost() {
+        return 1;
+    }
+
+    /**
+     * 获取替换代价
+     *
+     * @return
+     */
+    public int getSubstitutionCost(char a, char b) {
+        return (a == b) ? 0 : 1;
+    }
+
+    public int getEditDistance(String S, String T) {
+        int[][] D = null;
+        if (S == null)
+            S = "";
+        if (T == null)
+            T = "";
+
+        char[] a = S.toCharArray();
+        char[] b = T.toCharArray();
+
+        int n = a.length; // 字符串S的长度
+        int m = b.length; // 字符串T的长度
+
+        if (a.length == 0) {
+            return b.length;
+        } else if (b.length == 0) {
+            return a.length;
+        }
+
+        D = new int[a.length + 1][b.length + 1];
+
+        /** 初始化D[i][0] */
+        for (int i = 1; i <= n; i++) {
+            D[i][0] = D[i - 1][0] + getDeletionCost();
+        }
+
+        /** 初始化D[0][j] */
+        for (int j = 1; j <= m; j++) {
+            D[0][j] = D[0][j - 1] + getInsertionCost();
+        }
+
+        for (int i = 1; i <= n; i++) {
+            for (int j = 1; j <= m; j++) {
+                D[i][j] = MathUtil.min(D[i - 1][j] + getDeletionCost(), D[i][j - 1] + getInsertionCost(),
+                        D[i - 1][j - 1] + getSubstitutionCost(a[i - 1], b[j - 1]));
+            }
+        }
+
+        return D[n][m];
+    }
+
+    /**
+     * 应与getEditDistance(S, T)等同
+     *
+     * @param s
+     * @param t
+     * @return
+     */
+    public static int getLevenshteinDistance(String s, String t) {
+        if (s == null || t == null) {
+            throw new IllegalArgumentException("Strings must not be null");
+        }
+        int d[][]; // matrix
+        int n; // length of s
+        int m; // length of t
+        int i; // iterates through s
+        int j; // iterates through t
+        char s_i; // ith character of s
+        char t_j; // jth character of t
+        int cost; // cost
+
+        // Step 1
+        n = s.length();
+        m = t.length();
+        if (n == 0) {
+            return m;
+        }
+        if (m == 0) {
+            return n;
+        }
+        d = new int[n + 1][m + 1];
+
+        // Step 2
+        for (i = 0; i <= n; i++) {
+            d[i][0] = i;
+        }
+        for (j = 0; j <= m; j++) {
+            d[0][j] = j;
+        }
+
+        // Step 3
+        for (i = 1; i <= n; i++) {
+            s_i = s.charAt(i - 1);
+
+            // Step 4
+            for (j = 1; j <= m; j++) {
+                t_j = t.charAt(j - 1);
+
+                // Step 5
+                if (s_i == t_j) {
+                    cost = 0;
+                } else {
+                    cost = 1;
+                }
+
+                // Step 6
+                d[i][j] = MathUtil.min(d[i - 1][j] + 1, d[i][j - 1] + 1, d[i - 1][j - 1] + cost);
+            }
+        }
+
+        // Step 7
+        return d[n][m];
+    }
+
+}
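A quick sanity check of the two equivalent implementations above, plus a length-normalized similarity; the 1 - d / maxLen normalization is just one common convention, not necessarily the one the edit-distance similarity classes in this commit use.

    import com.ruoyi.utils.resumeAnalysis.similarity.similarity.util.EditDistance;

    public class EditDistanceSketch {
        public static void main(String[] args) {
            EditDistance ed = new EditDistance();
            int d1 = ed.getEditDistance("kitten", "sitting");                  // 3
            int d2 = EditDistance.getLevenshteinDistance("kitten", "sitting"); // 3, same result
            double similarity = 1 - d2 / (double) Math.max("kitten".length(), "sitting".length());
            System.out.println(d1 + " edits, similarity " + similarity);       // 3 edits, similarity ≈ 0.571
        }
    }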

+ 66 - 0
ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/util/FileUtil.java

@@ -0,0 +1,66 @@
+package com.ruoyi.utils.resumeAnalysis.similarity.similarity.util;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.*;
+
+/**
+ * 文件工具类
+ *
+ * @author itbluebox
+ */
+public class FileUtil {
+    private static final Logger logger = LoggerFactory.getLogger(FileUtil.class);
+
+    /**
+     * 从指定流读入每一行文字
+     *
+     * @param input    输入流
+     * @param encoding 编码
+     * @param event    触发的事件
+     * @throws IOException
+     */
+    public static void traverseLines(InputStream input, String encoding, TraverseEvent<String> event)
+            throws IOException {
+        try (BufferedReader in = new BufferedReader(new InputStreamReader(input, encoding))) {
+            // 关闭reader的同时会关闭底层输入流,即使readLine抛出异常也能释放资源
+            String line;
+            while ((line = in.readLine()) != null) {
+                event.visit(line);
+            }
+        }
+    }
+
+    /**
+     * 保存字符串到文件中
+     *
+     * @param content
+     * @param fileName
+     * @return
+     */
+    public static boolean saveStringToFile(String content, String fileName) {
+        boolean rtn = false;
+        BufferedOutputStream out = null;
+        try {
+            File file = new File(fileName);
+            file.getParentFile().mkdirs();
+
+            out = new BufferedOutputStream(new FileOutputStream(file));
+            out.write(content.getBytes("GBK"));
+            out.close();
+            rtn = true;
+        } catch (Exception e) {
+            logger.error("saveStringToFile error:{}", e.getMessage());
+        } finally {
+            try {
+                if (out != null) {
+                    out.close();
+                }
+            } catch (Exception e) {
+                logger.error("Exception:{}", e.getMessage());
+            }
+        }
+        return rtn;
+    }
+}
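A usage sketch for traverseLines; TraverseEvent has a single abstract method, so it can be passed as a lambda, and the boolean returned by visit is ignored by traverseLines rather than acting as an early-exit flag.

    import com.ruoyi.utils.resumeAnalysis.similarity.similarity.util.FileUtil;

    import java.io.ByteArrayInputStream;
    import java.io.IOException;
    import java.io.InputStream;
    import java.nio.charset.StandardCharsets;
    import java.util.concurrent.atomic.AtomicInteger;

    public class TraverseLinesSketch {
        public static void main(String[] args) throws IOException {
            InputStream in = new ByteArrayInputStream("第一行\n\n第三行\n".getBytes(StandardCharsets.UTF_8));
            AtomicInteger nonBlank = new AtomicInteger();
            FileUtil.traverseLines(in, "UTF-8", line -> {
                if (!line.trim().isEmpty()) {
                    nonBlank.incrementAndGet();
                }
                return false; // return value is ignored by traverseLines
            });
            System.out.println(nonBlank.get()); // 2
        }
    }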

+ 20 - 0
ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/util/MathUtil.java

@@ -0,0 +1,20 @@
+package com.ruoyi.utils.resumeAnalysis.similarity.similarity.util;
+
+/**
+ * 简单比较大小
+ */
+public class MathUtil {
+
+    public static int min(int... values) {
+        int min = Integer.MAX_VALUE;
+        for (int v : values) {
+            min = (v < min) ? v : min;
+        }
+        return min;
+    }
+
+    public static int max(int a, int b) {
+        return a > b ? a : b;
+    }
+
+}

+ 7 - 0
ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/util/SimilarityUtil.java

@@ -0,0 +1,7 @@
+package com.ruoyi.utils.resumeAnalysis.similarity.similarity.util;
+
+/**
+ * @author itbluebox
+ */
+public class SimilarityUtil {
+}

+ 367 - 0
ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/util/StringUtil.java

@@ -0,0 +1,367 @@
+package com.ruoyi.utils.resumeAnalysis.similarity.similarity.util;
+
+import java.util.*;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * 字符串处理工具类
+ *
+ * @author itbluebox
+ */
+public class StringUtil {
+    private static final String EMPTY = "";
+    private static final String NULL = "null";
+
+    private static String REGEX_CHINESE = "[\u4e00-\u9fa5]";// 中文正则
+
+    /**
+     * 判断字符串是否为空
+     *
+     * @param cs
+     * @return
+     */
+    public static boolean isBlank(CharSequence cs) {
+        int strLen;
+        if (cs == null || (strLen = cs.length()) == 0) {
+            return true;
+        }
+        for (int i = 0; i < strLen; i++) {
+            if (Character.isWhitespace(cs.charAt(i)) == false) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    /**
+     * 判断字符串是否不为空
+     *
+     * @param cs
+     * @return
+     */
+    public static boolean isNotBlank(CharSequence cs) {
+        return !isBlank(cs);
+
+    }
+
+    /**
+     * 判断字符串s是否是空串
+     *
+     * @return
+     */
+    public static boolean isBlank(String string) {
+        return string == null || string.trim().equals("");
+    }
+
+    /**
+     * 判断数组是否是空
+     *
+     * @param array
+     * @return
+     */
+    public static boolean isBlank(Object[] array) {
+        return array == null || array.length == 0;
+    }
+
+    /**
+     * 判断集合是否是空
+     *
+     * @param array
+     * @return
+     */
+    public static boolean isBlank(Collection<? extends Object> array) {
+        return array == null || array.size() == 0;
+    }
+
+    /**
+     * 判断集合
+     *
+     */
+    public static boolean isNotBlank(Collection<? extends Object> array) {
+        return !isBlank(array);
+
+    }
+    /**
+     * 判断所有的集合是否都为空
+     *
+     * @param collections
+     * @return
+     */
+    public static boolean isBlankAll(Collection<?>... collections) {
+        for (Collection<?> c : collections) {
+            if (!isBlank(c)) {
+                return false;
+            }
+        }
+
+        return true;
+    }
+
+    /**
+     * 判断字符串strings中是否都是空串
+     *
+     * @param strings
+     * @return
+     */
+    public static boolean isBlankAll(String... strings) {
+        for (String s : strings) {
+            if (!isBlank(s)) {
+                return false;
+            }
+        }
+
+        return true;
+    }
+
+    /**
+     * 判断collections集合中是否至少有一个为空
+     *
+     * @param collections
+     * @return
+     */
+    public static boolean isBlankAtLeastOne(Collection<?>... collections) {
+        for (Collection<?> c : collections) {
+            if (isBlank(c)) {
+                return true;
+            }
+        }
+
+        return false;
+    }
+
+    /**
+     * 判断字符串strings中是否至少有一个为空
+     *
+     * @param strings
+     * @return
+     */
+    public static boolean isBlankAtLeastOne(String... strings) {
+        for (String s : strings) {
+            if (isBlank(s)) {
+                return true;
+            }
+        }
+
+        return false;
+    }
+
+    /**
+     * 将一个字符串转换成排序后的字符数组
+     *
+     * @param str
+     * @return
+     */
+    public static char[] sortCharArray(String str) {
+        char[] chars = str.toCharArray();
+        Arrays.sort(chars);
+        return chars;
+    }
+
+    public static String joiner(int[] ints, String split) {
+
+        if (ints.length == 0) {
+            return EMPTY;
+        }
+
+        StringBuilder sb = new StringBuilder(String.valueOf(ints[0]));
+
+        for (int i = 1; i < ints.length; i++) {
+            sb.append(split);
+            sb.append(ints[i]);
+        }
+
+        return sb.toString();
+    }
+
+    public static String joiner(double[] doubles, String split) {
+
+        if (doubles.length == 0) {
+            return EMPTY;
+        }
+
+        StringBuilder sb = new StringBuilder(String.valueOf(doubles[0]));
+
+        for (int i = 1; i < doubles.length; i++) {
+            sb.append(split);
+            sb.append(doubles[i]);
+        }
+
+        return sb.toString();
+    }
+
+    public static String joiner(float[] floats, String split) {
+
+        if (floats.length == 0) {
+            return EMPTY;
+        }
+
+        StringBuilder sb = new StringBuilder(String.valueOf(floats[0]));
+
+        for (int i = 1; i < floats.length; i++) {
+            sb.append(split);
+            sb.append(floats[i]);
+        }
+
+        return sb.toString();
+    }
+
+    public static String joiner(long[] longs, String split) {
+
+        if (longs.length == 0) {
+            return EMPTY;
+        }
+
+        StringBuilder sb = new StringBuilder(String.valueOf(longs[0]));
+
+        for (int i = 1; i < longs.length; i++) {
+            sb.append(split);
+            sb.append(longs[i]);
+        }
+
+        return sb.toString();
+    }
+
+    public static String toString(Object obj) {
+        if (obj == null) {
+            return NULL;
+        } else {
+            return obj.toString();
+        }
+    }
+
+    public static String joiner(Collection<?> c, String split) {
+
+        Iterator<?> iterator = c.iterator();
+
+        if (!iterator.hasNext()) {
+            return EMPTY;
+        }
+
+        StringBuilder sb = new StringBuilder(iterator.next().toString());
+
+        while (iterator.hasNext()) {
+            sb.append(split);
+            sb.append(toString(iterator.next()).toString());
+        }
+
+        return sb.toString();
+    }
+
+    public static boolean isBlank(char[] chars) {
+        int strLen;
+        if (chars == null || (strLen = chars.length) == 0) {
+            return true;
+        }
+        for (int i = 0; i < strLen; i++) {
+            if (Character.isWhitespace(chars[i]) == false) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    /**
+     * 正则匹配第一个
+     *
+     * @param regex
+     * @param input
+     * @return
+     */
+    public static String matcherFirst(String regex, String input) {
+        Matcher matcher = Pattern.compile(regex).matcher(input); // 读取特征个数
+        if (matcher.find()) {
+            return input.substring(matcher.start(), matcher.end());
+        } else {
+            return null;
+        }
+    }
+
+    /**
+     * trim 一个字符串。扩展了 String 类原生的 trim,对 BOM、不间断空格和中文全角空格也进行 trim
+     *
+     * @return
+     */
+    public static String trim(String value) {
+
+        if (value == null) {
+            return value;
+        }
+
+        int len = value.length();
+
+        int st = 0;
+
+        while ((st < len) && (Character.isWhitespace(value.charAt(st)) || value.charAt(st) == 65279 || value.charAt(st) == 160 || value.charAt(st) == 12288)) {
+            st++;
+        }
+        while ((st < len) && (Character.isWhitespace(value.charAt(len - 1)) || value.charAt(len - 1) == 65279 || value.charAt(len - 1) == 160 || value.charAt(len - 1) == 12288)) {
+            len--;
+        }
+        return ((st > 0) || (len < value.length())) ? value.substring(st, len) : value;
+    }
+
+    /**
+     * 正则匹配全部
+     *
+     * @param regex
+     * @param input
+     * @return
+     */
+    public static List<String> matcherAll(String regex, String input) {
+        List<String> result = new ArrayList<String>();
+        Matcher matcher = Pattern.compile(regex).matcher(input); // 读取特征个数
+        while (matcher.find()) {
+            result.add(input.substring(matcher.start(), matcher.end()));
+        }
+        return result;
+    }
+
+    /**
+     * 正则匹配全部结果
+     *
+     * @param regex
+     * @param input
+     * @return
+     */
+    public static Map<Integer, String> matcherAll2Map(String regex, String input) {
+        Map<Integer, String> result = new HashMap<Integer, String>();
+        Matcher matcher = Pattern.compile(regex).matcher(input);
+        for (int i = 0; matcher.find(); i++) {
+            result.put(i, matcher.group());
+        }
+        return result;
+    }
+
+    /**
+     * 正则匹配最后
+     *
+     * @param regex
+     * @param input
+     * @return
+     */
+    public static String matcherLast(String regex, String input) {
+        List<String> result = matcherAll(regex, input);
+        if (result.size() == 0) {
+            return null;
+        } else {
+            return result.get(result.size() - 1);
+        }
+    }
+
+
+    public static String getLongString(String word1, String word2) {
+        return word1.length() >= word2.length() ? word1 : word2;
+    }
+
+    public static String getShortString(String word1, String word2) {
+        return word1.length() < word2.length() ? word1 : word2;
+    }
+
+
+    public static String removeStrChinese(String str) {
+        return str.replaceAll(REGEX_CHINESE, "");
+    }
+
+}
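A few usage examples of the less obvious helpers; unlike String.trim(), this trim also strips the BOM (65279), the non-breaking space (160) and the full-width space (12288).

    import com.ruoyi.utils.resumeAnalysis.similarity.similarity.util.StringUtil;

    public class StringUtilSketch {
        public static void main(String[] args) {
            System.out.println(StringUtil.trim("\uFEFF  简历\u3000"));            // 简历
            System.out.println(StringUtil.matcherAll("\\d{4}", "2019年-2021年")); // [2019, 2021]
            System.out.println(StringUtil.isBlankAtLeastOne("Java", " "));        // true
            System.out.println(StringUtil.removeStrChinese("Java工程师"));        // Java
        }
    }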

+ 16 - 0
ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/util/TraverseEvent.java

@@ -0,0 +1,16 @@
+package com.ruoyi.utils.resumeAnalysis.similarity.similarity.util;
+
+/**
+ * 遍历接口
+ *
+ * @author itbluebox
+ */
+public interface TraverseEvent<T> {
+    /**
+     * 遍历每一个
+     *
+     * @param item
+     * @return
+     */
+    boolean visit(T item);
+}

+ 34 - 0
ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/util/XmlException.java

@@ -0,0 +1,34 @@
+package com.ruoyi.utils.resumeAnalysis.similarity.similarity.util;
+
+/**
+ * Runtime exception for XML handling.
+ */
+public class XmlException extends RuntimeException {
+
+    private static final long serialVersionUID = 381260478228427716L;
+
+    public static final String XML_PAYLOAD_EMPTY = "xml.payload.empty";
+    public static final String XML_ENCODE_ERROR = "xml.encoding.invalid";
+    public static final String FILE_NOT_FOUND = "xml.file.not.found";
+    public static final String XML_PARSE_ERROR = "xml.parse.error";
+    public static final String XML_READ_ERROR = "xml.read.error";
+    public static final String XML_VALIDATE_ERROR = "xml.validate.error";
+    public static final String XML_TRANSFORM_ERROR = "xml.transform.error";
+
+    public XmlException() {
+        super();
+    }
+
+    public XmlException(String key, Throwable cause) {
+        super(key, cause);
+    }
+
+    public XmlException(String key) {
+        super(key);
+    }
+
+    public XmlException(Throwable cause) {
+        super(cause);
+    }
+
+}

+ 549 - 0
ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/util/XmlUtils.java

@@ -0,0 +1,549 @@
+package com.ruoyi.utils.resumeAnalysis.similarity.similarity.util;
+
+import org.w3c.dom.Document;
+import org.w3c.dom.Element;
+import org.w3c.dom.Node;
+import org.w3c.dom.NodeList;
+import org.xml.sax.SAXException;
+
+import javax.xml.XMLConstants;
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.parsers.ParserConfigurationException;
+import javax.xml.transform.*;
+import javax.xml.transform.dom.DOMSource;
+import javax.xml.transform.stream.StreamResult;
+import javax.xml.transform.stream.StreamSource;
+import javax.xml.validation.Schema;
+import javax.xml.validation.SchemaFactory;
+import javax.xml.validation.Validator;
+import java.io.*;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Properties;
+
+/**
+ * Encapsulating XML common operations.
+ */
+public final class XmlUtils {
+
+    private static final String XMLNS_XSI = "xmlns:xsi";
+    private static final String XSI_SCHEMA_LOCATION = "xsi:schemaLocation";
+    private static final String LOGIC_YES = "yes";
+    private static final String DEFAULT_ENCODE = "UTF-8";
+    private static final String REG_INVALID_CHARS = "&#\\d+;";
+
+    /**
+     * Creates a new document instance.
+     *
+     * @return a new document instance
+     * @throws XmlException problem creating a new document
+     */
+    public static Document newDocument() throws XmlException {
+        Document doc = null;
+
+        try {
+            doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
+        } catch (ParserConfigurationException e) {
+            throw new XmlException(e);
+        }
+
+        return doc;
+    }
+
+    /**
+     * Parses the content of the given XML file as an XML document.
+     *
+     * @param file the XML file instance
+     * @return the document instance representing the entire XML document
+     * @throws XmlException problem parsing the XML file
+     */
+    public static Document getDocument(File file) throws XmlException {
+        InputStream in = getInputStream(file);
+        return getDocument(in);
+    }
+
+    /**
+     * Parses the content of the given stream as an XML document.
+     *
+     * @param in the XML file input stream
+     * @return the document instance representing the entire XML document
+     * @throws XmlException problem parsing the XML input stream
+     */
+    public static Document getDocument(InputStream in) throws XmlException {
+        Document doc = null;
+
+        try {
+            DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
+            doc = builder.parse(in);
+        } catch (ParserConfigurationException e) {
+            throw new XmlException(e);
+        } catch (SAXException e) {
+            throw new XmlException(XmlException.XML_PARSE_ERROR, e);
+        } catch (IOException e) {
+            throw new XmlException(XmlException.XML_READ_ERROR, e);
+        } finally {
+            if (in != null) {
+                try {
+                    in.close();
+                } catch (IOException e) {
+                    // TODO
+                }
+            }
+        }
+
+        return doc;
+    }
+
+    /**
+     * Creates a root element as well as a new document with specific tag name.
+     *
+     * @param tagName the name of the root element
+     * @return a new element instance
+     * @throws XmlException problem generating a new document
+     */
+    public static Element createRootElement(String tagName) throws XmlException {
+        Document doc = newDocument();
+        Element root = doc.createElement(tagName);
+        doc.appendChild(root);
+        return root;
+    }
+
+    /**
+     * Gets the root element from input stream.
+     *
+     * @param in the XML file input stream
+     * @return the root element of parsed document
+     * @throws XmlException problem parsing the XML file input stream
+     */
+    public static Element getRootElementFromStream(InputStream in) throws XmlException {
+        return getDocument(in).getDocumentElement();
+    }
+
+    /**
+     * Gets the root element from given XML file.
+     *
+     * @param file the XML file instance
+     * @return the root element of parsed document
+     * @throws XmlException problem parsing the XML file
+     */
+    public static Element getRootElementFromFile(File file) throws XmlException {
+        return getDocument(file).getDocumentElement();
+    }
+
+    /**
+     * Gets the root element from the given XML payload.
+     *
+     * @param payload the XML payload representing the XML file.
+     * @return the root element of parsed document
+     * @throws XmlException problem parsing the XML payload
+     */
+    public static Element getRootElementFromString(String payload) throws XmlException {
+        if (payload == null || payload.trim().length() < 1) {
+            throw new XmlException(XmlException.XML_PAYLOAD_EMPTY);
+        }
+
+        byte[] bytes = null;
+
+        try {
+            bytes = payload.getBytes(DEFAULT_ENCODE);
+        } catch (UnsupportedEncodingException e) {
+            throw new XmlException(XmlException.XML_ENCODE_ERROR, e);
+        }
+
+        InputStream in = new ByteArrayInputStream(bytes);
+        return getDocument(in).getDocumentElement();
+    }
+
+    /**
+     * Gets the descendant elements list from the parent element.
+     *
+     * @param parent  the parent element in the element tree
+     * @param tagName the specified tag name
+     * @return the NOT NULL descendant elements list
+     */
+    public static List<Element> getElements(Element parent, String tagName) {
+        NodeList nodes = parent.getElementsByTagName(tagName);
+        List<Element> elements = new ArrayList<>();
+
+        for (int i = 0; i < nodes.getLength(); i++) {
+            Node node = nodes.item(i);
+            if (node instanceof Element) {
+                elements.add((Element) node);
+            }
+        }
+
+        return elements;
+    }
+
+    /**
+     * Gets the immediately descendant element from the parent element.
+     *
+     * @param parent  the parent element in the element tree
+     * @param tagName the specified tag name.
+     * @return immediately descendant element of parent element, NULL otherwise.
+     */
+    public static Element getElement(Element parent, String tagName) {
+        List<Element> children = getElements(parent, tagName);
+
+        if (children.isEmpty()) {
+            return null;
+        } else {
+            return children.get(0);
+        }
+    }
+
+    /**
+     * Gets the immediately child elements list from the parent element.
+     *
+     * @param parent  the parent element in the element tree
+     * @param tagName the specified tag name
+     * @return the NOT NULL immediately child elements list
+     */
+    public static List<Element> getChildElements(Element parent, String tagName) {
+        NodeList nodes = parent.getElementsByTagName(tagName);
+        List<Element> elements = new ArrayList<>();
+
+        for (int i = 0; i < nodes.getLength(); i++) {
+            Node node = nodes.item(i);
+            if (node instanceof Element && node.getParentNode() == parent) {
+                elements.add((Element) node);
+            }
+        }
+
+        return elements;
+    }
+
+    /**
+     * Gets the immediately child element from the parent element.
+     *
+     * @param parent  the parent element in the element tree
+     * @param tagName the specified tag name
+     * @return immediately child element of parent element, NULL otherwise
+     */
+    public static Element getChildElement(Element parent, String tagName) {
+        List<Element> children = getChildElements(parent, tagName);
+
+        if (children.isEmpty()) {
+            return null;
+        } else {
+            return children.get(0);
+        }
+    }
+
+    /**
+     * Gets the value of the child element by tag name under the given parent
+     * element. If there is more than one child element, return the value of the
+     * first one.
+     *
+     * @param parent  the parent element
+     * @param tagName the tag name of the child element
+     * @return value of the first child element, NULL if tag not exists
+     */
+    public static String getElementValue(Element parent, String tagName) {
+        String value = null;
+
+        Element element = getElement(parent, tagName);
+        if (element != null) {
+            value = element.getTextContent();
+        }
+
+        return value;
+    }
+
+    /**
+     * Appends the child element to the parent element.
+     *
+     * @param parent  the parent element
+     * @param tagName the child element name
+     * @return the child element added to the parent element
+     */
+    public static Element appendElement(Element parent, String tagName) {
+        Element child = parent.getOwnerDocument().createElement(tagName);
+        parent.appendChild(child);
+        return child;
+    }
+
+    /**
+     * Appends the child element as well as value to the parent element.
+     *
+     * @param parent  the parent element
+     * @param tagName the child element name
+     * @param value   the child element value
+     * @return the child element added to the parent element
+     */
+    public static Element appendElement(Element parent, String tagName, String value) {
+        Element child = appendElement(parent, tagName);
+        child.setTextContent(value);
+        return child;
+    }
+
+    /**
+     * Appends another element as a child element.
+     *
+     * @param parent the parent element
+     * @param child  the child element to append
+     */
+    public static void appendElement(Element parent, Element child) {
+        Node tmp = parent.getOwnerDocument().importNode(child, true);
+        parent.appendChild(tmp);
+    }
+
+    /**
+     * Appends the CDATA element to the parent element.
+     *
+     * @param parent  the parent element
+     * @param tagName the CDATA element name
+     * @param value   the CDATA element value
+     * @return the CDATA element added to the parent element
+     */
+    public static Element appendCDATAElement(Element parent, String tagName, String value) {
+        Element child = appendElement(parent, tagName);
+        if (value == null) { // avoid "null" word in the XML payload
+            value = "";
+        }
+
+        Node cdata = child.getOwnerDocument().createCDATASection(value);
+        child.appendChild(cdata);
+        return child;
+    }
+
+    /**
+     * Converts the Node/Element instance to XML payload.
+     *
+     * @param node the node/element instance to convert
+     * @return the XML payload representing the node/element
+     * @throws XmlException problem converting XML to string
+     */
+    public static String childNodeToString(Node node) throws XmlException {
+        String payload = null;
+
+        try {
+            Transformer tf = TransformerFactory.newInstance().newTransformer();
+
+            Properties props = tf.getOutputProperties();
+            props.setProperty(OutputKeys.OMIT_XML_DECLARATION, LOGIC_YES);
+            tf.setOutputProperties(props);
+
+            StringWriter writer = new StringWriter();
+            tf.transform(new DOMSource(node), new StreamResult(writer));
+            payload = writer.toString();
+            payload = payload.replaceAll(REG_INVALID_CHARS, " ");
+        } catch (TransformerException e) {
+            throw new XmlException(XmlException.XML_TRANSFORM_ERROR, e);
+        }
+
+        return payload;
+    }
+
+    /**
+     * Converts the Node/Document/Element instance to XML payload.
+     *
+     * @param node the node/document/element instance to convert
+     * @return the XML payload representing the node/document/element
+     * @throws XmlException problem converting XML to string
+     */
+    public static String nodeToString(Node node) throws XmlException {
+        String payload = null;
+
+        try {
+            Transformer tf = TransformerFactory.newInstance().newTransformer();
+
+            Properties props = tf.getOutputProperties();
+            props.setProperty(OutputKeys.INDENT, LOGIC_YES);
+            props.setProperty(OutputKeys.ENCODING, DEFAULT_ENCODE);
+            tf.setOutputProperties(props);
+
+            StringWriter writer = new StringWriter();
+            tf.transform(new DOMSource(node), new StreamResult(writer));
+            payload = writer.toString();
+            payload = payload.replaceAll(REG_INVALID_CHARS, " ");
+        } catch (TransformerException e) {
+            throw new XmlException(XmlException.XML_TRANSFORM_ERROR, e);
+        }
+
+        return payload;
+    }
+
+    /**
+     * Converts an XML file to XML payload.
+     *
+     * @param file the XML file instance
+     * @return the XML payload representing the XML file
+     * @throws XmlException problem transforming XML to string
+     */
+    public static String xmlToString(File file) throws XmlException {
+        Element root = getRootElementFromFile(file);
+        return nodeToString(root);
+    }
+
+    /**
+     * Converts an XML file input stream to XML payload.
+     *
+     * @param in the XML file input stream
+     * @return the payload represents the XML file
+     * @throws XmlException problem transforming XML to string
+     */
+    public static String xmlToString(InputStream in) throws XmlException {
+        Element root = getRootElementFromStream(in);
+        return nodeToString(root);
+    }
+
+    /**
+     * Saves the node/document/element as XML file.
+     *
+     * @param doc  the XML node/document/element to save
+     * @param file the XML file to save
+     * @throws XmlException problem persisting XML file
+     */
+    public static void saveToXml(Node doc, File file) throws XmlException {
+        OutputStream out = null;
+
+        try {
+            Transformer tf = TransformerFactory.newInstance().newTransformer();
+
+            Properties props = tf.getOutputProperties();
+            props.setProperty(OutputKeys.METHOD, XMLConstants.XML_NS_PREFIX);
+            props.setProperty(OutputKeys.INDENT, LOGIC_YES);
+            tf.setOutputProperties(props);
+
+            DOMSource dom = new DOMSource(doc);
+            out = getOutputStream(file);
+            Result result = new StreamResult(out);
+            tf.transform(dom, result);
+        } catch (TransformerException e) {
+            throw new XmlException(XmlException.XML_TRANSFORM_ERROR, e);
+        } finally {
+            if (out != null) {
+                try {
+                    out.close();
+                } catch (IOException e) {
+                    // nothing to do
+                }
+            }
+        }
+    }
+
+    /**
+     * Validates the element tree context via given XML schema file.
+     *
+     * @param doc        the XML document to validate
+     * @param schemaFile the XML schema file instance
+     * @throws XmlException error occurs if the schema file not exists
+     */
+    public static void validateXml(Node doc, File schemaFile) throws XmlException {
+        validateXml(doc, getInputStream(schemaFile));
+    }
+
+    /**
+     * Validates the element tree context via given XML schema file.
+     *
+     * @param doc          the XML document to validate
+     * @param schemaStream the XML schema file input stream
+     * @throws XmlException error occurs if validation fail
+     */
+    public static void validateXml(Node doc, InputStream schemaStream) throws XmlException {
+        try {
+            Source source = new StreamSource(schemaStream);
+            Schema schema = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI).newSchema(source);
+
+            Validator validator = schema.newValidator();
+            validator.validate(new DOMSource(doc));
+        } catch (SAXException e) {
+            throw new XmlException(XmlException.XML_VALIDATE_ERROR, e);
+        } catch (IOException e) {
+            throw new XmlException(XmlException.XML_READ_ERROR, e);
+        } finally {
+            if (schemaStream != null) {
+                try {
+                    schemaStream.close();
+                } catch (IOException e) {
+                    // nothing to do
+                }
+            }
+        }
+    }
+
+    /**
+     * Transforms the XML content to XHTML/HTML format string with the XSL.
+     *
+     * @param payload  the XML payload to convert
+     * @param xsltFile the XML stylesheet file
+     * @return the transformed XHTML/HTML format string
+     * @throws XmlException problem converting XML to HTML
+     */
+    public static String xmlToHtml(String payload, File xsltFile) throws XmlException {
+        String result = null;
+
+        try {
+            Source template = new StreamSource(xsltFile);
+            Transformer transformer = TransformerFactory.newInstance().newTransformer(template);
+
+            Properties props = transformer.getOutputProperties();
+            props.setProperty(OutputKeys.OMIT_XML_DECLARATION, LOGIC_YES);
+            transformer.setOutputProperties(props);
+
+            StreamSource source = new StreamSource(new StringReader(payload));
+            StreamResult sr = new StreamResult(new StringWriter());
+            transformer.transform(source, sr);
+
+            result = sr.getWriter().toString();
+        } catch (TransformerException e) {
+            throw new XmlException(XmlException.XML_TRANSFORM_ERROR, e);
+        }
+
+        return result;
+    }
+
+    /**
+     * Sets the namespace to specific element.
+     *
+     * @param element        the element to set
+     * @param namespace      the namespace to set
+     * @param schemaLocation the XML schema file location URI
+     */
+    public static void setNamespace(Element element, String namespace, String schemaLocation) {
+        element.setAttributeNS(XMLConstants.XMLNS_ATTRIBUTE_NS_URI, XMLConstants.XMLNS_ATTRIBUTE, namespace);
+        element.setAttributeNS(XMLConstants.XMLNS_ATTRIBUTE_NS_URI, XMLNS_XSI,
+                XMLConstants.W3C_XML_SCHEMA_INSTANCE_NS_URI);
+        element.setAttributeNS(XMLConstants.W3C_XML_SCHEMA_INSTANCE_NS_URI, XSI_SCHEMA_LOCATION, schemaLocation);
+    }
+
+    /**
+     * Encodes the XML payload into legal XML characters.
+     *
+     * @param payload the XML payload to encode
+     * @return the encoded XML payload
+     * @throws XmlException problem encoding the XML payload
+     */
+    public static String encodeXml(String payload) throws XmlException {
+        Element root = createRootElement(XMLConstants.XML_NS_PREFIX);
+        root.setTextContent(payload);
+        return childNodeToString(root.getFirstChild());
+    }
+
+    private static InputStream getInputStream(File file) throws XmlException {
+        InputStream in = null;
+
+        try {
+            in = new FileInputStream(file);
+        } catch (FileNotFoundException e) {
+            throw new XmlException(XmlException.FILE_NOT_FOUND, e);
+        }
+
+        return in;
+    }
+
+    private static OutputStream getOutputStream(File file) throws XmlException {
+        OutputStream out = null;
+
+        try {
+            out = new FileOutputStream(file);
+        } catch (FileNotFoundException e) {
+            throw new XmlException(XmlException.FILE_NOT_FOUND, e);
+        }
+
+        return out;
+    }
+
+}
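A usage sketch that builds a small document and renders it; nodeToString indents and declares UTF-8, while childNodeToString omits the XML declaration.

    import com.ruoyi.utils.resumeAnalysis.similarity.similarity.util.XmlUtils;
    import org.w3c.dom.Element;

    public class XmlUtilsSketch {
        public static void main(String[] args) {
            Element root = XmlUtils.createRootElement("resume");
            XmlUtils.appendElement(root, "name", "张三");
            XmlUtils.appendCDATAElement(root, "summary", "Java & 前端");
            System.out.println(XmlUtils.nodeToString(root));       // full document, with XML declaration
            System.out.println(XmlUtils.childNodeToString(root));   // same element, declaration omitted
        }
    }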

+ 70 - 0
ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/word/CharBasedSimilarity.java

@@ -0,0 +1,70 @@
+package com.ruoyi.utils.resumeAnalysis.similarity.similarity.word;
+
+import com.ruoyi.utils.resumeAnalysis.similarity.similarity.ISimilarity;
+import com.ruoyi.utils.resumeAnalysis.similarity.similarity.util.StringUtil;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * 词语相似度:字面相似度计算方法
+ *
+ * @author itbluebox
+ */
+public class CharBasedSimilarity implements ISimilarity {
+    private final double alpha = 0.6;
+    private final double beta = 0.4;
+    private static CharBasedSimilarity instance = null;
+
+    public static CharBasedSimilarity getInstance() {
+        if (instance == null) {
+            instance = new CharBasedSimilarity();
+        }
+        return instance;
+    }
+
+    private CharBasedSimilarity() {
+
+    }
+
+
+    @Override
+    public double getSimilarity(String word1, String word2) {
+        if (StringUtil.isBlank(word1) && StringUtil.isBlank(word2)) {
+            return 1.0;
+        }
+        if (StringUtil.isBlank(word1) || StringUtil.isBlank(word2)) {
+            return 0.0;
+        }
+        if (word1.equalsIgnoreCase(word2)) {
+            return 1.0;
+        }
+        List<Character> sameChars = new ArrayList<>();
+        String longString = StringUtil.getLongString(word1, word2);
+        String shortString = StringUtil.getShortString(word1, word2);
+        for (int i = 0; i < longString.length(); i++) {
+            Character ch = longString.charAt(i);
+            if (shortString.contains(ch.toString())) {
+                sameChars.add(ch);
+            }
+        }
+        double dp = Math.min(1.0 * word1.length() / word2.length(), 1.0 * word2.length() / word1.length());
+        double part1 = alpha * (1.0 * sameChars.size() / word1.length() + 1.0 * sameChars.size() / word2.length()) / 2.0;
+        double part2 = beta * dp * (getWeightedResult(word1, sameChars) + getWeightedResult(word2, sameChars)) / 2.0;
+
+        return part1 + part2;
+    }
+
+    private double getWeightedResult(String word, List<Character> sameChars) {
+        double top = 0.0;
+        double bottom = 0.0;
+        for (int i = 0; i < word.length(); i++) {
+            if (sameChars.contains(word.charAt(i))) {
+                top += (i + 1);
+            }
+            bottom += (i + 1);
+        }
+        return 1.0 * top / bottom;
+    }
+
+}
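The score blends two parts: part1 (alpha = 0.6) rewards the proportion of shared characters in each word, and part2 (beta = 0.4) rewards shared characters sitting at later positions, scaled by the length ratio dp. A worked call:

    import com.ruoyi.utils.resumeAnalysis.similarity.similarity.word.CharBasedSimilarity;

    public class CharBasedSimilaritySketch {
        public static void main(String[] args) {
            // "计算机" and "计算器" share "计" and "算":
            // part1 = 0.6 * (2/3 + 2/3) / 2 = 0.4, part2 = 0.4 * 1.0 * (0.5 + 0.5) / 2 = 0.2
            double score = CharBasedSimilarity.getInstance().getSimilarity("计算机", "计算器");
            System.out.println(score); // ≈ 0.6
        }
    }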

+ 11 - 0
ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/word/IWordSimilarity.java

@@ -0,0 +1,11 @@
+package com.ruoyi.utils.resumeAnalysis.similarity.similarity.word;
+
+import com.ruoyi.utils.resumeAnalysis.similarity.similarity.ISimilarity;
+
+/**
+ * 处理词层面的相似度计算接口
+ *
+ * @author itbluebox
+ */
+public interface IWordSimilarity extends ISimilarity {
+}

+ 74 - 0
ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/word/clin/CilinCode.java

@@ -0,0 +1,74 @@
+package com.ruoyi.utils.resumeAnalysis.similarity.similarity.word.clin;
+
+/**
+ * 哈工大词林扩展版规则编码表
+ * 表中编码位从左到右顺序排列,其中,第8位对应的标记位为“=”、“#”和“@”三种符号之一。其中“=”代表常见的“同义”关系,
+ * “#”代表词语之间的相关关系,“@”则代表词语自我封闭的独立性质,它在词典中既没有同义词,也没有相关词。
+ *
+ * @author itbluebox
+ */
+public class CilinCode {
+    public static double[] WEIGHT = new double[]{1.2, 1.2, 1.0, 1.0, 0.8, 0.4};
+    public static double TOTAL_WEIGHT = 5.6;
+
+    public static String getCodeLevel(String code, int level) {
+        switch (level) {
+            case 1:
+                return code.substring(0, 1);
+            case 2:
+                return code.substring(1, 2);
+            case 3:
+                return code.substring(2, 4);
+            case 4:
+                return code.substring(4, 5);
+            case 5:
+                return code.substring(5, 7);
+            case 6:
+                return code.substring(7);
+        }
+        return "";
+    }
+
+    /**
+     * 获取共同部分编码的权重
+     *
+     * @param code1
+     * @param code2
+     * @return
+     */
+    public static double calculateCommonWeight(String code1, String code2) {
+        double weight = 0.0;
+        for (int i = 1; i <= 6; i++) {
+            String c1 = getCodeLevel(code1, i);
+            String c2 = getCodeLevel(code2, i);
+            if (c1.equals(c2)) {
+                weight += WEIGHT[i - 1];
+            } else {
+                break;
+            }
+        }
+        return weight;
+    }
+
+    /**
+     * 打印
+     *
+     * @param code
+     * @return
+     */
+    public static String printCoding(String code) {
+        StringBuilder sb = new StringBuilder();
+        for (int i = 1; i <= 6; i++) {
+            if (i == 1) {
+                sb.append("[LEVEL_" + i);
+            } else {
+                sb.append(", LEVEL_" + i);
+            }
+            sb.append(": ");
+            sb.append(getCodeLevel(code, i));
+        }
+        sb.append("]");
+
+        return sb.toString();
+    }
+}
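A worked example of the level weights; the two codes below are hypothetical 8-character Cilin codes that agree on the first four levels and diverge at level 5, so the common weight is 1.2 + 1.2 + 1.0 + 1.0 = 4.4 out of TOTAL_WEIGHT = 5.6.

    import com.ruoyi.utils.resumeAnalysis.similarity.similarity.word.clin.CilinCode;

    public class CilinCodeSketch {
        public static void main(String[] args) {
            String code1 = "Aa01A01=";  // hypothetical codes, only to illustrate the weighting
            String code2 = "Aa01A02=";
            double common = CilinCode.calculateCommonWeight(code1, code2); // 4.4
            System.out.println(common / CilinCode.TOTAL_WEIGHT);           // ≈ 0.786
        }
    }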

+ 85 - 0
ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/word/clin/CilinDictionary.java

@@ -0,0 +1,85 @@
+package com.ruoyi.utils.resumeAnalysis.similarity.similarity.word.clin;
+
+import com.ruoyi.utils.resumeAnalysis.similarity.Similarity;
+import com.ruoyi.utils.resumeAnalysis.similarity.similarity.util.DicReader;
+import com.ruoyi.utils.resumeAnalysis.similarity.similarity.util.FileUtil;
+import com.ruoyi.utils.resumeAnalysis.similarity.similarity.util.StringUtil;
+import com.ruoyi.utils.resumeAnalysis.similarity.similarity.util.TraverseEvent;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+import java.util.zip.GZIPInputStream;
+
+/**
+ * 词林编码
+ *
+ * @author itbluebox
+ */
+public class CilinDictionary {
+    private static Logger logger = LoggerFactory.getLogger(CilinDictionary.class);
+    private final static String path = Similarity.Config.CilinPath;
+    // 以词语为索引
+    private final Map<String, Set<String>> wordIndex = new HashMap<>();
+    // 以编码为索引
+    private final Map<String, Set<String>> codeIndex = new HashMap<>();
+    private static CilinDictionary instance;
+
+    public static CilinDictionary getInstance() {
+        if (instance == null) {
+            try {
+                instance = new CilinDictionary();
+            } catch (IOException e) {
+                logger.error("exception:{}", e.getMessage());
+            }
+        }
+        return instance;
+    }
+
+    private CilinDictionary() throws IOException {
+        InputStream inputStream = new GZIPInputStream(DicReader.getInputStream(path));
+        TraverseEvent<String> event = line -> {
+            String[] items = line.split(" ");
+            Set<String> set = new HashSet<>();
+            for (int i = 2; i < items.length; i++) {
+                String code = items[i].trim();
+                if (StringUtil.isNotBlank(code)) {
+                    set.add(code);
+                    // add to codeIndex
+                    Set<String> codeWords = codeIndex.get(code);
+                    if (codeWords == null) {
+                        codeWords = new HashSet<>();
+                    }
+                    codeWords.add(items[0]);
+                    codeIndex.put(code, codeWords);
+                }
+            }
+            wordIndex.put(items[0], set);
+            return false;
+        };
+        logger.info("loading cilin dictionary...");
+        long start = System.currentTimeMillis();
+        FileUtil.traverseLines(inputStream, "UTF-8", event);
+        logger.info("loading ciling dictionary complete! time spend:{}", System.currentTimeMillis() - start + "ms");
+    }
+
+    /**
+     * 获取某词语的词林编码,一个词语可以对应多个编码,set表示
+     *
+     * @param word
+     * @return
+     */
+    public Set<String> getCilinCodes(String word) {
+        return wordIndex.get(word);
+    }
+
+    public Set<String> getCilinWords(String code) {
+        return codeIndex.get(code);
+    }
+
+}

+ 58 - 0
ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/word/clin/CilinSimilarity.java

@@ -0,0 +1,58 @@
+package com.ruoyi.utils.resumeAnalysis.similarity.similarity.word.clin;
+
+import com.ruoyi.utils.resumeAnalysis.similarity.similarity.ISimilarity;
+import com.ruoyi.utils.resumeAnalysis.similarity.similarity.util.StringUtil;
+
+import java.util.Set;
+
+/**
+ * 词林编码的相似度计算
+ *
+ * @author itbluebox
+ */
+public class CilinSimilarity implements ISimilarity {
+    private static CilinSimilarity instance = null;
+
+    public static CilinSimilarity getInstance() {
+        if (instance == null) {
+            instance = new CilinSimilarity();
+        }
+        return instance;
+    }
+
+    private CilinSimilarity() {
+    }
+
+    @Override
+    public double getSimilarity(String word1, String word2) {
+        if (StringUtil.isBlank(word1) && StringUtil.isBlank(word2)) {
+            return 1.0;
+        }
+        if (StringUtil.isBlank(word1) || StringUtil.isBlank(word2)) {
+            return 0.0;
+        }
+        if (word1.equalsIgnoreCase(word2)) {
+            return 1.0;
+        }
+        double sim = 0.0;
+        Set<String> codeSet1 = CilinDictionary.getInstance().getCilinCodes(word1);
+        Set<String> codeSet2 = CilinDictionary.getInstance().getCilinCodes(word2);
+        if (codeSet1 == null || codeSet2 == null) {
+            return 0.0;
+        }
+        for (String code1 : codeSet1) {
+            for (String code2 : codeSet2) {
+                double s = getSimilarityByCode(code1, code2);
+                //System.out.println(code1 + ":" + code2 + ":" + CilinCode.calculateCommonWeight(code1, code2));
+                if (sim < s) sim = s;
+            }
+        }
+
+        return sim;
+    }
+
+    public double getSimilarityByCode(String code1, String code2) {
+        return CilinCode.calculateCommonWeight(code1, code2) / CilinCode.TOTAL_WEIGHT;
+    }
+
+}
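A usage sketch; the score is the best pairwise match between the two words' code sets, and words absent from the bundled dictionary score 0.0 as coded above, so the example words below are only an assumption about the dictionary's contents.

    import com.ruoyi.utils.resumeAnalysis.similarity.similarity.word.clin.CilinSimilarity;

    public class CilinSimilaritySketch {
        public static void main(String[] args) {
            double sim = CilinSimilarity.getInstance().getSimilarity("电脑", "计算机");
            System.out.println(sim); // high if the words share a near-identical Cilin code, 0.0 if either is missing
        }
    }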

+ 68 - 0
ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/word/hownet/IHownetMeta.java

@@ -0,0 +1,68 @@
+package com.ruoyi.utils.resumeAnalysis.similarity.similarity.word.hownet;
+
+/**
+ * Hownet 接口
+ *
+ * @author itbluebox
+ */
+public interface IHownetMeta {
+    String Symbol_Descriptions[][] = {
+            {"#", "表示与其相关"},
+            {"%", "是其部分"},
+            {"$", "可以被该V处置,或是该V的受事、对象、领有物,或内容"},
+            {"*", "施事或工具"},
+            {"+", "所标记的角色是隐性的,几乎在实际语言中不会出现"},
+            {"&", "指向"},
+            {"~", "多半是,多半有,很可能"},
+            {"@", "可以做V的空间或时间"},
+            {"?", "可以使N的材料"},
+            {"(", "至于其中的应该是一个词标记"},
+            {"^", "不存在,或没有,或不能"},
+            {"!", "表示某一属性为一敏感的属性,如味道之与食物"},
+            {"[", "标示概念的共性属性"}
+    };
+
+    /**
+     * γ:具体词与义元的相似度一律为一个较小的常数
+     */
+    double gamma = 0.2;
+
+    /**
+     * δ:任一个非空值与空值的相似度为一个较小的常数,此处为0.2
+     */
+    double delta = 0.2;
+
+    /**
+     * β1实词概念第一基本义原描述式的权重
+     */
+    double beta1 = 0.5;
+    /**
+     * β2实词概念其他基本义原描述式的权重
+     */
+    double beta2 = 0.2;
+    /**
+     * β3实词概念关系义原描述式的权重
+     */
+    double beta3 = 0.17;
+    /**
+     * β4实词概念符号义原描述式的权重
+     */
+    double beta4 = 0.13;
+
+    /**
+     * Θ 计算后面概念的义原与参照概念所有义原的最大相似度, 并乘以两个概念主义原相似度的积(主义原通过该方式起约束作用),
+     * 如果数值大于该值时才会起参照作用, 去掉冗余的不重要义原
+     */
+    double PARAM_THETA = 0.5;
+    /**
+     * Ω 计算前面概念的义原与参照概念所有义原的最大相似度,并乘以两个概念主义原相似度的积(主义原通过该方式起约束作用),
+     * 如果数值大于该值时才会调整前面概念的义原符号, 以起修正作用
+     */
+    double PARAM_OMEGA = 0.8;
+    /**
+     * 阈值
+     */
+    double PARAM_XI = 0.6;
+
+
+}

+ 230 - 0
ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/word/hownet/concept/Concept.java

@@ -0,0 +1,230 @@
+package com.ruoyi.utils.resumeAnalysis.similarity.similarity.word.hownet.concept;
+
+import com.ruoyi.utils.resumeAnalysis.similarity.similarity.word.hownet.IHownetMeta;
+
+import java.util.*;
+
+/**
+ * 知网的概念表示类
+ * <p>example和英文部分对于相似度的计算不起作用,考虑到内存开销, 在概念的表示中去掉了这部分数据的对应定义
+ *
+ * @author itbluebox
+ */
+public class Concept implements IHownetMeta {
+    // 概念名称
+    protected String word;
+    // 词性 part of speech
+    protected String pos;
+    // 定义
+    protected String define;
+    // 实词(true),虚词(false)
+    protected boolean bSubstantive;
+    // 主基本义原
+    protected String mainSememe;
+    // 其他基本义原
+    protected String[] secondSememes;
+    // 关系义原
+    protected String[] relationSememes;
+    // 关系符号描述
+    protected String[] symbolSememes;
+    // 类型
+    static String[][] concept_Type = {
+            {"=", "事件"},
+            {"aValue|属性值", "属性值"},
+            {"qValue|数量值", "数量值"},
+            {"attribute|属性", "属性"},
+            {"quantity|数量", "数量"},
+            {"unit|", "单位"},
+            {"%", "部件"}
+    };
+
+    public Concept(String word, String pos, String define) {
+        this.word = word;
+        this.pos = pos;
+        this.define = (define == null) ? "" : define.trim();
+
+        // 虚词表示:{***}
+        if (this.define.length() > 0 && this.define.charAt(0) == '{' && this.define.charAt(this.define.length() - 1) == '}') {
+            this.bSubstantive = false;
+        } else {
+            this.bSubstantive = true;
+        }
+        initDefine();
+    }
+
+    private void initDefine() {
+        List<String> secondList = new ArrayList<>();//其他基本义原
+        List<String> relationList = new ArrayList<>();//关系义原
+        List<String> symbolList = new ArrayList<>();//符号义原
+        String tokenString = this.define;
+        if (!this.bSubstantive) {//如果不是实词,则处理“{}”中的内容
+            tokenString = define.substring(1, define.length() - 1);
+        }
+        StringTokenizer token = new StringTokenizer(tokenString, ",", false);
+
+        if (token.hasMoreTokens()) {
+            this.mainSememe = token.nextToken();
+        }
+        main_loop:
+        while (token.hasMoreTokens()) {
+            String item = token.nextToken();
+            if (item.equals("")) continue;
+            //判断符号义原
+            String symbol = item.substring(0, 1);
+            for (int i = 0; i < Symbol_Descriptions.length; i++) {
+                if (symbol.equals(Symbol_Descriptions[i][0])) {
+                    symbolList.add(item);
+                    continue main_loop;
+                }
+            }
+            //区分关系义原(含"=")与其他基本义原
+            if (item.indexOf('=') > 0) {
+                relationList.add(item);
+            } else {
+                secondList.add(item);
+            }
+        }
+        this.secondSememes = secondList.toArray(new String[secondList.size()]);
+        this.relationSememes = relationList.toArray(new String[relationList.size()]);
+        this.symbolSememes = symbolList.toArray(new String[symbolList.size()]);
+    }
+
+    public String getWord() {
+        return word;
+    }
+
+    public void setWord(String word) {
+        this.word = word;
+    }
+
+    public String getPos() {
+        return pos;
+    }
+
+    public void setPos(String pos) {
+        this.pos = pos;
+    }
+
+    public String getDefine() {
+        return define;
+    }
+
+    public void setDefine(String define) {
+        this.define = define;
+    }
+
+    public boolean isbSubstantive() {
+        return bSubstantive;
+    }
+
+    public void setbSubstantive(boolean bSubstantive) {
+        this.bSubstantive = bSubstantive;
+    }
+
+    public String getMainSememe() {
+        return mainSememe;
+    }
+
+    public void setMainSememe(String mainSememe) {
+        this.mainSememe = mainSememe;
+    }
+
+    public String[] getSecondSememes() {
+        return secondSememes;
+    }
+
+    public void setSecondSememes(String[] secondSememes) {
+        this.secondSememes = secondSememes;
+    }
+
+    public String[] getRelationSememes() {
+        return relationSememes;
+    }
+
+    public void setRelationSememes(String[] relationSememes) {
+        this.relationSememes = relationSememes;
+    }
+
+    public String[] getSymbolSememes() {
+        return symbolSememes;
+    }
+
+    public void setSymbolSememes(String[] symbolSememes) {
+        this.symbolSememes = symbolSememes;
+    }
+
+    public Set<String> getAllSememeNames() {
+        Set<String> names = new HashSet<>();
+        //主义原
+        names.add(getMainSememe());
+        //关系义原
+        for (String item : getRelationSememes()) {
+            names.add(item.substring(item.indexOf("=") + 1));
+        }
+        //符号义原
+        for (String item : getSymbolSememes()) {
+            names.add(item.substring(1));
+        }
+        //其他义原集合
+        for (String item : getSecondSememes()) {
+            names.add(item);
+        }
+        return names;
+    }
+
+    public String getType() {
+        for (int i = 0; i < concept_Type.length; i++) {
+            if (define.toUpperCase().indexOf(concept_Type[i][0].toUpperCase()) >= 0) {
+                return concept_Type[i][1];
+            }
+        }
+        return "普通";
+    }
+
+    @Override
+    public String toString() {
+        StringBuilder sb = new StringBuilder();
+        sb.append("名称=");
+        sb.append(this.word);
+        sb.append("; 词性=");
+        sb.append(this.pos);
+        sb.append("; 定义=");
+        sb.append(this.define);
+        sb.append("; 第一基本义元:[" + mainSememe);
+
+        sb.append("]; 其他基本义元描述:[");
+        for (String sem : secondSememes) {
+            sb.append(sem);
+            sb.append(";");
+        }
+
+        sb.append("]; [关系义元描述:");
+        for (String sem : relationSememes) {
+            sb.append(sem);
+            sb.append(";");
+        }
+
+        sb.append("]; [关系符号描述:");
+        for (String sem : symbolSememes) {
+            sb.append(sem);
+            sb.append(";");
+        }
+        sb.append("]");
+        return sb.toString();
+    }
+
+    @Override
+    public int hashCode() {
+        return define == null ? word.hashCode() : define.hashCode();
+    }
+
+    @Override
+    public boolean equals(Object object) {
+        if (object instanceof Concept) {
+            Concept c = (Concept) object;
+            return word.equals(c.word) && define.equals(c.define);
+        } else {
+            return false;
+        }
+    }
+}
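A quick sketch of how Concept#initDefine splits a definition string into the four sememe buckets; the define below is made up purely to exercise the parser and is not a real HowNet entry:

    import com.ruoyi.utils.resumeAnalysis.similarity.similarity.word.hownet.concept.Concept;

    public class ConceptDemo {
        public static void main(String[] args) {
            Concept c = new Concept("医生", "N", "human|人,#occupation|职位,*earn|赚,agent=medical|医");
            System.out.println(c.getMainSememe());                        // human|人
            System.out.println(String.join(";", c.getSymbolSememes()));   // #occupation|职位;*earn|赚
            System.out.println(String.join(";", c.getRelationSememes())); // agent=medical|医
            System.out.println(c.getAllSememeNames());                    // sememe names with symbol/relation prefixes stripped
        }
    }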

+ 24 - 0
ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/word/hownet/concept/ConceptLinkedList.java

@@ -0,0 +1,24 @@
+package com.ruoyi.utils.resumeAnalysis.similarity.similarity.word.hownet.concept;
+
+import java.util.LinkedList;
+
+/**
+ * 概念链表,支持按定义去重添加概念
+ * @author itbluebox
+ */
+public class ConceptLinkedList extends LinkedList<Concept>{
+    public void removeLast(int size){
+        for(int i =0;i<size;i++){
+            removeLast();
+        }
+    }
+
+    public void addByDefine(Concept concept){
+        for(Concept c:this){
+            if(c.getDefine().equals(concept.getDefine())){
+                return;
+            }
+        }
+        add(concept);
+    }
+}

+ 256 - 0
ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/word/hownet/concept/ConceptParser.java

@@ -0,0 +1,256 @@
+package com.ruoyi.utils.resumeAnalysis.similarity.similarity.word.hownet.concept;
+
+import com.google.common.collect.HashMultimap;
+import com.google.common.collect.Multimap;
+import com.ruoyi.utils.resumeAnalysis.similarity.Similarity;
+import com.ruoyi.utils.resumeAnalysis.similarity.similarity.util.DicReader;
+import com.ruoyi.utils.resumeAnalysis.similarity.similarity.util.MathUtil;
+import com.ruoyi.utils.resumeAnalysis.similarity.similarity.util.StringUtil;
+import com.ruoyi.utils.resumeAnalysis.similarity.similarity.word.IWordSimilarity;
+import com.ruoyi.utils.resumeAnalysis.similarity.similarity.word.hownet.IHownetMeta;
+import com.ruoyi.utils.resumeAnalysis.similarity.similarity.word.hownet.sememe.SememeParser;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import javax.xml.namespace.QName;
+import javax.xml.stream.XMLEventReader;
+import javax.xml.stream.XMLInputFactory;
+import javax.xml.stream.events.StartElement;
+import javax.xml.stream.events.XMLEvent;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Collection;
+import java.util.zip.GZIPInputStream;
+
+/**
+ * 概念解析器
+ *
+ * @author itbluebox
+ */
+public abstract class ConceptParser implements IHownetMeta, IWordSimilarity {
+    private static final Logger logger = LoggerFactory.getLogger(ConceptParser.class);
+    private static Multimap<String, Concept> CONCEPTS = null;
+    private final static String path = Similarity.Config.ConceptXmlPath;
+    protected SememeParser sememeParser = null;
+
+    /**
+     * 集合运算类型,目前支持均值运算和模糊集运算两种形式
+     */
+    public enum OPERATE_TYPE {
+        AVERAGE,
+        FUZZY
+    }
+
+    private OPERATE_TYPE currentOperateType = OPERATE_TYPE.AVERAGE;
+
+    public ConceptParser(SememeParser sememeParser) throws IOException {
+        this.sememeParser = sememeParser;
+        // 词典为静态共享数据,按类加锁,避免不同实例并发初始化
+        synchronized (ConceptParser.class) {
+            if (CONCEPTS == null) {
+                loadFile();
+            }
+        }
+    }
+
+
+    private static void loadFile() throws IOException {
+        CONCEPTS = HashMultimap.create();
+        InputStream inputStream = new GZIPInputStream(DicReader.getInputStream(path));
+        load(inputStream);
+    }
+
+    /**
+     * 用户自定义概念词典
+     *
+     * @param xmlFile
+     * @throws IOException
+     */
+    public static void load(File xmlFile) throws IOException {
+        if (CONCEPTS == null) {
+            loadFile();
+        }
+        load(new FileInputStream(xmlFile));
+    }
+
+    private static void load(InputStream inputStream) throws IOException {
+        long start = System.currentTimeMillis();
+        int count = 0;
+        try {
+            XMLInputFactory inputFactory = XMLInputFactory.newInstance();
+            XMLEventReader xmlEventReader = inputFactory.createXMLEventReader(inputStream);
+            while (xmlEventReader.hasNext()) {
+                XMLEvent event = xmlEventReader.nextEvent();
+                if (event.isStartElement()) {
+                    StartElement startElement = event.asStartElement();
+                    if (startElement.getName().toString().equals("c")) {
+                        String word = startElement.getAttributeByName(QName.valueOf("w")).getValue();
+                        String define = startElement.getAttributeByName(QName.valueOf("d")).getValue();
+                        String pos = startElement.getAttributeByName(QName.valueOf("p")).getValue();
+                        CONCEPTS.put(word, new Concept(word, pos, define));
+                        count++;
+                    }
+                }
+            }
+            inputStream.close();
+        } catch (Exception e) {
+            throw new IOException(e);
+        }
+        logger.info("complete! count num:" + count + ",time spend:" + (System.currentTimeMillis() - start) + "ms");
+    }
+
+    /**
+     * 获取两个词语的相似度,如果一个词语对应多个概念,则返回相似度最大的一对
+     *
+     * @param word1
+     * @param word2
+     * @return
+     */
+    public abstract double getSimilarity(String word1, String word2);
+
+    /**
+     * 计算四个组成部分的相似度方式,不同的算法对这四个部分的处理或者说权重分配不同
+     *
+     * @param sim_v1 主义原的相似度
+     * @param sim_v2 其他基本义原的相似度
+     * @param sim_v3 关系义原的相似度
+     * @param sim_v4 符号义原的相似度
+     * @return
+     */
+    protected abstract double calculate(double sim_v1, double sim_v2, double sim_v3, double sim_v4);
+
+    /**
+     * 判断一个词语是否是一个概念
+     *
+     * @param word
+     * @return
+     */
+    public boolean isConcept(String word) {
+        return StringUtil.isNotBlank(CONCEPTS.get(word));
+    }
+
+    /**
+     * 根据名称获取对应的概念定义信息,由于一个词语可能对应多个概念,因此返回一个集合
+     *
+     * @param key
+     * @return
+     */
+    public Collection<Concept> getConcepts(String key) {
+        return CONCEPTS.get(key);
+    }
+
+    public double getSimilarity(Concept concept1, Concept concept2) {
+        double similarity = 0.0;
+        if (concept1 == null || concept2 == null || !concept1.getPos().equals(concept2.getPos())) {
+            return 0.0;
+        }
+        if (concept1.equals(concept2)) {
+            return 1.0;
+        }
+        // 虚词和实词概念的相似度是0
+        if (concept1.isbSubstantive() != concept2.isbSubstantive()) {
+            return 0.0;
+        }
+        // 虚词
+        if (!concept1.isbSubstantive()) {
+            similarity = sememeParser.getSimilarity(concept1.getMainSememe(), concept2.getMainSememe());
+        } else {// 实词
+            double sim1 = sememeParser.getSimilarity(concept1.getMainSememe(), concept2.getMainSememe());
+            double sim2 = getSimilarity(concept1.getSecondSememes(), concept2.getSecondSememes());
+            double sim3 = getSimilarity(concept1.getRelationSememes(), concept2.getRelationSememes());
+            double sim4 = getSimilarity(concept1.getSymbolSememes(), concept2.getSymbolSememes());
+            similarity = calculate(sim1, sim2, sim3, sim4);
+        }
+        return similarity;
+    }
+
+    /**
+     * 计算两个义原集合的相似度
+     * 每一个集合都是一个概念的某一类义原集合,如第二基本义原、符号义原、关系义原等
+     *
+     * @param sememes1
+     * @param sememes2
+     * @return
+     */
+    private double getSimilarity(String[] sememes1, String[] sememes2) {
+        if (currentOperateType == OPERATE_TYPE.FUZZY) {
+            return getSimilarityFuzzy(sememes1, sememes2);
+        } else {
+            return getSimilarityAVG(sememes1, sememes2);
+        }
+    }
+
+    private double getSimilarityFuzzy(String[] sememes1, String[] sememes2) {
+        // TODO
+        return 0;
+    }
+
+    public void setOperateType(OPERATE_TYPE type) {
+        this.currentOperateType = type;
+    }
+
+    private double getSimilarityAVG(String[] sememes1, String[] sememes2) {
+        double similarity;
+        double[][] scoreArray;
+        if (StringUtil.isBlank(sememes1) || StringUtil.isBlank(sememes2)) {
+            if (StringUtil.isBlank(sememes1) && StringUtil.isBlank(sememes2)) {
+                return 1.0;
+            } else {
+                return delta;// 一个非空值与空值的相似度为一个小的常数
+            }
+        }
+        double score = 0.0;
+        int arrayLen = MathUtil.max(sememes1.length, sememes2.length);
+        scoreArray = new double[arrayLen][arrayLen];
+        // calculate similarity of two set
+        for (int i = 0; i < sememes1.length; i++) {
+            for (int j = 0; j < sememes2.length; j++) {
+                scoreArray[i][j] = sememeParser.getSimilarity(sememes1[i], sememes2[j]);
+            }
+        }
+
+        // get max similarity score
+        while (scoreArray.length > 0) {
+            double[][] temp;
+            int row = 0;
+            int column = 0;
+            double max = scoreArray[row][column];
+            for (int i = 0; i < scoreArray.length; i++) {
+                for (int j = 0; j < scoreArray[i].length; j++) {
+                    if (scoreArray[i][j] > max) {
+                        row = i;
+                        column = j;
+                        max = scoreArray[i][j];
+                    }
+                }
+            }
+            score += max;
+            // 过滤掉该行该列,继续计算
+            temp = new double[scoreArray.length - 1][scoreArray.length - 1];
+            for (int i = 0; i < scoreArray.length; i++) {
+                if (i == row) {
+                    continue;
+                }
+                for (int j = 0; j < scoreArray[i].length; j++) {
+                    if (j == column) {
+                        continue;
+                    }
+                    int tempRow = i;
+                    int tempColumn = j;
+                    if (i > row) {
+                        tempRow--;
+                    }
+                    if (j > column) {
+                        tempColumn--;
+                    }
+                    temp[tempRow][tempColumn] = scoreArray[i][j];
+                }
+            }
+            scoreArray = temp;
+        }
+        similarity = score / arrayLen;
+        return similarity;
+    }
+
+}
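The greedy "take the global maximum, then strike out its row and column" loop in getSimilarityAVG is easier to see on a small matrix. The following is a standalone sketch of the same idea with invented scores, not the project's code:

    public class GreedyAvgDemo {

        // Same idea as getSimilarityAVG: repeatedly pick the largest remaining score,
        // discard its row and column, then divide by max(m, n) so unmatched sememes count as 0.
        static double greedyAverage(double[][] score) {
            int rows = score.length, cols = score[0].length;
            boolean[] usedRow = new boolean[rows];
            boolean[] usedCol = new boolean[cols];
            double sum = 0.0;
            for (int k = 0; k < Math.min(rows, cols); k++) {
                double max = -1.0;
                int bi = -1, bj = -1;
                for (int i = 0; i < rows; i++) {
                    if (usedRow[i]) continue;
                    for (int j = 0; j < cols; j++) {
                        if (!usedCol[j] && score[i][j] > max) {
                            max = score[i][j];
                            bi = i;
                            bj = j;
                        }
                    }
                }
                usedRow[bi] = true;
                usedCol[bj] = true;
                sum += max;
            }
            return sum / Math.max(rows, cols);
        }

        public static void main(String[] args) {
            // pairwise similarities of 3 sememes vs 2 sememes (invented values)
            double[][] score = {{0.9, 0.2}, {0.1, 0.6}, {0.3, 0.4}};
            System.out.println(greedyAverage(score)); // picks 0.9 and 0.6, divides by 3 -> 0.5
        }
    }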

+ 317 - 0
ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/word/hownet/concept/ConceptSimilarity.java

@@ -0,0 +1,317 @@
+package com.ruoyi.utils.resumeAnalysis.similarity.similarity.word.hownet.concept;
+
+import com.ruoyi.utils.resumeAnalysis.similarity.similarity.util.StringUtil;
+import com.ruoyi.utils.resumeAnalysis.similarity.similarity.word.hownet.sememe.SememeSimilarity;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.LinkedList;
+import java.util.List;
+
+/**
+ * 概念解析器的实现,用于获取概念、计算概念的相似度等
+ * 加入了剪枝处理
+ *
+ * @author itbluebox
+ */
+public class ConceptSimilarity extends ConceptParser {
+    private static final int MAX_COMBINED_COUNT = 12;
+    private static ConceptSimilarity instance = null;
+
+    public static ConceptSimilarity getInstance() {
+        if (instance == null) {
+            try {
+                instance = new ConceptSimilarity();
+            } catch (IOException e) {
+                e.printStackTrace();
+            }
+        }
+        return instance;
+    }
+    private ConceptSimilarity() throws IOException {
+        super(new SememeSimilarity());
+    }
+
+    public ConceptSimilarity(SememeSimilarity sememeSimilarity) throws IOException {
+        super(sememeSimilarity);
+    }
+
+    /**
+     * 获取两个词语的相似度,如果一个词语对应多个概念,则返回相似度最大的
+     *
+     * @param word1
+     * @param word2
+     * @return
+     */
+    @Override
+    public double getSimilarity(String word1, String word2) {
+        double similarity = 0.0;
+        if (word1.equals(word2)) {
+            return 1.0;
+        }
+        Collection<Concept> concepts1 = getConcepts(word1);
+        Collection<Concept> concepts2 = getConcepts(word2);
+        // 未登录词需要计算组合概念
+        if (StringUtil.isBlank(concepts1) && StringUtil.isNotBlank(concepts2)) {
+            concepts1 = autoCombineConcepts(word1, concepts2);
+        }
+        if (StringUtil.isBlank(concepts2) && StringUtil.isNotBlank(concepts1)) {
+            concepts2 = autoCombineConcepts(word2, concepts1);
+        }
+        if (StringUtil.isBlank(concepts1) && StringUtil.isBlank(concepts2)) {
+            concepts1 = autoCombineConcepts(word1, concepts2);
+            concepts2 = autoCombineConcepts(word2, concepts1);
+            // 修正
+            concepts1 = autoCombineConcepts(word1, concepts2);
+            concepts2 = autoCombineConcepts(word2, concepts1);
+        }
+
+        // 处理所有可能组合的相似度
+        for (Concept c1 : concepts1) {
+            for (Concept c2 : concepts2) {
+                double v = getSimilarity(c1, c2);
+                if (v > similarity) {
+                    similarity = v;
+                }
+                if (similarity == 1.0) {
+                    break;
+                }
+            }
+        }
+        return similarity;
+    }
+
+    @Override
+    protected double calculate(double sim_v1, double sim_v2, double sim_v3, double sim_v4) {
+        return beta1 * sim_v1 + beta2 * sim_v1 * sim_v2 + beta3 * sim_v1 * sim_v3 + beta4 * sim_v1 * sim_v4;
+    }
+
+    @Override
+    public Collection<Concept> getConcepts(String key) {
+        Collection<Concept> concepts = super.getConcepts(key);
+        if (StringUtil.isBlank(concepts)) {
+            concepts = autoCombineConcepts(key, null);
+        }
+        return concepts;
+    }
+
+    /**
+     * 获取知网本身自带的概念,不组合处理
+     *
+     * @param key
+     * @return
+     */
+    public Collection<Concept> getInnerConcepts(String key) {
+        return super.getConcepts(key);
+    }
+
+    /**
+     * 计算未登录词语
+     *
+     * @param newWord
+     * @param refConcepts
+     * @return
+     */
+    public Collection<Concept> autoCombineConcepts(String newWord, Collection<Concept> refConcepts) {
+        ConceptLinkedList newConcepts = new ConceptLinkedList();
+        if (newWord == null) {
+            return newConcepts;
+        }
+        // 取最可能三个
+        for (String conceptWord : segmentNewWord(newWord, 3)) {
+            Collection<Concept> concepts = getConcepts(conceptWord);
+            if (newConcepts.size() == 0) {
+                newConcepts.addAll(concepts);
+                continue;
+            }
+            ConceptLinkedList tempConcepts = new ConceptLinkedList();
+            for (Concept head : concepts) {
+                for (Concept tail : newConcepts) {
+                    if (StringUtil.isNotBlank(refConcepts)) {
+                        for (Concept ref : refConcepts) {
+                            tempConcepts.addByDefine(autoCombineConcept(head, tail, ref));
+                        }
+                    } else {
+                        tempConcepts.addByDefine(autoCombineConcept(head, tail, null));
+                    }
+                }
+            }
+            newConcepts = tempConcepts;
+        }
+        // 过滤删除最后的1/3
+        if ((newConcepts.size() > MAX_COMBINED_COUNT)) {
+            newConcepts.removeLast(MAX_COMBINED_COUNT / 3);
+        }
+        return newConcepts;
+    }
+
+    /**
+     * 把未登录词进行概念切分, 形成多个概念的线性链表,并倒排组织
+     * 如“娱乐场”切分完毕后存放成: 【场】 → 【娱乐】
+     *
+     * @param newWord
+     * @param top
+     * @return
+     */
+    private List<String> segmentNewWord(String newWord, int top) {
+        List<String> results = new LinkedList<>();
+        int count = 0;
+        String word = newWord;
+        while (word != null && !word.equals("")) {
+            String token = word;
+            while (token.length() > 1 && StringUtil.isBlank(super.getConcepts(token))) {
+                token = token.substring(1);
+            }
+            results.add(token);
+            count++;
+            if (count >= top) break;
+            word = word.substring(0, word.length() - token.length());
+        }
+        return results;
+
+    }
+
+    /**
+     * 计算两个概念的组合概念
+     * 计算过程中根据参照概念修正组合结果, 实际应用中的两个概念应具有一定的先后关系(体现汉语“重心后移”特点),
+     * 如对于娱乐场,first="娱乐" second="场", 另外,
+     * 还需要修正第一个概念中的符号义原对于第二个概念主义原的实际关系,当参照概念起作用时,
+     * 即大于指定的阈值,则需要判断是否把当前义原并入组合概念中,对于第一个概念,还需要同时修正符号关系,
+     * 符合关系与参照概念保持一致
+     * @param head 第一概念
+     * @param tail 第二概念
+     * @param ref 参考
+     * @return
+     */
+    public Concept autoCombineConcept(Concept head, Concept tail, Concept ref) {
+        // 一个null,一个非null,返回非null的新概念
+        if (tail == null && head != null) {
+            return new Concept(head.getWord(), head.getPos(), head.getDefine());
+        }
+        if (head == null && tail != null) {
+            return new Concept(tail.getWord(), tail.getPos(), tail.getDefine());
+        }
+
+        // 第二个概念不是实词,直接返回第一个概念
+        if (!tail.isbSubstantive()) {
+            return new Concept(head.getWord() + tail.getWord(), head.getPos(), head.getDefine());
+        }
+        // 如无参照概念,或者参照概念是虚词,则直接相加
+        if (ref == null || !ref.isbSubstantive()) {
+            String define = tail.getDefine();
+            List<String> sememeList = getAllSememes(head, true);
+            for (String sememe : sememeList) {
+                if (!define.contains(sememe)) {
+                    define = define + "," + sememe;
+                }
+            }
+            return new Concept(head.getWord() + tail.getWord(), tail.getPos(), define);
+        }
+        // 正常处理,实词概念,参考概念非空
+        String define = tail.getMainSememe();
+        List<String> refSememes = getAllSememes(ref, false);
+        List<String> headSememes = getAllSememes(head, true);
+        List<String> tailSememes = getAllSememes(tail, false);
+
+        // 如果参照概念与第二个概念的主义原的义原相似度大于阈值THETA,
+        // 则限制组合概念定义中与第二个概念相关的义原部分为:
+        // 第二个概念的义原集合与参照概念义原集合的模糊交集
+        double mainSimilarity = sememeParser.getSimilarity(tail.getMainSememe(), ref.getMainSememe());
+        if (mainSimilarity >= PARAM_THETA) {
+            // 交集
+            for (String tailSememe : tailSememes) {
+                double maxSimilarity = 0.0;
+                String maxRefSememe = "";
+                for (String refSememe : refSememes) {
+                    double value = sememeParser.getSimilarity(tailSememe, refSememe);
+                    if (value > maxSimilarity) {
+                        maxSimilarity = value;
+                        maxRefSememe = refSememe;
+                    }
+                }
+                // 如果tail_sememe与参照概念中的相似度最大的义原经theta约束后超过阈值XI,则加入生成的组合概念定义中
+                if (maxSimilarity * mainSimilarity >= PARAM_XI) {
+                    define = define + "," + tailSememe;
+                    refSememes.remove(maxRefSememe);
+                }
+            }
+        } else {
+            define = tail.getDefine();
+        }
+        // 合并第一个概念的义原到组合概念中
+        for (String headSememe : headSememes) {
+            double maxSimilarity = 0.0;
+            String maxRefSememe = "";
+            for (String refSememe : refSememes) {
+                double value = sememeParser.getSimilarity(getPureSememe(headSememe), getPureSememe(refSememe));
+                if (value > maxSimilarity) {
+                    maxSimilarity = value;
+                    maxRefSememe = refSememe;
+                }
+            }
+            if (mainSimilarity * maxSimilarity >= PARAM_OMEGA) {
+                // 调整符号关系, 用参照概念的符号关系替换原符号关系, 通过把参照概念的非符号部分替换成前面义原的非符号内容
+                String sememe = maxRefSememe.replace(getPureSememe(maxRefSememe), getPureSememe(headSememe));
+                if (!define.contains(sememe)) {
+                    define = define + "," + sememe;
+                }
+            } else if (!define.contains(headSememe)) {
+                define = define + "," + headSememe;
+            }
+        }
+        return new Concept(head.getWord() + tail.getWord(), tail.getPos(), define);
+    }
+
+    /**
+     * 获取概念的所有义原
+     *
+     * @param concept
+     * @param includeMainSememe 是否包含主义原
+     * @return
+     */
+    private List<String> getAllSememes(Concept concept, Boolean includeMainSememe) {
+        List<String> results = new ArrayList<>();
+        if (concept != null) {
+            if (includeMainSememe) {
+                results.add(concept.getMainSememe());
+            }
+            for (String sememe : concept.getSecondSememes()) {
+                results.add(sememe);
+            }
+            for (String sememe : concept.getSymbolSememes()) {
+                results.add(sememe);
+            }
+            for (String sememe : concept.getRelationSememes()) {
+                results.add(sememe);
+            }
+        }
+        return results;
+    }
+
+    /**
+     * 去掉义原的符号和关系
+     *
+     * @param sememe
+     * @return
+     */
+    private String getPureSememe(String sememe) {
+        String line = sememe.trim();
+        if ((line.charAt(0) == '(') && (line.charAt(line.length() - 1) == ')')) {
+            line = line.substring(1, line.length() - 1);
+        }
+        // 符号
+        String symbol = line.substring(0, 1);
+        for (int i = 0; i < Symbol_Descriptions.length; i++) {
+            if (symbol.equals(Symbol_Descriptions[i][0])) {
+                return line.substring(1);
+            }
+        }
+
+        // 关系义原、第二基本义原
+        int pos = line.indexOf('=');
+        if (pos > 0) {
+            line = line.substring(pos + 1);
+        }
+        return line;
+    }
+}
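A minimal usage sketch for the concept-level similarity; the word pairs are illustrative only, and the actual scores depend on the HowNet concept and sememe dictionaries shipped with this commit:

    import com.ruoyi.utils.resumeAnalysis.similarity.similarity.word.hownet.concept.ConceptSimilarity;

    public class ConceptSimilarityDemo {
        public static void main(String[] args) {
            ConceptSimilarity cs = ConceptSimilarity.getInstance();
            System.out.println(cs.getSimilarity("教师", "老师")); // expected to be high
            System.out.println(cs.getSimilarity("教师", "苹果")); // expected to be low
        }
    }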

+ 109 - 0
ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/word/hownet/sememe/Sememe.java

@@ -0,0 +1,109 @@
+package com.ruoyi.utils.resumeAnalysis.similarity.similarity.word.hownet.sememe;
+
+/**
+ * 描述知网义原的基本对象
+ * 例如:&lt;sememe cn="成功" define="{experiencer,scope}" en="succeed" id="1-1-2-1-4-5"/&gt;
+ * 义原的id表明了义原之间的上下位关系和义原的深度。
+ *
+ * @author itbluebox
+ */
+public class Sememe {
+    // 义原编号
+    private String id;
+    // 中文名称
+    private String cnWord;
+    // 英文名称
+    private String enWord;
+    // 定义
+    private String define;
+
+    /**
+     * 每一行的形式为:be|是 {relevant,isa}/{relevant,descriptive}
+     * <br/>或者 official|官 [#organization|组织,#employee|员]
+     * <br/>或者 amount|多少
+     * <br/>把相应的部分赋予不同的属性
+     * 出于性能考虑,把未用到的英文名称、定义等忽略
+     *
+     * @param id
+     * @param en
+     * @param cn
+     * @param define
+     */
+    public Sememe(String id, String en, String cn, String define) {
+        this.id = id;
+        this.cnWord = cn;
+        this.enWord = en;
+        this.define = define;
+    }
+
+    public String getId() {
+        return id;
+    }
+
+    public void setId(String id) {
+        this.id = id;
+    }
+
+    public String getCnWord() {
+        return cnWord;
+    }
+
+    public void setCnWord(String cnWord) {
+        this.cnWord = cnWord;
+    }
+
+    public String getEnWord() {
+        return enWord;
+    }
+
+    public void setEnWord(String enWord) {
+        this.enWord = enWord;
+    }
+
+    public String getDefine() {
+        return define;
+    }
+
+    public void setDefine(String define) {
+        this.define = define;
+    }
+
+    public int getType() {
+        char c = id.charAt(0);
+        switch (c) {
+            case '1':
+                return SememeType.Event;
+            case '2':
+                return SememeType.Entity;
+            case '3':
+                return SememeType.Attribute;
+            case '4':
+                return SememeType.Quantity;
+            case '5':
+                return SememeType.AValue;
+            case '6':
+                return SememeType.QValue;
+            case '7':
+                return SememeType.SecondaryFeature;
+            case '8':
+                return SememeType.Syntax;
+            case '9':
+                return SememeType.EventRoleAndFeature;
+            default:
+                return 0;
+        }
+    }
+
+    @Override
+    public String toString() {
+        StringBuilder sb = new StringBuilder();
+        sb.append("义原编号=");
+        sb.append(id);
+        sb.append(";中文名称=");
+        sb.append(cnWord);
+        sb.append(";英文名称=");
+        sb.append(";定义=");
+        sb.append(define);
+        return sb.toString();
+    }
+}

+ 84 - 0
ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/word/hownet/sememe/SememeParser.java

@@ -0,0 +1,84 @@
+package com.ruoyi.utils.resumeAnalysis.similarity.similarity.word.hownet.sememe;
+
+import com.google.common.collect.HashMultimap;
+import com.google.common.collect.Multimap;
+import com.ruoyi.utils.resumeAnalysis.similarity.Similarity;
+import com.ruoyi.utils.resumeAnalysis.similarity.similarity.ISimilarity;
+import com.ruoyi.utils.resumeAnalysis.similarity.similarity.util.DicReader;
+import com.ruoyi.utils.resumeAnalysis.similarity.similarity.word.hownet.IHownetMeta;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import javax.xml.namespace.QName;
+import javax.xml.stream.XMLEventReader;
+import javax.xml.stream.XMLInputFactory;
+import javax.xml.stream.events.StartElement;
+import javax.xml.stream.events.XMLEvent;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.zip.GZIPInputStream;
+
+/**
+ * 义原解析器
+ *
+ * @author itbluebox
+ */
+public abstract class SememeParser implements IHownetMeta, ISimilarity {
+    private static final Logger logger = LoggerFactory.getLogger(SememeParser.class);
+    /**
+     * 所有的义原都存放到一个MultiMap, Key为Sememe的中文定义, Value为义原的Id
+     */
+    protected static Multimap<String, String> SEMEMES = null;
+    private static final String path = Similarity.Config.SememeXmlPath;
+
+    public SememeParser() throws IOException {
+        if (SEMEMES != null) {
+            return;
+        }
+        SEMEMES = HashMultimap.create();
+        InputStream inputStream = new GZIPInputStream(DicReader.getInputStream(path));
+        load(inputStream);
+    }
+
+    /**
+     * 文件加载义原
+     */
+    private void load(InputStream inputStream) throws IOException {
+        long time = System.currentTimeMillis();
+        int count = 0;
+        try {
+            XMLInputFactory inputFactory = XMLInputFactory.newInstance();
+            XMLEventReader xmlEventReader = inputFactory.createXMLEventReader(inputStream);
+
+            while (xmlEventReader.hasNext()) {
+                XMLEvent event = xmlEventReader.nextEvent();
+                if (event.isStartElement()) {
+                    StartElement startElement = event.asStartElement();
+                    if (startElement.getName().toString().equals("sememe")) {
+                        String cnWord = startElement.getAttributeByName(QName.valueOf("cn")).getValue();
+                        String id = startElement.getAttributeByName(QName.valueOf("id")).getValue();
+                        SEMEMES.put(cnWord, id);
+                        count++;
+                    }
+                }
+            }
+            inputStream.close();
+        } catch (Exception e) {
+            logger.error("xml err:" + e.toString());
+            e.printStackTrace();
+        }
+        time = System.currentTimeMillis() - time;
+        logger.info("complete! count num:" + count + ". time spend:" + time + "ms");
+    }
+
+    /**
+     * 关联度
+     *
+     * @param sememeName1
+     * @param sememeName2
+     * @return
+     */
+    public double getAssociation(String sememeName1, String sememeName2) {
+        return 0.0;
+    }
+}

+ 140 - 0
ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/word/hownet/sememe/SememeSimilarity.java

@@ -0,0 +1,140 @@
+package com.ruoyi.utils.resumeAnalysis.similarity.similarity.word.hownet.sememe;
+
+import com.ruoyi.utils.resumeAnalysis.similarity.similarity.util.StringUtil;
+import com.ruoyi.utils.resumeAnalysis.similarity.similarity.word.hownet.IHownetMeta;
+
+import java.io.IOException;
+import java.util.Collection;
+
+/**
+ * 义原相似度计算
+ *
+ * @author itbluebox
+ */
+public class SememeSimilarity extends SememeParser {
+
+    public SememeSimilarity() throws IOException {
+        super();
+    }
+
+    /**
+     * 计算两个义原的相似度
+     */
+    double getSimilarityBySememeId(final String id1, final String id2) {
+
+        int position = 0;
+        String[] array1 = id1.split("-");
+        String[] array2 = id2.split("-");
+        for (position = 0; position < array1.length && position < array2.length; position++) {
+            if (!array1[position].equals(array2[position])) {
+                break;
+            }
+        }
+
+        return 2.0 * position / (array1.length + array2.length);
+    }
+
+    /**
+     * 根据汉语定义计算义原之间的相似度,由于可能多个义元有相同的汉语词语,故计算结果为其中相似度最大者
+     *
+     * @return
+     */
+    public double getMaxSimilarity(String sememeName1, String sememeName2) {
+        double maxValue = 0.0;
+
+        // 如果两个义原名称相同,直接返回相似度1.0
+        if (sememeName1.equals(sememeName2)) {
+            return 1.0;
+        }
+        Collection<String> sememeIds1 = SEMEMES.get(sememeName1);
+        Collection<String> sememeIds2 = SEMEMES.get(sememeName2);
+        // 如果sememe1或者sememe2不是义元,则返回0
+        if (sememeIds1.size() == 0 || sememeIds2.size() == 0) {
+            return 0.0;
+        }
+
+        for (String id1 : sememeIds1) {
+            for (String id2 : sememeIds2) {
+                double value = getSimilarityBySememeId(id1, id2);
+                if (value > maxValue) {
+                    maxValue = value;
+                }
+            }
+        }
+
+        return maxValue;
+    }
+
+    /**
+     * 计算两个义元之间的相似度,由于义元可能相同,计算结果为其中相似度最大者 similarity = alpha/(distance+alpha),
+     * 如果两个字符串相同或都为空,直接返回1.0
+     */
+    @Override
+    public double getSimilarity(String item1, String item2) {
+        if (StringUtil.isBlankAll(item1, item2)) {
+            return 1.0;
+        } else if (StringUtil.isBlankAtLeastOne(item1, item2)) {
+            return 0.0;
+        } else if (item1.equals(item2)) {
+            return 1.0;
+        }
+
+        String key1 = item1.trim();
+        String key2 = item2.trim();
+
+        // 去掉()符号
+        if ((key1.charAt(0) == '(') && (key1.charAt(key1.length() - 1) == ')')) {
+
+            if (key2.charAt(0) == '(' && key2.charAt(key2.length() - 1) == ')') {
+                key1 = key1.substring(1, key1.length() - 1);
+                key2 = key2.substring(1, key2.length() - 1);
+            } else {
+                return 0.0;
+            }
+
+        }
+
+        // 处理关系义元,即x=y的情况
+        int pos = key1.indexOf('=');
+        if (pos > 0) {
+            int pos2 = key2.indexOf('=');
+            // 如果是关系义元,则判断前面部分是否相同,如果相同,则转为计算后面部分的相似度,否则为0
+            if ((pos == pos2) && key1.substring(0, pos).equals(key2.substring(0, pos2))) {
+                key1 = key1.substring(pos + 1);
+                key2 = key2.substring(pos2 + 1);
+            } else {
+                return 0.0;
+            }
+        }
+
+        // 处理符号义元,即前面有特殊符号的义元
+        String symbol1 = key1.substring(0, 1);
+        String symbol2 = key2.substring(0, 1);
+
+        for (int i = 0; i < IHownetMeta.Symbol_Descriptions.length; i++) {
+            if (symbol1.equals(IHownetMeta.Symbol_Descriptions[i][0])) {
+                if (symbol1.equals(symbol2)) {
+                    // 去掉相同的关系符号后,继续比较剩余部分
+                    key1 = key1.substring(1);
+                    key2 = key2.substring(1);
+                    break;
+                } else {
+                    return 0.0; // 如果不是同一关系符号,则相似度直接返回0
+                }
+            }
+        }
+
+        if ((pos = key1.indexOf("|")) >= 0) {
+            key1 = key1.substring(pos + 1);
+        }
+        if ((pos = key2.indexOf("|")) >= 0) {
+            key2 = key2.substring(pos + 1);
+        }
+
+        // 剥离符号与关系后若两者相同,直接返回相似度1.0
+        if (key1.equals(key2)) {
+            return 1.0;
+        }
+
+        return getMaxSimilarity(key1, key2);
+    }
+}
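A worked example of the id-prefix formula in getSimilarityBySememeId: id1 is the id quoted in the Sememe javadoc, id2 is a hypothetical sibling id, so the common prefix has 4 segments and the score is 2·4 / (6 + 5) ≈ 0.727:

    public class SememeIdDemo {
        public static void main(String[] args) {
            String id1 = "1-1-2-1-4-5"; // from the Sememe javadoc example
            String id2 = "1-1-2-1-3";   // hypothetical id, for illustration only
            String[] a = id1.split("-");
            String[] b = id2.split("-");
            int p = 0;
            while (p < a.length && p < b.length && a[p].equals(b[p])) {
                p++;
            }
            System.out.println(2.0 * p / (a.length + b.length)); // ≈ 0.727
        }
    }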

+ 72 - 0
ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/word/hownet/sememe/SememeType.java

@@ -0,0 +1,72 @@
+package com.ruoyi.utils.resumeAnalysis.similarity.similarity.word.hownet.sememe;
+
+/**
+ * 义原的类型定义<br/>
+ * <ul>
+ * <li>1:Event|事件</li>
+ * <li>2:Entity|实体 </li>
+ * <li>3: Attribute|属性 </li>
+ * <li>4:Quantity|数量</li>
+ * <li>5:aValue|属性值</li>
+ * <li>6:qValue|数量值</li>
+ * <li>7: Secondary Feature|第二特征</li>
+ * <li>8: Syntax|语法</li>
+ * <li>9: EventRole|动态角色</li>
+ * <li>10: EventFeatures|动态属性</li>
+ * <li>0:未知</li>
+ * <p>
+ * 其中1~7为基本义元,8为语法义元,9、10为关系义元<br/>
+ *
+ * @author itbluebox
+ */
+public interface SememeType {
+    /**
+     * Event|事件类型定义
+     */
+    int Event = 1;
+
+    /**
+     * Entity|实体类型定义
+     */
+    int Entity = 2;
+
+    /**
+     * Attribute|属性类型定义
+     */
+    int Attribute = 3;
+
+    /**
+     * Quantity|数量类型定义
+     */
+    int Quantity = 4;
+
+    /**
+     * aValue|属性值类型定义
+     */
+    int AValue = 5;
+
+    /**
+     * qValue|数量值类型定义
+     */
+    int QValue = 6;
+
+    /**
+     * Secondary Feature|第二特征类型定义
+     */
+    int SecondaryFeature = 7;
+
+    /**
+     * Syntax|语法类型定义
+     */
+    int Syntax = 8;
+
+    /**
+     * EventRole|动态角色类型定义
+     */
+    int EventRoleAndFeature = 9;
+
+    /**
+     * 未知类型定义
+     */
+    int Unknown = 0;
+}

+ 153 - 0
ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/word/pinyin/PinyinDictionary.java

@@ -0,0 +1,153 @@
+package com.ruoyi.utils.resumeAnalysis.similarity.similarity.word.pinyin;
+
+import com.ruoyi.utils.resumeAnalysis.similarity.Similarity;
+import com.ruoyi.utils.resumeAnalysis.similarity.similarity.util.DicReader;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+/**
+ * 查找汉字对应的拼音工具
+ *
+ * @author itbluebox
+ */
+public class PinyinDictionary {
+    private Map<Character, Set<String>> pinyinDict;
+    private static PinyinDictionary instance;
+    private static final String path =  Similarity.Config.PinyinPath;
+
+    private PinyinDictionary() throws IOException {
+        BufferedReader br = DicReader.getReader(path);
+        String line;
+        Event event = new Event();
+        while ((line = br.readLine()) != null) {
+            event.visit(line);
+        }
+        br.close();
+        this.pinyinDict = event.getPinyins();
+    }
+
+    public static PinyinDictionary getInstance() {
+        if (instance == null) {
+            try {
+                instance = new PinyinDictionary();
+            } catch (IOException e) {
+                e.printStackTrace();
+            }
+        }
+        return instance;
+    }
+
+    /**
+     * 获取汉字的拼音, 由于汉字具有多音字,故返回一个集合
+     *
+     * @param c
+     * @return
+     */
+    public Set<String> getPinyin(Character c) {
+        Set<String> set = pinyinDict.get(c);
+        if (set == null || set.size() == 0) {
+            set = new HashSet<>();
+            set.add(c.toString());
+        }
+        return set;
+    }
+
+    /**
+     * 获取词语的拼音, 一个词语可能对应多个拼音,把所有可能的组合放到集合中返回
+     *
+     * @param word
+     * @return
+     */
+    public Set<String> getPinyin(String word) {
+        Set<String> set = new HashSet<>();
+        for (int i = 0; i < word.length(); i++) {
+            Set<String> pinyinSet = getPinyin(word.charAt(i));
+            if (set == null || set.size() == 0) {
+                set.addAll(pinyinSet);
+                continue;
+            }
+            Set<String> tempSet = new HashSet<>();
+            for (String s : set) {
+                tempSet.addAll(pinyinSet.stream().map(p -> s + p).collect(Collectors.toList()));
+            }
+            set = tempSet;
+        }
+        return set;
+    }
+
+    /**
+     * 获取拼音字符串,多音字只取一个
+     *
+     * @param word
+     * @return
+     */
+    public String getPinyinSingle(String word) {
+        StringBuffer sb = new StringBuffer();
+        for (int i = 0; i < word.length(); i++) {
+            sb.append(getPinyin(word.charAt(i)).iterator().next());
+        }
+        return sb.toString();
+    }
+
+    /**
+     * 获取拼音串,对于多音字,给出所有拼音
+     *
+     * @param word
+     * @return
+     */
+    public String getPinyinString(String word) {
+        StringBuffer sb = new StringBuffer();
+        for (int i = 0; i < word.length(); i++) {
+            Set<String> pinyin = getPinyin(word.charAt(i));
+            sb.append(pinyin.toString());
+        }
+        return sb.toString();
+    }
+
+    /**
+     * 获取拼音首字母
+     *
+     * @param word
+     * @return
+     */
+    public String getPinyinHead(String word) {
+        StringBuffer sb = new StringBuffer();
+        for (int i = 0; i < word.length(); i++) {
+            sb.append(getPinyin(word.charAt(i)).iterator().next().charAt(0));
+        }
+        return sb.toString();
+    }
+
+    private static class Event {
+        private Map<Character, Set<String>> pinyins;
+
+        public Event() {
+            this.pinyins = new HashMap<>();
+        }
+
+        public Map<Character, Set<String>> getPinyins() {
+            return pinyins;
+        }
+
+        public boolean visit(String str) {
+            if (str.startsWith("//")) {
+                return true;
+            }
+            char c = str.charAt(0);
+            String pinyin = str.substring(2, str.length());
+            Set<String> set = pinyins.get(c);
+            if (set == null) {
+                set = new HashSet<>();
+            }
+            set.add(pinyin);
+            pinyins.put(c, set);
+            return true;
+        }
+    }
+}
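A minimal usage sketch; the exact pinyin sets depend on the dictionary file referenced by Similarity.Config.PinyinPath, so no concrete output values are assumed:

    import com.ruoyi.utils.resumeAnalysis.similarity.similarity.word.pinyin.PinyinDictionary;

    import java.util.Set;

    public class PinyinDemo {
        public static void main(String[] args) {
            PinyinDictionary dict = PinyinDictionary.getInstance();
            Set<String> all = dict.getPinyin("重庆");        // cartesian product over the readings of each character
            System.out.println(all);
            System.out.println(dict.getPinyinSingle("重庆")); // one reading per character
            System.out.println(dict.getPinyinHead("重庆"));   // first letter of one reading per character
        }
    }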

+ 33 - 0
ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/similarity/word/pinyin/PinyinSimilarity.java

@@ -0,0 +1,33 @@
+package com.ruoyi.utils.resumeAnalysis.similarity.similarity.word.pinyin;
+
+import com.ruoyi.utils.resumeAnalysis.similarity.similarity.ISimilarity;
+import com.ruoyi.utils.resumeAnalysis.similarity.similarity.util.EditDistance;
+import com.ruoyi.utils.resumeAnalysis.similarity.similarity.util.MathUtil;
+
+import java.util.Set;
+
+/**
+ * 拼音计算两个词相似度,拼音用编辑距离表示相似程度
+ *
+ * @author itbluebox
+ */
+public class PinyinSimilarity implements ISimilarity {
+    @Override
+    public double getSimilarity(String word1, String word2) {
+        double max = 0.0;
+        Set<String> pinyinSet1 = PinyinDictionary.getInstance().getPinyin(word1);
+        Set<String> pinyinSet2 = PinyinDictionary.getInstance().getPinyin(word2);
+        for (String pinyin1 : pinyinSet1) {
+            for (String pinyin2 : pinyinSet2) {
+                double distance = new EditDistance().getEditDistance(pinyin1, pinyin2);
+                double similarity = 1 - distance / (MathUtil.max(pinyin1.length(), pinyin2.length()));
+                max = (max > similarity) ? max : similarity;
+                if (max == 1.0) {
+                    return max;
+                }
+            }
+        }
+
+        return max;
+    }
+}
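A usage sketch of the pinyin-based similarity; assuming the pinyin dictionary contains the usual readings, homophones such as 帐号/账号 should score 1.0 because their pinyin edit distance is 0:

    import com.ruoyi.utils.resumeAnalysis.similarity.similarity.word.pinyin.PinyinSimilarity;

    public class PinyinSimilarityDemo {
        public static void main(String[] args) {
            PinyinSimilarity ps = new PinyinSimilarity();
            System.out.println(ps.getSimilarity("帐号", "账号")); // homophones -> 1.0 (assuming standard readings)
            System.out.println(ps.getSimilarity("简历", "经历")); // partial pinyin overlap -> between 0 and 1
        }
    }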

+ 72 - 0
ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/tendency/word/HownetWordTendency.java

@@ -0,0 +1,72 @@
+package com.ruoyi.utils.resumeAnalysis.similarity.tendency.word;
+
+import com.ruoyi.utils.resumeAnalysis.similarity.similarity.word.hownet.concept.Concept;
+import com.ruoyi.utils.resumeAnalysis.similarity.similarity.word.hownet.concept.ConceptParser;
+import com.ruoyi.utils.resumeAnalysis.similarity.similarity.word.hownet.concept.ConceptSimilarity;
+import com.ruoyi.utils.resumeAnalysis.similarity.similarity.word.hownet.sememe.SememeParser;
+import com.ruoyi.utils.resumeAnalysis.similarity.similarity.word.hownet.sememe.SememeSimilarity;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.Set;
+
+/**
+ * 知网词语倾向性
+ *
+ * @author itbluebox
+ */
+public class HownetWordTendency implements IWordTendency {
+    private static final Logger logger = LoggerFactory.getLogger(HownetWordTendency.class);
+    private ConceptParser conceptParser;
+    private SememeParser sememeParser;
+    public static String[] POSITIVE_SEMEMES = new String[]{"良", "喜悦", "夸奖", "满意", "期望", "注意", "致敬", "喜欢",
+            "专", "敬佩", "同意", "爱惜", "愿意", "思念", "拥护", "祝贺", "福", "需求", "奖励", "致谢", "欢迎", "羡慕",
+            "感激", "爱恋"};
+
+    public static String[] NEGATIVE_SEMEMES = new String[]{"莠", "谴责", "害怕", "生气", "悲哀", "着急", "轻视", "羞愧",
+            "烦恼", "灰心", "犹豫", "为难", "懊悔", "厌恶", "怀疑", "怜悯", "忧愁", "示怒", "不满", "仇恨", "埋怨",
+            "失望", "坏"};
+
+    public HownetWordTendency() {
+        this.conceptParser = ConceptSimilarity.getInstance();
+        try {
+            this.sememeParser = new SememeSimilarity();
+        } catch (IOException e) {
+            logger.error("exception:{}", e.getMessage());
+        }
+    }
+
+    @Override
+    public double getTendency(String word) {
+        double positive = getSentiment(word, POSITIVE_SEMEMES);
+        double negative = getSentiment(word, NEGATIVE_SEMEMES);
+        return positive - negative;
+    }
+
+    private double getSentiment(String word, String[] candidateSememes) {
+        Collection<Concept> concepts = conceptParser.getConcepts(word);
+        Set<String> sememes = new HashSet<>();
+        for (Concept concept : concepts) sememes.addAll(concept.getAllSememeNames());
+
+        double max = 0.0;
+        for (String item : sememes) {
+            double total = 0.0;
+            for (String candidateSememe : candidateSememes) {
+                double value = sememeParser.getSimilarity(item, candidateSememe);
+                // 如果有特别接近的义原,直接返回该相似值
+                if (value > 0.9) {
+                    return value;
+                }
+                total += value;
+            }
+            double sim = total / candidateSememes.length;
+            if (sim > max) {
+                max = sim;
+            }
+        }
+        return max;
+    }
+}
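A usage sketch for the word-tendency interface; the sample words are illustrative and the numeric scores depend on the HowNet dictionaries, so only the calling convention is shown:

    import com.ruoyi.utils.resumeAnalysis.similarity.tendency.word.HownetWordTendency;
    import com.ruoyi.utils.resumeAnalysis.similarity.tendency.word.IWordTendency;

    public class TendencyDemo {
        public static void main(String[] args) {
            IWordTendency tendency = new HownetWordTendency();
            System.out.println(tendency.getTendency("喜欢")); // positive value expected for a commendatory word
            System.out.println(tendency.getTendency("讨厌")); // negative value expected for a derogatory word
        }
    }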

+ 17 - 0
ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/tendency/word/IWordTendency.java

@@ -0,0 +1,17 @@
+package com.ruoyi.utils.resumeAnalysis.similarity.tendency.word;
+
+/**
+ * 语义倾向性(情感分析)
+ *
+ * @author itbluebox
+ */
+public interface IWordTendency {
+    /**
+     * 获取词语的语义倾向性,
+     * 词语的语义倾向性为一个介于[-1, 1]之间的实数,数值越大,褒义性越强,否则,贬义性越强
+     *
+     * @param word
+     * @return
+     */
+    double getTendency(String word);
+}

+ 153 - 0
ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/tendency/word/Training.java

@@ -0,0 +1,153 @@
+package com.ruoyi.utils.resumeAnalysis.similarity.tendency.word;
+
+import com.google.common.collect.HashMultimap;
+import com.google.common.collect.Multimap;
+import com.ruoyi.utils.resumeAnalysis.similarity.similarity.util.StringUtil;
+import com.ruoyi.utils.resumeAnalysis.similarity.similarity.word.hownet.concept.Concept;
+import com.ruoyi.utils.resumeAnalysis.similarity.similarity.word.hownet.concept.ConceptSimilarity;
+import com.ruoyi.utils.resumeAnalysis.similarity.similarity.word.hownet.sememe.SememeSimilarity;
+
+import java.io.*;
+import java.util.*;
+
+/**
+ * 临时训练及测试类
+ */
+public class Training {
+
+    void test(boolean testPositive) throws IOException {
+        IWordTendency tendency = new HownetWordTendency();
+        File f = new File("data/dict/sentiment/负面情感词语(中文).txt");
+        if (testPositive) {
+            f = new File("data/dict/sentiment/正面评价词语(中文).txt");
+        }
+        String encoding = "utf-8";
+        String line;
+        int wordCount = 0;
+        int correctCount = 0;
+
+        try (BufferedReader in = new BufferedReader(new InputStreamReader(new FileInputStream(f), encoding));) {
+            while ((line = in.readLine()) != null) {
+                if (line.length() > 5)
+                    continue;
+                wordCount++;
+
+                double value = tendency.getTendency(line.trim());
+                if (value > 0 && testPositive) {
+                    correctCount++;
+                } else if (value < 0 && !testPositive) {
+                    correctCount++;
+                } else {
+                    System.out.println("error:" + line + "\t value:" + value);
+                }
+            }
+        }
+        System.out.println("correct:" + correctCount);
+        System.out.println("total:" + wordCount);
+        System.out.println("ratio:" + correctCount * 1.0 / wordCount);
+    }
+
+    /**
+     * 该方法用于统计知网提供的情感词集合所涉及的义原以及出现频度
+     *
+     * @throws IOException
+     */
+    void countSentimentDistribution() throws IOException {
+        Map<String, Integer> sememeMap = new HashMap<>();
+        File f = new File("data/dict/sentiment/负面情感词语(中文).txt");
+        String encoding = "utf-8";
+        boolean autoCombineConcept = false;
+        BufferedReader in = new BufferedReader(new InputStreamReader(new FileInputStream(f), encoding));
+
+        ConceptSimilarity parser = new ConceptSimilarity(new SememeSimilarity());
+
+        String line;
+
+        int conceptCount = 0;
+        int wordCount = 0;
+        while ((line = in.readLine()) != null) {
+            if (line.length() > 5)
+                continue;
+            wordCount++;
+            String word = line.trim();
+            Collection<Concept> concepts = parser.getInnerConcepts(word);
+            //由于目前的词典为知网2000版本,所以默认情况下仅对词典中出现的概念进行统计
+            if (StringUtil.isBlank(concepts) && autoCombineConcept) {
+                concepts = parser.autoCombineConcepts(word, null);
+            }
+            for (Concept c : concepts) {
+                conceptCount++;
+                List<String> names = new ArrayList<String>();
+
+                //加入主义原
+                names.add(c.getMainSememe());
+
+                //加入关系义原
+                for (String item : c.getRelationSememes()) {
+                    names.add(item.substring(item.indexOf("=") + 1));
+                }
+
+                //加入符号义原
+                for (String item : c.getSymbolSememes()) {
+                    names.add(item.substring(1));
+                }
+
+                //加入其他义原集合
+                for (String item : c.getSecondSememes()) {
+                    names.add(item);
+                }
+
+                for (String item : names) {
+                    Integer count = sememeMap.get(item);
+                    if (count == null) {
+                        sememeMap.put(item, 1);
+                    } else {
+                        sememeMap.put(item, count + 1);
+                    }
+                }
+            }
+        }
+        in.close();
+
+        //以下是为了按照义原出现的数量进行排序的代码
+        Multimap<Integer, String> map2 = HashMultimap.create();
+        for (String key : sememeMap.keySet()) {
+            map2.put(sememeMap.get(key), key);
+        }
+        List<Integer> keys = new ArrayList<>();
+        keys.addAll(map2.keySet());
+        Collections.sort(keys);
+
+        int smallSememeCount = 0; //较少出现的不同义原数量
+        int smallAppearTotal = 0; //较少出现的义原在概念中出现的次数总和
+        for (int index = (keys.size() - 1); index >= 0; index--) {
+            Integer key = keys.get(index);
+            Collection<String> values = map2.get(key);
+            double ratio = (key * 100.0 / conceptCount);
+            System.out.print(key + "(" + ratio + "%): ");
+            for (String v : values) {
+                System.out.print(v + "\t");
+            }
+            System.out.println();
+            if (ratio < 0.7) {
+                smallSememeCount += values.size();
+                smallAppearTotal += key * values.size();
+            }
+        }
+
+        System.out.println("small info: ");
+        System.out.println("\tdifferent sememes:" + smallSememeCount);
+        System.out.println("\tappear count:" + smallAppearTotal);
+        System.out.println("\tratio:" + smallAppearTotal * 100.0 / conceptCount);
+        System.out.println("wordCount:" + wordCount);
+        System.out.println("conceptCount:" + conceptCount);
+    }
+
+    public static void main(String[] args) throws IOException {
+        Training training = new Training();
+        training.countSentimentDistribution();
+        //        System.out.println("test positive:");
+        //        training.test(true);
+    }
+
+}

+ 97 - 0
ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/tokenizer/Tokenizer.java

@@ -0,0 +1,97 @@
+package com.ruoyi.utils.resumeAnalysis.similarity.tokenizer;
+
+import com.hankcs.hanlp.HanLP;
+import com.hankcs.hanlp.corpus.io.IOUtil;
+import com.hankcs.hanlp.seg.Segment;
+import com.hankcs.hanlp.seg.common.Term;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.stream.Collectors;
+
+
+/**
+ * 对中文分词的封装,当前实现调用 HanLP 分词器(ansj、Xmnlp 的调用以注释形式保留备用),
+ * 并提供文件级分词、词频统计与耗时输出的辅助方法。
+ *
+ * @author itbluebox
+ */
+public class Tokenizer {
+    private static final Logger logger = LoggerFactory.getLogger(Tokenizer.class);
+
+    public static List<Word> segment(String sentence) {
+        List<Word> results = new ArrayList<>();
+        /*// ansj_seg
+        List<org.xm.ansj.domain.Term> termList = StandardSegmentation.parse(sentence).getTerms();//ansj
+        results.addAll(termList
+                .stream()
+                .map(term -> new Word(term.getName(), term.getNature().natureStr))
+                .collect(Collectors.toList())
+        );*/
+
+        /*//Xmnlp
+        List<org.xm.xmnlp.seg.domain.Term> termList = Xmnlp.segment(sentence);
+        results.addAll(termList
+                .stream()
+                .map(term -> new Word(term.word, term.getNature().name()))
+                .collect(Collectors.toList())
+        );*/
+
+        // HanLP
+        List<Term> termList = HanLP.segment(sentence);
+        results.addAll(termList
+                .stream()
+                .map(term -> new Word(term.word, term.nature.toString()))
+                .collect(Collectors.toList())
+        );
+
+        return results;
+    }
+
+    public static void fileSegment(String inputFilePath, String outputFilePath) {
+        fileSegment(HanLP.newSegment(), inputFilePath, outputFilePath);
+    }
+
+    public static void fileSegment(Segment segment, String inputFilePath, String outputFilePath) {
+        String outPath = inputFilePath.replace(".txt", "") + "-Segment-Result.txt";
+        if (outputFilePath != null && outputFilePath.trim().length() > 0) outPath = outputFilePath;
+        // try-with-resources ensures the reader and output stream are closed even on failure
+        try (BufferedReader reader = IOUtil.newBufferedReader(inputFilePath);
+             FileOutputStream fos = new FileOutputStream(new File(outPath))) {
+            WordFreqStatistics.statistics(segment, inputFilePath);
+            long allCount = 0;
+            long lexCount = 0;
+            long start = System.currentTimeMillis();
+            String temp;
+            while ((temp = reader.readLine()) != null) {
+                List<Term> parse = segment.seg(temp);
+                StringBuilder sb = new StringBuilder();
+                for (Term term : parse) {
+                    sb.append(term.toString()).append("\t");
+                    if (term.word.trim().length() > 0) {
+                        allCount += term.length();
+                        lexCount += 1;
+                    }
+                }
+                fos.write(sb.toString().trim().getBytes());
+                fos.write("\n".getBytes());
+            }
+            fos.flush();
+            long cost = Math.max(System.currentTimeMillis() - start, 1);
+            System.out.println("segment result save:" + outPath);
+            System.out.println("共 " + allCount + " 个字符,共 " + lexCount + " 个词语,用时 " + cost + " 毫秒,"
+                    + "每秒处理字符数:" + (allCount * 1000 / cost));
+        } catch (IOException e) {
+            logger.error("IO error: " + e.getLocalizedMessage(), e);
+        }
+    }
+}
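
A minimal usage sketch for the wrapper above (the demo class name and sample sentence are illustrative, not part of the commit; it assumes the HanLP data directory configured in hanlp.properties is in place). segment() returns Word objects carrying the surface form and its part-of-speech tag:

import com.ruoyi.utils.resumeAnalysis.similarity.tokenizer.Tokenizer;
import com.ruoyi.utils.resumeAnalysis.similarity.tokenizer.Word;

import java.util.List;

public class TokenizerDemo {
    public static void main(String[] args) {
        // Segment a resume-style sentence and print each token with its POS tag.
        List<Word> words = Tokenizer.segment("熟悉Java和Spring框架,三年后端开发经验");
        for (Word w : words) {
            System.out.println(w.getName() + "/" + w.getPos());
        }
    }
}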

+ 121 - 0
ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/tokenizer/Word.java

@@ -0,0 +1,121 @@
+package com.ruoyi.utils.resumeAnalysis.similarity.tokenizer;
+
+import com.hankcs.hanlp.HanLP;
+
+import java.util.Objects;
+
+/**
+ * 封装分词结果
+ *
+ * @author itbluebox
+ */
+public class Word implements Comparable {
+    // 词名
+    private String name;
+    // 词性
+    private String pos;
+    // 权重,用于词向量分析
+    private Float weight;
+    private int frequency;
+
+    public Word(String name) {
+        this.name = name;
+    }
+
+    public Word(String name, String pos) {
+        this.name = name;
+        this.pos = pos;
+    }
+
+    public String getName() {
+        return name;
+    }
+
+    public void setName(String name) {
+        this.name = name;
+    }
+
+    public String getPos() {
+        return pos;
+    }
+
+    public void setPos(String pos) {
+        this.pos = pos;
+    }
+
+    public Float getWeight() {
+        return weight;
+    }
+
+    public void setWeight(Float weight) {
+        this.weight = weight;
+    }
+
+    public int getFrequency() {
+        return frequency;
+    }
+
+    public void setFrequency(int frequency) {
+        this.frequency = frequency;
+    }
+
+    @Override
+    public int hashCode() {
+        return Objects.hashCode(this.name);
+    }
+
+    @Override
+    public boolean equals(Object obj) {
+        if (obj == null) {
+            return false;
+        }
+        if (getClass() != obj.getClass()) {
+            return false;
+        }
+        final Word other = (Word) obj;
+        return Objects.equals(this.name, other.name);
+    }
+
+    public String toString(String split) {
+        if (HanLP.Config.ShowTermNature)
+            return name + split + pos;
+        return name;
+    }
+
+    @Override
+    public String toString() {
+        StringBuilder str = new StringBuilder();
+        if (name != null) {
+            str.append(name);
+        }
+        if (pos != null) {
+            str.append("/").append(pos);
+        }
+        if (frequency > 0) {
+            str.append("/").append(frequency);
+        }
+        return str.toString();
+    }
+
+    @Override
+    public int compareTo(Object o) {
+        if (this == o) {
+            return 0;
+        }
+        if (this.name == null) {
+            return -1;
+        }
+        if (o == null) {
+            return 1;
+        }
+        if (!(o instanceof Word)) {
+            return 1;
+        }
+        String t = ((Word) o).getName();
+        if (t == null) {
+            return 1;
+        }
+        return this.name.compareTo(t);
+    }
+}
+

+ 154 - 0
ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/tokenizer/WordFreqStatistics.java

@@ -0,0 +1,154 @@
+package com.ruoyi.utils.resumeAnalysis.similarity.tokenizer;
+
+
+import com.hankcs.hanlp.HanLP;
+import com.hankcs.hanlp.corpus.io.IOUtil;
+import com.hankcs.hanlp.seg.Segment;
+import com.ruoyi.utils.resumeAnalysis.similarity.Similarity;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.stream.Collectors;
+
+
+/**
+ * 词频统计工具
+ *
+ * @author itbluebox
+ */
+public class WordFreqStatistics {
+    protected static final Logger logger = LoggerFactory.getLogger(WordFreqStatistics.class);
+    private String resultPath = Similarity.Config.StatisticsResultPath;
+    private Map<String, AtomicInteger> statisticsMap = new ConcurrentHashMap<>();
+    private Segment segment = HanLP.newSegment();
+
+    /**
+     * 获取词频统计结果保存路径
+     *
+     * @return 词频统计结果保存路径
+     */
+    public String getResultPath() {
+        return resultPath;
+    }
+
+    /**
+     * 设置词频统计结果保存路径
+     *
+     * @param resultPath 词频统计结果保存路径
+     */
+    public void setResultPath(String resultPath) {
+        this.resultPath = resultPath;
+    }
+
+    public Segment getSegment() {
+        return segment;
+    }
+
+    public void setSegment(Segment segment) {
+        this.segment = segment;
+    }
+
+    public WordFreqStatistics() {
+    }
+
+    public WordFreqStatistics(String resultPath) {
+        this.resultPath = resultPath;
+    }
+
+    public WordFreqStatistics(Segment segment) {
+        this.segment = segment;
+    }
+
+    public WordFreqStatistics(String resultPath, Segment segment) {
+        this.resultPath = resultPath;
+        this.segment = segment;
+    }
+
+    private void statistics(String word, int times, Map<String, AtomicInteger> container) {
+        container.putIfAbsent(word, new AtomicInteger());
+        container.get(word).addAndGet(times);
+    }
+
+    private void dump(Map<String, AtomicInteger> map, String path) {
+        try {
+            //score rank
+            List<String> list = map.entrySet()
+                    .parallelStream()
+                    .sorted((a, b) -> new Integer(b.getValue().get()).compareTo(a.getValue().intValue()))
+                    .map(entry -> entry.getKey().toString() + "\t" + entry.getValue().get())
+                    .collect(Collectors.toList());
+            Files.write(Paths.get(path), list);
+            if (list.size() < 100) {
+                System.out.println("word statistics result:");
+                AtomicInteger i = new AtomicInteger();
+                list.forEach(item -> System.out.println("\t" + i.incrementAndGet() + "、" + item));
+            }
+            System.out.println("statistic result save:" + path);
+        } catch (Exception e) {
+            logger.error("dump error!");
+        }
+    }
+
+    public void seg(String text) {
+        segment.seg(text).parallelStream().forEach(i -> statistics(i.toString(), 1, statisticsMap));
+    }
+
+    /**
+     * 将词频统计结果保存到文件
+     *
+     * @param resultPath 词频统计结果保存路径
+     */
+    public void dump(String resultPath) {
+        this.resultPath = resultPath;
+        dump();
+    }
+
+    /**
+     * 将词频统计结果保存到文件
+     */
+    public void dump() {
+        dump(this.statisticsMap, this.resultPath);
+    }
+
+    /**
+     * 清除之前的统计结果
+     */
+    public void reset() {
+        this.statisticsMap.clear();
+    }
+
+    public Map<String, AtomicInteger> getStatisticsMap() {
+        return statisticsMap;
+    }
+
+    public void setStatisticsMap(Map<String, AtomicInteger> statisticsMap) {
+        this.statisticsMap = statisticsMap;
+    }
+
+    public static void statistics(Segment segment, String inputFilePath) {
+        try {
+            //词频统计
+            WordFreqStatistics statistic = new WordFreqStatistics(segment);
+            BufferedReader reader = IOUtil.newBufferedReader(inputFilePath);
+            String t;
+            StringBuilder s = new StringBuilder();
+            while ((t = reader.readLine()) != null) {
+                s.append(t);
+            }
+            statistic.seg(s.toString());
+            statistic.setResultPath(inputFilePath.replace(".txt", "") + "-WordFrequencyStatistics-Result.txt");
+            statistic.dump();
+            reader.close();
+        } catch (IOException e) {
+            logger.error("IO error: " + e.getLocalizedMessage());
+        }
+    }
+}
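
A short usage sketch for the statistics class above (the demo class name, sample text and output path are hypothetical); seg() accumulates counts into the internal map and dump() writes the ranked result to the given file, whose parent directory must already exist:

import com.hankcs.hanlp.HanLP;
import com.ruoyi.utils.resumeAnalysis.similarity.tokenizer.WordFreqStatistics;

public class WordFreqDemo {
    public static void main(String[] args) {
        // Count term frequencies over a piece of text and dump the ranked result to a file.
        WordFreqStatistics stat = new WordFreqStatistics(HanLP.newSegment());
        stat.seg("负责简历解析模块的开发,负责简历去重与相似度计算");
        stat.dump("data/word-freq-demo.txt");
    }
}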

+ 54 - 0
ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/word2vec/Word2vec.java

@@ -0,0 +1,54 @@
+package com.ruoyi.utils.resumeAnalysis.similarity.word2vec;
+
+import com.ruoyi.utils.resumeAnalysis.similarity.word2vec.domain.WordEntry;
+import com.ruoyi.utils.resumeAnalysis.similarity.word2vec.vec.Learn;
+import com.ruoyi.utils.resumeAnalysis.similarity.word2vec.vec.ModelParser;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+/**
+ * Word2vec java版 工具包
+ *
+ * @author itbluebox
+ */
+public class Word2vec {
+    private static final Logger logger = LoggerFactory.getLogger(Word2vec.class);
+
+    public static String trainModel(String inputfilePath, String modelPath) throws IOException {
+        File inputfile = new File(inputfilePath);
+        if (inputfile == null) {
+            return "";
+        }
+        String out = inputfile.getPath() + ".model";
+        if (modelPath != null && modelPath.trim().length() > 0) out = modelPath;
+        File outFile = new File(out);
+        Learn learn = new Learn();
+        learn.learnFile(inputfile);
+        learn.saveModel(outFile);
+        return out;
+    }
+
+    public static float[] parser(String modelPath, String word) throws IOException {
+        ModelParser parser = new ModelParser();
+        parser.loadModel(modelPath);
+        float[] result = parser.getWordVector(word);
+        return result;
+    }
+
+    public static List<String> getHomoionym(String modelPath, String word, int topN) throws IOException {
+        List<String> result = new ArrayList<>(topN);
+        ModelParser parser = new ModelParser();
+        parser.loadModel(modelPath);
+        parser.setTopNSize(topN);
+        Set<WordEntry> wordEntrySet = parser.distance(word);
+        result.addAll(wordEntrySet.stream().map(wordEntry -> wordEntry.name).collect(Collectors.toList()));
+        return result;
+    }
+}
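
A sketch of the facade above end to end (the paths, the query word and the demo class name are placeholders): trainModel() expects a corpus that is already segmented into space- or tab-separated tokens, and getHomoionym() then returns the topN nearest words from the saved model:

import com.ruoyi.utils.resumeAnalysis.similarity.word2vec.Word2vec;

import java.io.IOException;
import java.util.List;

public class Word2vecDemo {
    public static void main(String[] args) throws IOException {
        // Train a model from a pre-segmented corpus, then look up the 10 nearest neighbours of a word.
        String modelPath = Word2vec.trainModel("data/corpus-segmented.txt", "data/corpus.model");
        List<String> similar = Word2vec.getHomoionym(modelPath, "开发", 10);
        similar.forEach(System.out::println);
    }
}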

+ 14 - 0
ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/word2vec/domain/HiddenNeuron.java

@@ -0,0 +1,14 @@
+package com.ruoyi.utils.resumeAnalysis.similarity.word2vec.domain;
+
+/**
+ * 隐藏神经元
+ *
+ * @author itbluebox
+ */
+public class HiddenNeuron extends Neuron {
+    public double[] syn1;// 隐藏层 -> 输出层
+
+    public HiddenNeuron(int layerSize) {
+        syn1 = new double[layerSize];
+    }
+}

+ 27 - 0
ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/word2vec/domain/Neuron.java

@@ -0,0 +1,27 @@
+package com.ruoyi.utils.resumeAnalysis.similarity.word2vec.domain;
+
+/**
+ * 神经元
+ *
+ * @author itbluebox
+ */
+public abstract class Neuron implements Comparable<Neuron> {
+    public double freq;
+    public Neuron parent;
+    public int code;
+    public int category = -1;
+
+    @Override
+    public int compareTo(Neuron o) {
+        // order by category first, then by frequency; never return 0 here, otherwise the
+        // TreeSet used by Huffman would silently drop neurons it treats as duplicates
+        if (this.category == o.category) {
+            return this.freq > o.freq ? 1 : -1;
+        }
+        return this.category > o.category ? 1 : -1;
+    }
+}

+ 28 - 0
ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/word2vec/domain/WordEntry.java

@@ -0,0 +1,28 @@
+package com.ruoyi.utils.resumeAnalysis.similarity.word2vec.domain;
+
+/**
+ * 词语属性
+ *
+ * @author itbluebox
+ */
+public class WordEntry implements Comparable<WordEntry> {
+    public String name;
+    public float score;
+
+    public WordEntry(String name, float score) {
+        this.name = name;
+        this.score = score;
+    }
+
+    public String toString() {
+        return "name:" + this.name + "\tscore:" + this.score;
+    }
+
+    @Override
+    public int compareTo(WordEntry o) {
+        if (this.score < o.score) {
+            return 1;
+        } else
+            return -1;
+    }
+}

+ 64 - 0
ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/word2vec/domain/WordNeuron.java

@@ -0,0 +1,64 @@
+package com.ruoyi.utils.resumeAnalysis.similarity.word2vec.domain;
+
+import java.util.Collections;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Random;
+
+/**
+ * 词语
+ *
+ * @author itbluebox
+ */
+public class WordNeuron extends Neuron {
+    public String name;
+    public double[] syn0;
+    public List<Neuron> neurons;// 路径
+    public int[] codeArray;
+
+    public List<Neuron> makeNeurons() {
+        if (neurons != null) return neurons;
+        Neuron neuron = this;
+        neurons = new LinkedList<>();
+        while ((neuron = neuron.parent) != null) {
+            neurons.add(neuron);
+        }
+        Collections.reverse(neurons);
+        codeArray = new int[neurons.size()];
+        for (int i = 1; i < neurons.size(); i++) {
+            codeArray[i - 1] = neurons.get(i).code;
+        }
+
+        codeArray[codeArray.length - 1] = this.code;
+        return neurons;
+    }
+
+    public WordNeuron(String name, double freq, int layerSize) {
+        this.name = name;
+        this.freq = freq;
+        this.syn0 = new double[layerSize];
+        Random random = new Random();
+        for (int i = 0; i < syn0.length; i++) {
+            syn0[i] = (random.nextDouble() - 0.5) / layerSize;
+        }
+    }
+
+    /**
+     * huffman tree
+     *
+     * @param name
+     * @param freq
+     * @param category
+     * @param layerSize
+     */
+    public WordNeuron(String name, double freq, int category, int layerSize) {
+        this.name = name;
+        this.freq = freq;
+        this.syn0 = new double[layerSize];
+        this.category = category;
+        Random random = new Random();
+        for (int i = 0; i < syn0.length; i++) {
+            syn0[i] = (random.nextDouble() - 0.5) / layerSize;
+        }
+    }
+}

+ 42 - 0
ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/word2vec/vec/Huffman.java

@@ -0,0 +1,42 @@
+package com.ruoyi.utils.resumeAnalysis.similarity.word2vec.vec;
+
+import com.ruoyi.utils.resumeAnalysis.similarity.word2vec.domain.HiddenNeuron;
+import com.ruoyi.utils.resumeAnalysis.similarity.word2vec.domain.Neuron;
+
+import java.util.Collection;
+import java.util.TreeSet;
+
+/**
+ * huffman编码
+ *
+ * @author itbluebox
+ */
+public class Huffman {
+    private int layerSize;
+
+    public Huffman(int layerSize) {
+        this.layerSize = layerSize;
+    }
+
+    private TreeSet<Neuron> set = new TreeSet<>();
+
+    public void buildTree(Collection<Neuron> neurons) {
+        set.addAll(neurons);
+        while (set.size() > 1) {
+            merge();
+        }
+    }
+
+    private void merge() {
+        HiddenNeuron hn = new HiddenNeuron(layerSize);
+        Neuron neuron1 = set.pollFirst();
+        Neuron neuron2 = set.pollFirst();
+        hn.category = neuron2.category;
+        hn.freq = neuron1.freq + neuron2.freq;
+        neuron1.parent = hn;
+        neuron2.parent = hn;
+        neuron1.code = 0;
+        neuron2.code = 1;
+        set.add(hn);
+    }
+}
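
A small sketch of how this tree is consumed downstream, mirroring what Learn.learnFile() does: build WordNeurons with their (made-up) frequencies, grow the Huffman tree, then call makeNeurons() to materialise each word's path and the binary code used by hierarchical softmax:

import com.ruoyi.utils.resumeAnalysis.similarity.word2vec.domain.Neuron;
import com.ruoyi.utils.resumeAnalysis.similarity.word2vec.domain.WordNeuron;
import com.ruoyi.utils.resumeAnalysis.similarity.word2vec.vec.Huffman;

import java.util.Arrays;
import java.util.List;

public class HuffmanDemo {
    public static void main(String[] args) {
        int layerSize = 8;
        List<Neuron> vocab = Arrays.asList(
                new WordNeuron("java", 0.5, layerSize),
                new WordNeuron("spring", 0.3, layerSize),
                new WordNeuron("mybatis", 0.2, layerSize));
        // Build the Huffman tree over the vocabulary; merge() wires up parent pointers and 0/1 codes.
        new Huffman(layerSize).buildTree(vocab);
        for (Neuron n : vocab) {
            WordNeuron wn = (WordNeuron) n;
            wn.makeNeurons();
            System.out.println(wn.name + " -> code " + Arrays.toString(wn.codeArray));
        }
    }
}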

+ 395 - 0
ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/word2vec/vec/Learn.java

@@ -0,0 +1,395 @@
+package com.ruoyi.utils.resumeAnalysis.similarity.word2vec.vec;
+
+import com.ruoyi.utils.resumeAnalysis.similarity.word2vec.domain.HiddenNeuron;
+import com.ruoyi.utils.resumeAnalysis.similarity.word2vec.domain.Neuron;
+import com.ruoyi.utils.resumeAnalysis.similarity.word2vec.domain.WordNeuron;
+
+import java.io.*;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * 学习
+ *
+ * @author itbluebox
+ */
+public class Learn {
+    private Map<String, Neuron> wordMap = new HashMap<>();
+    private int layerSize = 200;// 特征数
+    private int window = 5;
+    private double sample = 1e-3;
+    private double alpha = 0.025;
+    private double startingAlpha = alpha;
+    public int EXP_TABLE_SIZE = 1000;
+    private boolean isCbow = false;
+    private double[] expTable = new double[EXP_TABLE_SIZE];
+    private int trainWordsCount = 0;
+    private int MAX_EXP = 6;
+
+    public Learn(Boolean isCbow, Integer layerSize, Integer window, Double alpha, Double sample) {
+        createExpTable();
+        this.isCbow = isCbow;
+        this.layerSize = layerSize;
+        this.window = window;
+        this.alpha = alpha;
+        this.sample = sample;
+    }
+
+    public Learn() {
+        createExpTable();
+    }
+
+    /**
+     * Precompute the sigmoid table: f(x) = e^x / (e^x + 1) for x in [-MAX_EXP, MAX_EXP]
+     */
+    private void createExpTable() {
+        for (int i = 0; i < EXP_TABLE_SIZE; i++) {
+            expTable[i] = Math.exp(((i / (double) EXP_TABLE_SIZE * 2 - 1) * MAX_EXP));
+            expTable[i] = expTable[i] / (expTable[i] + 1);
+        }
+    }
+
+    /**
+     * 训练模型
+     *
+     * @param file
+     * @throws IOException
+     */
+    private void trainModel(File file) throws IOException {
+        try (BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(file)))) {
+            String temp;
+            long nextRandom = 5;
+            int wordCount = 0;
+            int lastWordCount = 0;
+            int wordCountActual = 0;
+            while ((temp = br.readLine()) != null) {
+                if (wordCount - lastWordCount > 10000) {
+                    System.out.println("alpha:" + alpha + "\tProgress: "
+                            + (int) (wordCountActual / (double) (trainWordsCount + 1) * 100) + "%");
+                    wordCountActual += wordCount - lastWordCount;
+                    lastWordCount = wordCount;
+                    alpha = startingAlpha * (1 - wordCountActual / (double) (trainWordsCount + 1));
+                    if (alpha < startingAlpha * 0.0001) {
+                        alpha = startingAlpha * 0.0001;
+                    }
+                }
+                String[] strs = temp.split("[\t ]+");
+                wordCount += strs.length;
+                List<WordNeuron> sentence = new ArrayList<WordNeuron>();
+                for (int i = 0; i < strs.length; i++) {
+                    Neuron entry = wordMap.get(strs[i]);
+                    if (entry == null) {
+                        continue;
+                    }
+                    // The subsampling randomly discards frequent words while keeping the ranking same
+                    if (sample > 0) {
+                        double ran = (Math.sqrt(entry.freq / (sample * trainWordsCount)) + 1)
+                                * (sample * trainWordsCount) / entry.freq;
+                        nextRandom = nextRandom * 25214903917L + 11;
+                        if (ran < (nextRandom & 0xFFFF) / (double) 65536) {
+                            continue;
+                        }
+                    }
+                    sentence.add((WordNeuron) entry);
+                }
+
+                for (int index = 0; index < sentence.size(); index++) {
+                    nextRandom = nextRandom * 25214903917L + 11;
+                    if (isCbow) {
+                        cbowGram(index, sentence, (int) nextRandom % window);
+                    } else {
+                        skipGram(index, sentence, (int) nextRandom % window);
+                    }
+                }
+
+            }
+            System.out.println("Vocab size: " + wordMap.size());
+            System.out.println("Words in train file: " + trainWordsCount);
+            System.out.println("sucess train over!");
+        }
+    }
+
+    /**
+     * skip gram 模型训练
+     */
+    private void skipGram(int index, List<WordNeuron> sentence, int b) {
+        WordNeuron word = sentence.get(index);
+        int a, c = 0;
+        for (a = b; a < window * 2 + 1 - b; a++) {
+            if (a == window) {
+                continue;
+            }
+            c = index - window + a;
+            if (c < 0 || c >= sentence.size()) {
+                continue;
+            }
+
+            double[] neu1e = new double[layerSize];// 误差项
+            // HIERARCHICAL SOFTMAX
+            List<Neuron> neurons = word.neurons;
+            WordNeuron we = sentence.get(c);
+            for (int i = 0; i < neurons.size(); i++) {
+                HiddenNeuron out = (HiddenNeuron) neurons.get(i);
+                double f = 0;
+                // Propagate hidden -> output
+                for (int j = 0; j < layerSize; j++) {
+                    f += we.syn0[j] * out.syn1[j];
+                }
+                if (f <= -MAX_EXP || f >= MAX_EXP) {
+                    continue;
+                } else {
+                    f = (f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2);
+                    f = expTable[(int) f];
+                }
+                // 'g' is the gradient multiplied by the learning rate
+                double g = (1 - word.codeArray[i] - f) * alpha;
+                // Propagate errors output -> hidden
+                for (c = 0; c < layerSize; c++) {
+                    neu1e[c] += g * out.syn1[c];
+                }
+                // Learn weights hidden -> output
+                for (c = 0; c < layerSize; c++) {
+                    out.syn1[c] += g * we.syn0[c];
+                }
+            }
+
+            // Learn weights input -> hidden
+            for (int j = 0; j < layerSize; j++) {
+                we.syn0[j] += neu1e[j];
+            }
+        }
+
+    }
+
+    /**
+     * 词袋模型
+     *
+     * @param index
+     * @param sentence
+     * @param b
+     */
+    private void cbowGram(int index, List<WordNeuron> sentence, int b) {
+        WordNeuron word = sentence.get(index);
+        int a, c;
+
+        List<Neuron> neurons = word.neurons;
+        double[] neu1e = new double[layerSize];// error vector
+        double[] neu1 = new double[layerSize];// sum of the context word vectors (not an error term)
+        WordNeuron last_word;
+
+        for (a = b; a < window * 2 + 1 - b; a++)
+            if (a != window) {
+                c = index - window + a;
+                if (c < 0)
+                    continue;
+                if (c >= sentence.size())
+                    continue;
+                last_word = sentence.get(c);
+                if (last_word == null)
+                    continue;
+                for (c = 0; c < layerSize; c++)
+                    neu1[c] += last_word.syn0[c];
+            }
+
+        // HIERARCHICAL SOFTMAX
+        for (int d = 0; d < neurons.size(); d++) {
+            HiddenNeuron out = (HiddenNeuron) neurons.get(d);
+            double f = 0;
+            // Propagate hidden -> output
+            for (c = 0; c < layerSize; c++)
+                f += neu1[c] * out.syn1[c];
+            if (f <= -MAX_EXP)
+                continue;
+            else if (f >= MAX_EXP)
+                continue;
+            else
+                f = expTable[(int) ((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))];
+            // 'g' is the gradient multiplied by the learning rate
+            // double g = (1 - word.codeArray[d] - f) * alpha;
+            // double g = f*(1-f)*( word.codeArray[i] - f) * alpha;
+            double g = f * (1 - f) * (word.codeArray[d] - f) * alpha;
+            //
+            for (c = 0; c < layerSize; c++) {
+                neu1e[c] += g * out.syn1[c];
+            }
+            // Learn weights hidden -> output
+            for (c = 0; c < layerSize; c++) {
+                out.syn1[c] += g * neu1[c];
+            }
+        }
+        for (a = b; a < window * 2 + 1 - b; a++) {
+            if (a != window) {
+                c = index - window + a;
+                if (c < 0)
+                    continue;
+                if (c >= sentence.size())
+                    continue;
+                last_word = sentence.get(c);
+                if (last_word == null)
+                    continue;
+                for (c = 0; c < layerSize; c++)
+                    last_word.syn0[c] += neu1e[c];
+            }
+
+        }
+    }
+
+    /**
+     * 读入词语,并统计词频
+     *
+     * @param file
+     * @throws IOException
+     */
+    private void readWord(File file) throws IOException {
+        VecMap<String> map = new VecMap<>();
+        try (BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(file)))) {
+            String temp;
+            while ((temp = br.readLine()) != null) {
+                String[] split = temp.split("[\t ]+");
+                trainWordsCount += split.length;
+                for (String s : split) {
+                    map.add(s);
+                }
+            }
+        }
+        for (Map.Entry<String, Integer> ele : map.getHm().entrySet()) {
+            wordMap.put(ele.getKey(), new WordNeuron(ele.getKey(), (double) ele.getValue() / map.size(), layerSize));
+        }
+    }
+
+    /**
+     * 对文本进行预分类
+     *
+     * @param files
+     * @throws IOException
+     * @throws FileNotFoundException
+     */
+    private void readVocabWithSupervised(File[] files) throws IOException {
+        for (int category = 0; category < files.length; category++) {
+            // 对多个文件学习
+            VecMap<String> map = new VecMap<>();
+            try (BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(files[category])))) {
+                String temp;
+                while ((temp = br.readLine()) != null) {
+                    String[] split = temp.split("[\t ]+");
+                    trainWordsCount += split.length;
+                    for (String string : split) map.add(string);
+                }
+            }
+            for (Map.Entry<String, Integer> element : map.getHm().entrySet()) {
+                double tarFreq = (double) element.getValue() / map.size();
+                if (wordMap.get(element.getKey()) != null) {
+                    double srcFreq = wordMap.get(element.getKey()).freq;
+                    if (srcFreq >= tarFreq) {
+                        continue;
+                    }
+                    Neuron wordNeuron = wordMap.get(element.getKey());
+                    wordNeuron.category = category;
+                    wordNeuron.freq = tarFreq;
+                } else {
+                    wordMap.put(element.getKey(), new WordNeuron(element.getKey(), tarFreq, category, layerSize));
+                }
+            }
+        }
+    }
+
+    /**
+     * 根据文件学习
+     *
+     * @param file
+     * @throws IOException
+     */
+    public void learnFile(File file) throws IOException {
+        readWord(file);
+        new Huffman(layerSize).buildTree(wordMap.values());
+
+        // 查找每个神经元
+        for (Neuron neuron : wordMap.values()) {
+            ((WordNeuron) neuron).makeNeurons();
+        }
+
+        trainModel(file);
+    }
+
+    /**
+     * 根据预分类的文件学习
+     *
+     * @param summaryFile     合并文件
+     * @param classifiedFiles 分类文件
+     * @throws IOException
+     */
+    public void learnFile(File summaryFile, File[] classifiedFiles)
+            throws IOException {
+        readVocabWithSupervised(classifiedFiles);
+        new Huffman(layerSize).buildTree(wordMap.values());
+        // 查找每个神经元
+        for (Neuron neuron : wordMap.values()) {
+            ((WordNeuron) neuron).makeNeurons();
+        }
+        trainModel(summaryFile);
+    }
+
+    /**
+     * 保存模型
+     */
+    public void saveModel(File file) {
+        try (DataOutputStream dataOutputStream = new DataOutputStream(
+                new BufferedOutputStream(new FileOutputStream(file)))) {
+            dataOutputStream.writeInt(wordMap.size());
+            dataOutputStream.writeInt(layerSize);
+            double[] syn0 = null;
+            for (Map.Entry<String, Neuron> element : wordMap.entrySet()) {
+                dataOutputStream.writeUTF(element.getKey());
+                syn0 = ((WordNeuron) element.getValue()).syn0;
+                for (double d : syn0) {
+                    dataOutputStream.writeFloat(((Double) d).floatValue());
+                }
+            }
+        } catch (IOException e) {
+            e.printStackTrace();
+        }
+    }
+
+    public int getLayerSize() {
+        return layerSize;
+    }
+
+    public void setLayerSize(int layerSize) {
+        this.layerSize = layerSize;
+    }
+
+    public int getWindow() {
+        return window;
+    }
+
+    public void setWindow(int window) {
+        this.window = window;
+    }
+
+    public double getSample() {
+        return sample;
+    }
+
+    public void setSample(double sample) {
+        this.sample = sample;
+    }
+
+    public double getAlpha() {
+        return alpha;
+    }
+
+    public void setAlpha(double alpha) {
+        this.alpha = alpha;
+        this.startingAlpha = alpha;
+    }
+
+    public boolean isCbow() {
+        return isCbow;
+    }
+
+    public void setCbow(boolean cbow) {
+        isCbow = cbow;
+    }
+}
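
For finer control than the Word2vec facade, Learn can be configured directly before training. The hyperparameter values and file paths below are purely illustrative; the corpus is expected to be pre-segmented text:

import com.ruoyi.utils.resumeAnalysis.similarity.word2vec.vec.Learn;

import java.io.File;
import java.io.IOException;

public class LearnDemo {
    public static void main(String[] args) throws IOException {
        Learn learn = new Learn();
        learn.setLayerSize(100);  // vector dimension
        learn.setWindow(5);       // context window size
        learn.setAlpha(0.025);    // starting learning rate
        learn.setCbow(true);      // CBOW instead of the default skip-gram
        learn.learnFile(new File("data/corpus-segmented.txt"));
        learn.saveModel(new File("data/corpus.model"));
    }
}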

+ 244 - 0
ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/word2vec/vec/ModelParser.java

@@ -0,0 +1,244 @@
+package com.ruoyi.utils.resumeAnalysis.similarity.word2vec.vec;
+
+
+import com.ruoyi.utils.resumeAnalysis.similarity.word2vec.domain.WordEntry;
+
+import java.io.*;
+import java.util.*;
+
+/**
+ * word2vec 模型的使用
+ *
+ * @author itbluebox
+ */
+public class ModelParser {
+    private int words;
+    private int size;
+    private int topNSize = 40;
+    private HashMap<String, float[]> wordMap = new HashMap<>();
+    private static final int MAX_SIZE = 50;
+
+    public void loadModel(String path) throws IOException {
+        try (DataInputStream dis = new DataInputStream(new BufferedInputStream(new FileInputStream(path)))) {
+            words = dis.readInt();
+            size = dis.readInt();
+            float vector;
+            String key;
+            float[] value;
+            for (int i = 0; i < words; i++) {
+                double len = 0.0;
+                key = dis.readUTF();
+                value = new float[size];
+                for (int j = 0; j < size; j++) {
+                    vector = dis.readFloat();
+                    len += vector * vector;
+                    value[j] = vector;
+                }
+                len = Math.sqrt(len);
+                for (int j = 0; j < size; j++) {
+                    value[j] /= len;
+                }
+                wordMap.put(key, value);
+            }
+        }
+    }
+
+    /**
+     * 近义词
+     */
+    public TreeSet<WordEntry> analogy(String word0, String word1, String word2) {
+        float[] wv0 = getWordVector(word0);
+        float[] wv1 = getWordVector(word1);
+        float[] wv2 = getWordVector(word2);
+
+        if (wv1 == null || wv2 == null || wv0 == null) {
+            return null;
+        }
+        float[] wordVector = new float[size];
+        for (int i = 0; i < size; i++) {
+            wordVector[i] = wv1[i] - wv0[i] + wv2[i];
+        }
+        float[] tempVector;
+        String name;
+        List<WordEntry> wordEntrys = new ArrayList<WordEntry>(topNSize);
+        for (Map.Entry<String, float[]> entry : wordMap.entrySet()) {
+            name = entry.getKey();
+            if (name.equals(word0) || name.equals(word1) || name.equals(word2)) {
+                continue;
+            }
+            float dist = 0;
+            tempVector = entry.getValue();
+            for (int i = 0; i < wordVector.length; i++) {
+                dist += wordVector[i] * tempVector[i];
+            }
+            insertTopN(name, dist, wordEntrys);
+        }
+        return new TreeSet<WordEntry>(wordEntrys);
+    }
+
+    private void insertTopN(String name, float score, List<WordEntry> wordsEntrys) {
+        if (wordsEntrys.size() < topNSize) {
+            wordsEntrys.add(new WordEntry(name, score));
+            return;
+        }
+        float min = Float.MAX_VALUE;
+        int minOffe = 0;
+        for (int i = 0; i < topNSize; i++) {
+            WordEntry wordEntry = wordsEntrys.get(i);
+            if (min > wordEntry.score) {
+                min = wordEntry.score;
+                minOffe = i;
+            }
+        }
+
+        if (score > min) {
+            wordsEntrys.set(minOffe, new WordEntry(name, score));
+        }
+
+    }
+
+    public Set<WordEntry> distance(String queryWord) {
+        float[] center = wordMap.get(queryWord);
+        if (center == null) {
+            return Collections.emptySet();
+        }
+
+        int resultSize = wordMap.size() < topNSize ? wordMap.size() : topNSize;
+        TreeSet<WordEntry> result = getWordEntries(center, resultSize);
+
+        return result;
+    }
+
+
+    public Set<WordEntry> distance(List<String> words) {
+        float[] center = null;
+        for (String word : words) {
+            center = sum(center, wordMap.get(word));
+        }
+
+        if (center == null) {
+            return Collections.emptySet();
+        }
+
+        int resultSize = wordMap.size() < topNSize ? wordMap.size() : topNSize;
+        TreeSet<WordEntry> result = getWordEntries(center, resultSize);
+
+        return result;
+    }
+
+    private TreeSet<WordEntry> getWordEntries(float[] center, int resultSize) {
+        TreeSet<WordEntry> result = new TreeSet<WordEntry>();
+
+        // admission threshold: the lowest score currently kept, only meaningful once the set is full
+        // (Float.MIN_VALUE is the smallest positive float and would wrongly exclude negative similarities)
+        double min = -Float.MAX_VALUE;
+        for (Map.Entry<String, float[]> entry : wordMap.entrySet()) {
+            float[] vector = entry.getValue();
+            float dist = 0;
+            for (int i = 0; i < vector.length; i++) {
+                dist += center[i] * vector[i];
+            }
+
+            if (dist > min) {
+                result.add(new WordEntry(entry.getKey(), dist));
+                if (resultSize < result.size()) {
+                    result.pollLast();
+                    min = result.last().score;
+                }
+            }
+        }
+        // drop the closest entry (for single-word queries this is the query word itself)
+        result.pollFirst();
+        return result;
+    }
+
+    private float[] sum(float[] center, float[] fs) {
+        if (center == null && fs == null) {
+            return null;
+        }
+        if (fs == null) {
+            return center;
+        }
+        if (center == null) {
+            return fs;
+        }
+        for (int i = 0; i < fs.length; i++) {
+            center[i] += fs[i];
+        }
+        return center;
+    }
+
+    /**
+     * 得到词向量
+     *
+     * @param word
+     * @return
+     */
+    public float[] getWordVector(String word) {
+        return wordMap.get(word);
+    }
+
+    public static float readFloat(InputStream is) throws IOException {
+        byte[] bytes = new byte[4];
+        is.read(bytes);
+        return getFloat(bytes);
+    }
+
+    /**
+     * 读取一个float
+     *
+     * @param b
+     * @return
+     */
+    public static float getFloat(byte[] b) {
+        int accum = 0;
+        accum = accum | (b[0] & 0xff) << 0;
+        accum = accum | (b[1] & 0xff) << 8;
+        accum = accum | (b[2] & 0xff) << 16;
+        accum = accum | (b[3] & 0xff) << 24;
+        return Float.intBitsToFloat(accum);
+    }
+
+    /**
+     * 读取一个字符串
+     *
+     * @param dis
+     * @return
+     * @throws IOException
+     */
+    private static String readString(DataInputStream dis) throws IOException {
+        byte[] bytes = new byte[MAX_SIZE];
+        byte b = dis.readByte();
+        int i = -1;
+        StringBuilder sb = new StringBuilder();
+        while (b != 32 && b != 10) {
+            i++;
+            bytes[i] = b;
+            b = dis.readByte();
+            if (i == 49) {
+                sb.append(new String(bytes));
+                i = -1;
+                bytes = new byte[MAX_SIZE];
+            }
+        }
+        sb.append(new String(bytes, 0, i + 1));
+        return sb.toString();
+    }
+
+    public int getTopNSize() {
+        return topNSize;
+    }
+
+    public void setTopNSize(int topNSize) {
+        this.topNSize = topNSize;
+    }
+
+    public HashMap<String, float[]> getWordMap() {
+        return wordMap;
+    }
+
+    public int getWords() {
+        return words;
+    }
+
+    public int getSize() {
+        return size;
+    }
+}
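
A usage sketch for the parser above (model path, query words and demo class name are placeholders, and the query words must exist in the model vocabulary). Vectors are L2-normalised on load, so the dot products used by distance() behave as cosine similarities:

import com.ruoyi.utils.resumeAnalysis.similarity.word2vec.domain.WordEntry;
import com.ruoyi.utils.resumeAnalysis.similarity.word2vec.vec.ModelParser;

import java.io.IOException;
import java.util.Set;

public class ModelParserDemo {
    public static void main(String[] args) throws IOException {
        ModelParser parser = new ModelParser();
        parser.loadModel("data/corpus.model");
        parser.setTopNSize(10);
        // Nearest neighbours of a single word.
        Set<WordEntry> nearest = parser.distance("开发");
        nearest.forEach(System.out::println);
        // Analogy query: word0 is to word1 as word2 is to ? (returns null if any word is missing)
        System.out.println(parser.analogy("北京", "中国", "东京"));
    }
}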

+ 49 - 0
ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/word2vec/vec/VecMap.java

@@ -0,0 +1,49 @@
+package com.ruoyi.utils.resumeAnalysis.similarity.word2vec.vec;
+
+import java.util.HashMap;
+
+/**
+ * 自定义的hashmap
+ *
+ * @author itbluebox
+ */
+public class VecMap<T> {
+    private HashMap<T, Integer> hm = null;
+
+    public VecMap() {
+        this.hm = new HashMap<T, Integer>();
+    }
+
+    public VecMap(int size) {
+        this.hm = new HashMap<T, Integer>(size);
+    }
+
+    public void add(T t, int n) {
+        Integer integer;
+        if ((integer = this.hm.get(t)) != null) {
+            this.hm.put(t, Integer.valueOf(integer.intValue() + n));
+        } else {
+            this.hm.put(t, Integer.valueOf(n));
+        }
+    }
+
+    public void add(T t) {
+        this.add(t, 1);
+    }
+
+    public int size() {
+        return hm.size();
+    }
+
+    public void remove(T t) {
+        this.hm.remove(t);
+    }
+
+    public HashMap<T, Integer> getHm() {
+        return hm;
+    }
+
+    public void setHm(HashMap<T, Integer> hm) {
+        this.hm = hm;
+    }
+}

+ 104 - 0
ruoyi-admin/src/main/java/com/ruoyi/utils/resumeAnalysis/similarity/word2vec/vec/WordKmeans.java

@@ -0,0 +1,104 @@
+package com.ruoyi.utils.resumeAnalysis.similarity.word2vec.vec;
+
+import java.util.*;
+
+/**
+ * Kmeans聚类
+ *
+ * @author itbluebox
+ */
+public class WordKmeans {
+    public static class Classes {
+        private int id;
+        private float[] center;
+
+        public Classes(int id, float[] center) {
+            this.id = id;
+            this.center = center.clone();
+        }
+
+        Map<String, Double> values = new HashMap<>();
+
+        public double distance(float[] value) {
+            double sum = 0;
+            for (int i = 0; i < value.length; i++) {
+                sum += (center[i] - value[i]) * (center[i] - value[i]);
+            }
+            return sum;
+        }
+
+        public void putValue(String word, double score) {
+            values.put(word, score);
+        }
+
+        public void updateCenter(HashMap<String, float[]> wordMap) {
+            // reset the whole centre vector (starting at index 0) before averaging the member vectors
+            for (int i = 0; i < center.length; i++) {
+                center[i] = 0;
+            }
+            float[] value;
+            for (String keyword : values.keySet()) {
+                value = wordMap.get(keyword);
+                for (int i = 0; i < value.length; i++) {
+                    center[i] += value[i];
+                }
+            }
+            for (int i = 0; i < center.length; i++) {
+                center[i] = center[i] / values.size();
+            }
+        }
+
+        public void clear() {
+            values.clear();
+        }
+
+        public List<Map.Entry<String, Double>> getTop(int n) {
+            List<Map.Entry<String, Double>> arrayList = new ArrayList<>(values.entrySet());
+            Collections.sort(arrayList, (o1, o2) -> o1.getValue() > o2.getValue() ? 1 : -1);
+            int min = Math.min(n, arrayList.size() - 1);
+            if (min <= 1) return Collections.emptyList();
+            return arrayList.subList(0, min);
+        }
+    }
+
+    private HashMap<String, float[]> wordMap;
+    private int iter;
+    private Classes[] cArray;
+
+    public WordKmeans(HashMap<String, float[]> wordMap, int c, int iter) {
+        this.wordMap = wordMap;
+        this.iter = iter;
+        this.cArray = new Classes[c];
+    }
+
+    public Classes[] explain() {
+        Iterator<Map.Entry<String, float[]>> iterator = wordMap.entrySet().iterator();
+        for (int i = 0; i < cArray.length; i++) {
+            Map.Entry<String, float[]> next = iterator.next();
+            cArray[i] = new Classes(i, next.getValue());
+        }
+        for (int i = 0; i < iter; i++) {
+            for (Classes classes : cArray) {
+                classes.clear();
+            }
+            iterator = wordMap.entrySet().iterator();
+            while (iterator.hasNext()) {
+                Map.Entry<String, float[]> next = iterator.next();
+                double miniScore = Double.MAX_VALUE;
+                double temp;
+                int classesId = 0;
+                for (Classes classes : cArray) {
+                    temp = classes.distance(next.getValue());
+                    if (miniScore > temp) {
+                        miniScore = temp;
+                        classesId = classes.id;
+                    }
+                }
+                cArray[classesId].putValue(next.getKey(), miniScore);
+            }
+            for (Classes classes : cArray) {
+                classes.updateCenter(wordMap);
+            }
+        }
+        return cArray;
+    }
+}
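
A clustering sketch built on the parser's word map (cluster count, iteration count and paths are illustrative; it assumes the vocabulary holds at least as many words as clusters, since explain() seeds one centre per vocabulary entry):

import com.ruoyi.utils.resumeAnalysis.similarity.word2vec.vec.ModelParser;
import com.ruoyi.utils.resumeAnalysis.similarity.word2vec.vec.WordKmeans;

import java.io.IOException;
import java.util.Map;

public class WordKmeansDemo {
    public static void main(String[] args) throws IOException {
        ModelParser parser = new ModelParser();
        parser.loadModel("data/corpus.model");
        // Cluster the vocabulary vectors into 20 groups over 50 k-means iterations.
        WordKmeans kmeans = new WordKmeans(parser.getWordMap(), 20, 50);
        WordKmeans.Classes[] classes = kmeans.explain();
        for (WordKmeans.Classes c : classes) {
            // Print up to 10 words closest to each cluster centre.
            for (Map.Entry<String, Double> e : c.getTop(10)) {
                System.out.print(e.getKey() + " ");
            }
            System.out.println();
        }
    }
}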

BIN
ruoyi-admin/src/main/lib/hanlp-1.8.3.jar


+ 41 - 0
ruoyi-admin/src/main/resources/hanlp.properties

@@ -0,0 +1,41 @@
+#本配置文件中的路径的根目录,根目录+其他路径=完整路径(支持相对路径,请参考:https://github.com/hankcs/HanLP/pull/254)
+#Windows用户请注意,路径分隔符统一使用/
+#root=C:/Users/zjc/Desktop/工作文件/
+root=D:/JavaProjects/HanLP/
+#root=/home/quizgo/HanLP/
+
+#好了,以上为唯一需要修改的部分,以下配置项按需反注释编辑。
+
+#核心词典路径
+CoreDictionaryPath=data/dictionary/CoreNatureDictionary.txt
+#2元语法词典路径
+BiGramDictionaryPath=data/dictionary/CoreNatureDictionary.ngram.txt
+#自定义词典路径,用;隔开多个自定义词典,空格开头表示在同一个目录,使用“文件名 词性”形式则表示这个词典的词性默认是该词性。优先级递减。
+#所有词典统一使用UTF-8编码,每一行代表一个单词,格式遵从[单词] [词性A] [A的频次] [词性B] [B的频次] ... 如果不填词性则表示采用词典的默认词性。
+CustomDictionaryPath=data/dictionary/custom/CustomDictionary.txt; THUOCL_it.txt; 大学词库.txt; 现代汉语补充词库.txt; 全国地名大全.txt ns; 人名词典.txt; 机构名词典.txt; 自定义词库.txt; 上海地名.txt ns;data/dictionary/person/nrf.txt nrf;
+#停用词词典路径
+#CoreStopWordDictionaryPath=data/dictionary/stopwords.txt
+#同义词词典路径
+CoreSynonymDictionaryDictionaryPath=data/dictionary/synonym/CoreSynonym.txt
+#人名词典路径
+PersonDictionaryPath=data/dictionary/person/nr.txt
+#人名词典转移矩阵路径
+PersonDictionaryTrPath=data/dictionary/person/nr.tr.txt
+#繁简词典根目录
+#tcDictionaryRoot=data/dictionary/tc
+#HMM分词模型
+HMMSegmentModelPath=data/model/segment/HMMSegmentModel.bin
+#分词结果是否展示词性
+#ShowTermNature=true
+#IO适配器,实现com.hankcs.hanlp.corpus.io.IIOAdapter接口以在不同的平台(Hadoop、Redis等)上运行HanLP
+#默认的IO适配器如下,该适配器是基于普通文件系统的。
+#IOAdapter=com.hankcs.hanlp.corpus.io.FileIOAdapter
+#感知机词法分析器
+#PerceptronCWSModelPath=data/model/perceptron/pku1998/cws.bin
+#PerceptronPOSModelPath=data/model/perceptron/pku1998/pos.bin
+#PerceptronNERModelPath=data/model/perceptron/pku1998/ner.bin
+#CRF词法分析器
+#CRFCWSModelPath=data/model/crf/pku199801/cws.txt
+#CRFPOSModelPath=data/model/crf/pku199801/pos.txt
+#CRFNERModelPath=data/model/crf/pku199801/ner.txt
+#更多配置项请参考 https://github.com/hankcs/HanLP/blob/master/src/main/java/com/hankcs/hanlp/HanLP.java#L59 自行添加
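
A quick smoke test, assuming the HanLP data package referenced by root above has been downloaded (the demo class name and sample text are illustrative): if the paths resolve, it prints segmented terms with part-of-speech tags; otherwise the load failure shows up in the logs.

import com.hankcs.hanlp.HanLP;

public class HanlpConfigSmokeTest {
    public static void main(String[] args) {
        // If root/data in hanlp.properties resolve correctly, this prints a term list with POS tags.
        System.out.println(HanLP.segment("简历解析与相似度计算"));
    }
}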

+ 13 - 0
ruoyi-common/src/main/java/com/ruoyi/common/utils/DateUtils.java

@@ -78,6 +78,19 @@ public class DateUtils extends org.apache.commons.lang3.time.DateUtils
         return new SimpleDateFormat(format).format(date);
     }
 
+    /**
+     * 得到日期字符串 默认格式(yyyy-MM-dd) pattern可以为:"yyyy-MM-dd" "HH:mm:ss" "E"
+     */
+    public static String formatDate(Date date, Object... pattern) {
+        String formatDate = null;
+        if (pattern != null && pattern.length > 0) {
+            formatDate = DateFormatUtils.format(date, pattern[0].toString());
+        } else {
+            formatDate = DateFormatUtils.format(date, "yyyy-MM-dd");
+        }
+        return formatDate;
+    }
+
     public static final Date dateTime(final String format, final String ts)
     {
         try
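
A brief usage sketch for the new formatDate overload added above (the demo class name is illustrative; the printed values depend on the current date and locale):

import com.ruoyi.common.utils.DateUtils;

import java.util.Date;

public class DateUtilsDemo {
    public static void main(String[] args) {
        Date now = new Date();
        System.out.println(DateUtils.formatDate(now));              // default pattern yyyy-MM-dd
        System.out.println(DateUtils.formatDate(now, "HH:mm:ss"));  // explicit pattern
        System.out.println(DateUtils.formatDate(now, "E"));         // day of week, locale dependent
    }
}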

+ 151 - 0
ruoyi-system/src/main/java/com/ruoyi/system/domain/resume/Resume.java

@@ -0,0 +1,151 @@
+package com.ruoyi.system.domain.resume;
+
+
+import lombok.Data;
+
+import java.util.Date;
+import java.util.List;
+
+@Data
+public class Resume {
+
+	private Integer resumeId;//简历ID
+
+    private String resumeName;//简历名称
+
+    private String userName;//姓名
+
+    private String gender;//性别
+
+    private Date birthday;//生日
+
+    private String age;//年龄
+
+    private String degree;//学历
+
+    private String experience;//工作年限
+
+    private String mobile;//手机号码
+
+    private String cardNo;//身份证号码
+
+    private String email;//电子邮箱
+
+    private Boolean isMarryed;//是否已婚
+
+    private String residence;//居住地
+
+    private String origin;//籍贯
+
+    private String nationality;//民族
+
+    private String graduateCollege;//毕业院校
+
+    private String major;//专业
+
+    private String lanuage;//语言水平
+
+    private Date graduateDate;//毕业时间
+
+    private String positionRating;//评级(低级,中级,高级)
+
+    private String keyWords;//技能关键字
+
+    private String jobObjective;//求职意向
+
+    private String salary;//薪水
+
+    private String selfIntroduce;//自我介绍
+
+    private String userId;//用户ID
+
+    private String headUrl;//用户图片地址
+
+    private Boolean isPrivate;//公开还是私密
+
+    private Boolean isDeleted;//是否删除
+
+    private Date createDate;//创建时间
+
+    private String updateUserId;//修改人
+
+    private Date updateDate;//更新时间
+    
+    private String resumeDownloadUrl;//简历下载地址
+    
+    private String status;//状态
+    
+    private String birthDateString;//出生日期
+    
+    private String currentCompany;//当前公司
+    
+    private String industry;//所属行业
+    
+    private List<ResumeWork> workList;//工作履历
+    
+    private List<ResumeProject> projectList;//项目经验
+    
+    private List<ResumeEducation> eduList;//教育经历
+    
+    private String createUserName;//创建人
+    
+    private String projectName;//项目名称
+    
+    private Date bindDate;//绑定时间
+    
+    private String bindId;//绑定id
+    
+    private String ivnum;//参加面试的个数
+    
+    private String projectId;//项目id
+
+    private String startTime; //开始时间
+
+    private String endTime; //结束时间
+
+    /**
+     * 文件路径
+     */
+    private String fileUrl;
+
+    private String workingPlace;// 工作地
+
+    private String certificate;// 证书
+
+    private String languageAbility;// 语言能力
+
+    private String industryExperience;// 行业经验
+
+    private String workExperience;// 工作经验
+
+    private String post;// 岗位
+
+    private String lastCompany;// 最近工作公司
+
+    private String lastIndustry;// 最近工作行业
+
+    private String lastPosition;// 最近工作职位
+
+    private Integer demandId;// 需求id
+
+    private Boolean isNeedInterviewer;// 是否需要面试官
+
+    private Boolean isInternalDemand;// 是否内部需求
+
+    private Boolean isGroupInterview;// 是否集中面试
+
+    private String selfEvaluation;// 自我评价
+
+    private String position;// 岗位
+
+    private String interviewer;// 面试官
+
+    private String interviewTime;// 面试时间
+
+    private String exceptedSalary;// 期望薪资
+
+    private Boolean isEnabled;// 是否可用
+
+    private String primaryScreeningUserId;// 简历初筛人员
+
+}

+ 27 - 0
ruoyi-system/src/main/java/com/ruoyi/system/domain/resume/ResumeEducation.java

@@ -0,0 +1,27 @@
+package com.ruoyi.system.domain.resume;
+
+
+import lombok.Data;
+
+@Data
+public class ResumeEducation {
+
+	private Integer eduId;//教育记录ID
+
+    private String schoolName;//学校名称
+
+    private String major;//专业
+
+    private String eduStartDate;//入学时间
+
+    private String eduEndDate;//毕业时间
+
+    private String degree;//学历
+
+    private Integer resumeId;//简历ID(外键)
+
+    private String detail;//描述
+    
+    private Integer orderBy;//排序
+
+}

+ 29 - 0
ruoyi-system/src/main/java/com/ruoyi/system/domain/resume/ResumeProject.java

@@ -0,0 +1,29 @@
+package com.ruoyi.system.domain.resume;
+
+
+import lombok.Data;
+
+@Data
+public class ResumeProject {
+
+	private Integer projectId;//项目记录ID
+
+    private String projectName;//项目名称
+
+    private String projectDetail;//项目描述
+
+    private String partIn;//责任描述
+
+    private String projectStartDate;//项目开始时间
+
+    private String projectEndDate;//项目结束时间
+
+    private Integer resumeId;//简历ID(外键)
+
+    private String developEnvironment;//软件环境
+
+    private String developTools;//开发工具
+    
+    private Integer orderBy;//排序字段
+    
+}

+ 33 - 0
ruoyi-system/src/main/java/com/ruoyi/system/domain/resume/ResumeWork.java

@@ -0,0 +1,33 @@
+package com.ruoyi.system.domain.resume;
+
+
+import lombok.Data;
+
+@Data
+public class ResumeWork {
+ 
+	private Integer workId;//工作记录ID
+
+    private String iinductionStartDate;//入职时间
+
+    private String iinductionEndDate;//离职时间
+
+    private Integer resumeId;//简历ID(外键)
+    
+    private String companyName;//公司名称
+
+    private String companyPosition;//公司地址
+
+    private String leaderName;//证明人
+
+    private String leaderMobile;//证明人联系方式
+
+    private String postionName;//岗位
+
+    private String workDetail;//工作描述
+
+    private String reason;//离职原因
+    
+    private Integer orderBy;//排序
+
+}
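
A small sketch of how the resume domain objects above nest together once a resume has been parsed. All values are made up for illustration; the setters and getters come from Lombok's @Data:

import com.ruoyi.system.domain.resume.Resume;
import com.ruoyi.system.domain.resume.ResumeWork;

import java.util.Collections;

public class ResumeAssemblyDemo {
    public static void main(String[] args) {
        ResumeWork work = new ResumeWork();
        work.setCompanyName("某科技有限公司");
        work.setPostionName("Java开发工程师");
        work.setIinductionStartDate("2020-07");
        work.setIinductionEndDate("2023-06");

        Resume resume = new Resume();
        resume.setUserName("张三");
        resume.setDegree("本科");
        resume.setKeyWords("Java,Spring,MySQL");
        resume.setWorkList(Collections.singletonList(work));

        System.out.println(resume.getUserName() + " - " + resume.getWorkList().size() + " work record(s)");
    }
}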