|
@@ -0,0 +1,859 @@
|
|
|
+package com.ruoyi.utils.resumeAnalysis;
|
|
|
+
|
|
|
+import com.ruoyi.common.utils.DateUtils;
|
|
|
+import com.ruoyi.system.domain.resume.Resume;
|
|
|
+import com.ruoyi.system.domain.resume.ResumeEducation;
|
|
|
+import com.ruoyi.system.domain.resume.ResumeProject;
|
|
|
+import com.ruoyi.system.domain.resume.ResumeWork;
|
|
|
+import com.ruoyi.utils.resumeAnalysis.similarity.Similarity;
|
|
|
+import com.ruoyi.utils.resumeAnalysis.similarity.similarity.util.StringUtil;
|
|
|
+import com.hankcs.hanlp.corpus.tag.Nature;
|
|
|
+import com.hankcs.hanlp.seg.common.Term;
|
|
|
+import com.hankcs.hanlp.tokenizer.NLPTokenizer;
|
|
|
+import org.apache.commons.lang3.StringUtils;
|
|
|
+
|
|
|
+import java.util.*;
|
|
|
+import java.util.regex.Matcher;
|
|
|
+import java.util.regex.Pattern;
|
|
|
+
|
|
|
+public class ResumeAnalysisUtil {
|
|
|
+
|
|
|
+ public static void main(String[] main) throws Exception {
|
|
|
+ long startTime = System.currentTimeMillis();
|
|
|
+// String Path = "C:\\Users\\Administrator\\Desktop\\张继超\\1669778120747.doc";
|
|
|
+// String Path = "C:\\Users\\Administrator\\Desktop\\张继超\\1667438649530s.doc";
|
|
|
+// String Path = "C:\\Users\\Administrator\\Desktop\\张继超\\1669798761043.doc";
|
|
|
+// String Path = "C:\\Users\\Administrator\\Desktop\\张继超\\1667371161809.doc";
|
|
|
+// String Path = "C:\\Users\\Administrator\\Desktop\\张继超\\1667371161809.doc";
|
|
|
+// String Path = "C:\\Users\\Administrator\\Desktop\\张继超\\1671083578748.doc";
|
|
|
+// String Path = "C:\\Users\\Administrator\\Desktop\\张继超\\1671088704774.doc";
|
|
|
+// String Path = "C:\\Users\\Administrator\\Desktop\\张继超\\1671088776900.doc";
|
|
|
+// String Path = "C:\\Users\\Administrator\\Desktop\\张继超\\1671089013938.doc";
|
|
|
+// String Path = "C:\\Users\\Administrator\\Desktop\\张继超\\1671095791131.doc";
|
|
|
+// String Path = "C:\\Users\\Administrator\\Desktop\\张继超\\1671095908789.doc";
|
|
|
+// String Path = "C:\\Users\\Administrator\\Desktop\\张继超\\1671096212499.doc";
|
|
|
+// String Path = "C:\\Users\\Administrator\\Desktop\\张继超\\1671152868926.doc";
|
|
|
+// String Path = "C:\\Users\\Administrator\\Desktop\\张继超\\1671156366121.doc";
|
|
|
+ //String Path = "C:\\Users\\Administrator\\Desktop\\王胜利-Java-佛山.doc";
|
|
|
+ String Path = "D:\\我的文件\\me\\张洛飞入职资料\\张洛飞\\张洛飞简历.doc";
|
|
|
+// String Path = "C:\\Users\\Administrator\\Desktop\\张继超\\1671433184218.doc";
|
|
|
+// String Path = "C:\\Users\\Administrator\\Desktop\\张继超\\1671436400350.doc";
|
|
|
+// String Path = "C:\\Users\\Administrator\\Desktop\\张继超\\1671436611530.doc";
|
|
|
+// String Path = "C:\\Users\\Administrator\\Desktop\\张继超\\1671436736483.doc";
|
|
|
+
|
|
|
+// String Path = "C:\\Users\\zjc\\Desktop\\1671095908789.doc";
|
|
|
+
|
|
|
+ //读取word内容
|
|
|
+ List<String> result = FileUtils.readWord(Path);
|
|
|
+ //解析
|
|
|
+ Resume resume = toResumeAnalysis(result);
|
|
|
+ System.out.println(resume);
|
|
|
+ long endTime = System.currentTimeMillis();
|
|
|
+ System.out.println("程序运行时间:" + (double) (endTime - startTime) / 1000 + "s");
|
|
|
+ }
|
|
|
+
|
|
|
+ public static Resume toResumeAnalysis(List<String> result){
|
|
|
+
|
|
|
+ //获取标识和基础信息相似度
|
|
|
+ Map<String, Map<String,Object>> resultMap = subsection(result);
|
|
|
+ //大标识
|
|
|
+ List<Integer> list = identification(resultMap);
|
|
|
+ //大标识内容
|
|
|
+ List<Map<String,String>> paragraphList = paragraph(list,resultMap);
|
|
|
+ //其余标识内容
|
|
|
+ Map<String,String> surplusMap = surplus(result,list,paragraphList);
|
|
|
+
|
|
|
+// System.out.println(resultMap);
|
|
|
+// System.out.println(list);
|
|
|
+// System.out.println(paragraphList);
|
|
|
+// System.out.println(surplusMap);
|
|
|
+
|
|
|
+ //简历基础信息对象
|
|
|
+ Resume resume = new Resume();
|
|
|
+
|
|
|
+ //判断分段中是否有个人信息大标识
|
|
|
+ boolean PERSONAL_FLAG = true;
|
|
|
+
|
|
|
+ //开始进行基础信息处理
|
|
|
+ for (Map<String,String> str:paragraphList) {
|
|
|
+ if(StringUtils.isNotEmpty(str.get("title"))){
|
|
|
+ String title = HanLPUtils.StringFilter(str.get("title"));
|
|
|
+ if(StringUtils.isNotEmpty(title)){
|
|
|
+ //map排序
|
|
|
+ Map<String,String> mapByKey = HanLPUtils.sortMapByKey(str);
|
|
|
+
|
|
|
+ //个人信息、基础信息
|
|
|
+ if(containsWordsIndexOf(title,ResumeType.PERSONAL)){
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 处理数据
|
|
|
+ * 姓名、手机号、学历、行业经验、毕业院校、E-mail、工作经验
|
|
|
+ * 工作地、居住地、专业
|
|
|
+ */
|
|
|
+ for (Map.Entry<String, String> entry : mapByKey.entrySet()) {
|
|
|
+ if(!entry.getKey().equals("title")){
|
|
|
+ String content = entry.getValue().replaceAll("\\s+", "");
|
|
|
+ //获取内容中的小标识
|
|
|
+ List<Map<Integer,String>> mapList = PersonalUtils.getPersonalSubscript(content);
|
|
|
+ //进行数据摘取
|
|
|
+ Map<Integer,String> resumeMap = toArrangementSort(mapList);
|
|
|
+ //拆分数据后进行分类
|
|
|
+ Map<String,String> dataList = toSplitContent(resumeMap,content);
|
|
|
+ for (Map.Entry<String,String> dataMap: dataList.entrySet()) {
|
|
|
+ PersonalType personalType = PersonalUtils.toJudgeType(dataMap.getKey());
|
|
|
+ if(personalType.getCode().equals("XM"))resume.setUserName(dataMap.getValue());//姓名
|
|
|
+ if(personalType.getCode().equals("YX"))resume.setEmail(dataMap.getValue());//邮箱
|
|
|
+ if(personalType.getCode().equals("SJH"))resume.setMobile(dataMap.getValue());//电话
|
|
|
+ if(personalType.getCode().equals("MZ"))resume.setNationality(dataMap.getValue());//民族
|
|
|
+ if(personalType.getCode().equals("XB"))resume.setGender(dataMap.getValue());//性别
|
|
|
+ if(personalType.getCode().equals("XL"))resume.setDegree(dataMap.getValue());//学历
|
|
|
+ if(personalType.getCode().equals("XX"))resume.setGraduateCollege(dataMap.getValue());//学校
|
|
|
+ if(personalType.getCode().equals("ZY"))resume.setMajor(dataMap.getValue());//专业
|
|
|
+ if(personalType.getCode().equals("GZJY"))resume.setWorkExperience(dataMap.getValue());//工作经验
|
|
|
+ if(personalType.getCode().equals("JZD"))resume.setResidence(dataMap.getValue());//居住地
|
|
|
+ if(personalType.getCode().equals("GZD"))resume.setWorkingPlace(dataMap.getValue());//工作地
|
|
|
+// if(personalType.getCode().equals("SGTZ"))resume.setUserName(dataMap.getValue());//身高体重
|
|
|
+ if(personalType.getCode().equals("SR"))resume.setBirthDateString(dataMap.getValue());//生日
|
|
|
+ if(personalType.getCode().equals("YYNL"))resume.setLanguageAbility(dataMap.getValue());//语言能力
|
|
|
+ if(personalType.getCode().equals("GW"))resume.setPosition(dataMap.getValue());//岗位
|
|
|
+// if(personalType.getCode().equals("BYSJ"))resume.setUserName(dataMap.getValue());//毕业时间
|
|
|
+
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ /*
|
|
|
+ 如果又个人信息和基础信息那么就进行信息提取如果没有则从其他信息里提取信息
|
|
|
+ */
|
|
|
+ PERSONAL_FLAG = false;
|
|
|
+ }
|
|
|
+ //工作经历
|
|
|
+ if(containsWordsIndexOf(title,ResumeType.WORKEXPERIENCE)){
|
|
|
+ /**
|
|
|
+ * 开始处理工作经历信息
|
|
|
+ */
|
|
|
+ List<String> dateList = new ArrayList<>();
|
|
|
+ List<String> companyNameList = new ArrayList<>();
|
|
|
+ List<String> postionNameList = new ArrayList<>();
|
|
|
+ List<String> workDetailList = new ArrayList<>();
|
|
|
+ List<ResumeWork> workList = new ArrayList<>();
|
|
|
+ //当前标识节点
|
|
|
+ String flag = "";
|
|
|
+ for (Map.Entry<String, String> entry : mapByKey.entrySet()) {
|
|
|
+ if (!entry.getKey().equals("title")) {
|
|
|
+ String content = entry.getValue().replaceAll("\\s+", "");
|
|
|
+ //提取时间
|
|
|
+
|
|
|
+ String SR = "^\\d{4}(.|-|\\/|年)\\d{1,2}(~|—|--|–|~|-|\\/)(\\d{4}(.|-|\\/|年)\\d{1,2}|至今)";
|
|
|
+ Pattern pattern = Pattern.compile(SR); //尝试提取这样类型的数据
|
|
|
+ Matcher matcher = pattern.matcher(content);
|
|
|
+ while (matcher.find()) {
|
|
|
+ dateList.add(matcher.group());
|
|
|
+ String dateString = matcher.group();
|
|
|
+ content = content.replace(dateString,"");
|
|
|
+ flag = dateString;
|
|
|
+ }
|
|
|
+
|
|
|
+ if(content.contains("有限公司")){
|
|
|
+ String[] resumeWork = content.split("有限公司");
|
|
|
+ if(resumeWork != null && resumeWork.length > 0){
|
|
|
+ companyNameList.add(resumeWork[0]+"有限公司");
|
|
|
+ }
|
|
|
+ if(resumeWork.length >= 2){
|
|
|
+ postionNameList.add(resumeWork[1]);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if(dateList!=null && dateList.size()>0){
|
|
|
+ if(dateList.get(dateList.size()-1).equals(flag)){
|
|
|
+ if(workDetailList.size() == dateList.size()){
|
|
|
+ workDetailList.set(dateList.size()-1,workDetailList.get(dateList.size()-1)+content);
|
|
|
+ }else{
|
|
|
+ workDetailList.add(content);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if(null != dateList && dateList.size() > 0){
|
|
|
+ for (int i = 0; i < dateList.size(); i++) {
|
|
|
+ ResumeWork resumeWork = new ResumeWork();
|
|
|
+ String[] datestr = dateList.get(i).split("(~|--|–|~|-)");
|
|
|
+ if(datestr.length>0){
|
|
|
+ Date startDate = DateUtils.parseDate(datestr[0]);
|
|
|
+ Date endDate = DateUtils.parseDate(datestr[1]);
|
|
|
+ if(datestr[0].equals("至今")){
|
|
|
+ startDate = new Date();
|
|
|
+ }
|
|
|
+ if(datestr[1].equals("至今")){
|
|
|
+ endDate = new Date();
|
|
|
+ }
|
|
|
+ resumeWork.setIinductionStartDate(DateUtils.formatDate(startDate,"yyyy-MM"));
|
|
|
+ resumeWork.setIinductionEndDate(DateUtils.formatDate(endDate,"yyyy-MM"));
|
|
|
+ }
|
|
|
+ if(companyNameList!=null&&companyNameList.size() == dateList.size()){
|
|
|
+ resumeWork.setCompanyName(companyNameList.get(i));
|
|
|
+ }
|
|
|
+ if(postionNameList!=null&&postionNameList.size() == dateList.size()){
|
|
|
+ resumeWork.setPostionName(postionNameList.get(i));
|
|
|
+ }
|
|
|
+ if(workDetailList!=null&&workDetailList.size() == dateList.size()){
|
|
|
+ resumeWork.setWorkDetail(workDetailList.get(i));
|
|
|
+ }
|
|
|
+ workList.add(resumeWork);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ resume.setWorkList(workList);
|
|
|
+ }
|
|
|
+ //项目经验
|
|
|
+ if(containsWordsIndexOf(title,ResumeType.PROJECTEXPERIENCE)){
|
|
|
+ List<Map<String,Object>> regularList = new ArrayList<>();
|
|
|
+ for (Map.Entry<String, String> entry : mapByKey.entrySet()) {
|
|
|
+ if (!entry.getKey().equals("title")) {
|
|
|
+ String content = entry.getValue().replaceAll("\\s+", "");
|
|
|
+ //获取内容小标识
|
|
|
+ List<Map<Integer,String>> mapList = ProjectUtils.getProjectSubscript(content);
|
|
|
+ //进行数据摘取
|
|
|
+ Map<Integer,String> resumeMap = toArrangementSort(mapList);
|
|
|
+ //拆分数据后进行分类
|
|
|
+ Map<String,String> dataList = toSplitContent(resumeMap,content);
|
|
|
+ //在根据整体下标在进行进一步拆分处理
|
|
|
+ Map<String,Object> newMap = new HashMap<>();
|
|
|
+ newMap.put("dataList",dataList);
|
|
|
+ newMap.put("content",content);
|
|
|
+ newMap.put("key",entry.getKey());
|
|
|
+ regularList.add(newMap);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ //开始处理数据并进行分段处理
|
|
|
+ List<ResumeProject> resumeProjectList = toProjectSubsection(regularList);
|
|
|
+ if(resumeProjectList != null && resumeProjectList.size() > 0){
|
|
|
+ resume.setProjectList(resumeProjectList);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ //教育经历
|
|
|
+ if(containsWordsIndexOf(title,ResumeType.EDUCATIONALEXPERIENCE)){
|
|
|
+ List<ResumeEducation> eduList = new ArrayList<>();
|
|
|
+ //根据分词处理数据
|
|
|
+ for (Map.Entry<String, String> entry : mapByKey.entrySet()) {
|
|
|
+ if (!entry.getKey().equals("title")) {
|
|
|
+ String content = entry.getValue().replaceAll("\\s+", "");
|
|
|
+
|
|
|
+ List<String> dateList = new ArrayList<>();
|
|
|
+ String school = "";
|
|
|
+ String major = "";
|
|
|
+ //提取时间
|
|
|
+ String SR = "^\\d{4}(.|-|\\/|年)\\d{1,2}(~|—|--|–|~|-|\\/)(\\d{4}(.|-|\\/|年)\\d{1,2}|至今)";
|
|
|
+ Pattern pattern = Pattern.compile(SR); //尝试提取这样类型的数据
|
|
|
+ Matcher matcher = pattern.matcher(content);
|
|
|
+ while (matcher.find()) {
|
|
|
+ dateList.add(matcher.group());
|
|
|
+ String dateString = matcher.group();
|
|
|
+ content = content.replace(dateString,"");
|
|
|
+ }
|
|
|
+
|
|
|
+ List<Term> termList = NLPTokenizer.segment(content);
|
|
|
+// System.out.println(termList);
|
|
|
+ ResumeEducation resumeEducation = new ResumeEducation();
|
|
|
+ if(termList!= null && termList.size()>0){
|
|
|
+ for (int i = 0; i < termList.size(); i++) {
|
|
|
+ Term term = termList.get(i);
|
|
|
+ if (term.nature.equals(Nature.nt)
|
|
|
+ ||term.nature.toString().equals("学校")
|
|
|
+ ||term.nature.equals(Nature.ntu)
|
|
|
+ ||term.nature.equals(Nature.nts)) {
|
|
|
+ school += term.word;
|
|
|
+ } else if (term.nature.toString().equals("学历")) {
|
|
|
+ resumeEducation.setDegree(term.word);
|
|
|
+ } else {
|
|
|
+ major += term.word;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ resumeEducation.setSchoolName(school);
|
|
|
+ resumeEducation.setMajor(major);
|
|
|
+ for (int i = 0; i < dateList.size(); i++) {
|
|
|
+ String[] dateStr = dateList.get(i).split("(~|--|–|~|-)");
|
|
|
+ if(dateStr!=null&&dateStr.length>0){
|
|
|
+ Date startDate = DateUtils.parseDate(dateStr[0]);
|
|
|
+ resumeEducation.setEduStartDate(DateUtils.formatDate(startDate,"yyyy-MM"));
|
|
|
+ Date endDate = DateUtils.parseDate(dateStr[1]);
|
|
|
+ resumeEducation.setEduEndDate(DateUtils.formatDate(endDate,"yyyy-MM"));
|
|
|
+ }
|
|
|
+ }
|
|
|
+ eduList.add(resumeEducation);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if(eduList != null && eduList.size() > 0){
|
|
|
+ resume.setEduList(eduList);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ //自我评价
|
|
|
+ if(containsWordsIndexOf(title,ResumeType.SELFEVALUATION)){
|
|
|
+ String selfIntroduce = "";
|
|
|
+ for (Map.Entry<String, String> entry : mapByKey.entrySet()) {
|
|
|
+ if(!entry.getKey().equals("title")){
|
|
|
+ selfIntroduce += entry.getValue();
|
|
|
+ }
|
|
|
+ }
|
|
|
+ resume.setSelfIntroduce(selfIntroduce);
|
|
|
+
|
|
|
+ }
|
|
|
+ //专业技能
|
|
|
+ if(containsWordsIndexOf(title,ResumeType.PROFESSIONALSKILLS)){
|
|
|
+ String keyWords = "";
|
|
|
+ for (Map.Entry<String, String> entry : mapByKey.entrySet()) {
|
|
|
+ if(!entry.getKey().equals("title")){
|
|
|
+ keyWords += entry.getValue();
|
|
|
+ }
|
|
|
+ }
|
|
|
+ resume.setKeyWords(keyWords);
|
|
|
+ }
|
|
|
+ //证书
|
|
|
+ if(containsWordsIndexOf(title,ResumeType.CERTIFICATE)){
|
|
|
+ String certificate = "";
|
|
|
+ for (Map.Entry<String, String> entry : mapByKey.entrySet()) {
|
|
|
+ if(!entry.getKey().equals("title")){
|
|
|
+ certificate += entry.getValue();
|
|
|
+ }
|
|
|
+ }
|
|
|
+ resume.setCertificate(certificate);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ //如果没有则在其他信息里提取
|
|
|
+ if(PERSONAL_FLAG){
|
|
|
+ /**
|
|
|
+ * 从其他信息中提取基本信息
|
|
|
+ */
|
|
|
+ /**
|
|
|
+ * 处理数据
|
|
|
+ * 姓名、手机号、学历、行业经验、毕业院校、E-mail、工作经验
|
|
|
+ * 工作地、居住地、专业
|
|
|
+ */
|
|
|
+ for (Map.Entry<String, String> entry : surplusMap.entrySet()) {
|
|
|
+ if(!entry.getKey().equals("title")){
|
|
|
+ String content = entry.getValue().replaceAll("\\s+", "");
|
|
|
+ //获取内容中的小标识
|
|
|
+ List<Map<Integer,String>> mapList = PersonalUtils.getPersonalSubscript(content);
|
|
|
+ //进行数据摘取
|
|
|
+ Map<Integer,String> resumeMap = toArrangementSort(mapList);
|
|
|
+ //拆分数据后进行分类
|
|
|
+ Map<String,String> dataList = toSplitContent(resumeMap,content);
|
|
|
+ for (Map.Entry<String,String> dataMap: dataList.entrySet()) {
|
|
|
+ PersonalType personalType = PersonalUtils.toJudgeType(dataMap.getKey());
|
|
|
+ if(personalType.getCode().equals("XM"))resume.setUserName(dataMap.getValue());//姓名
|
|
|
+ if(personalType.getCode().equals("YX"))resume.setEmail(dataMap.getValue());//邮箱
|
|
|
+ if(personalType.getCode().equals("SJH"))resume.setMobile(dataMap.getValue());//电话
|
|
|
+ if(personalType.getCode().equals("MZ"))resume.setNationality(dataMap.getValue());//民族
|
|
|
+ if(personalType.getCode().equals("XB"))resume.setGender(dataMap.getValue());//性别
|
|
|
+ if(personalType.getCode().equals("XL"))resume.setDegree(dataMap.getValue());//学历
|
|
|
+ if(personalType.getCode().equals("XX"))resume.setGraduateCollege(dataMap.getValue());//学校
|
|
|
+ if(personalType.getCode().equals("ZY"))resume.setMajor(dataMap.getValue());//专业
|
|
|
+ if(personalType.getCode().equals("GZJY"))resume.setWorkExperience(dataMap.getValue());//工作经验
|
|
|
+ if(personalType.getCode().equals("JZD"))resume.setResidence(dataMap.getValue());//居住地
|
|
|
+ if(personalType.getCode().equals("GZD"))resume.setWorkingPlace(dataMap.getValue());//工作地
|
|
|
+// if(personalType.getCode().equals("SGTZ"))resume.setUserName(dataMap.getValue());//身高体重
|
|
|
+ if(personalType.getCode().equals("SR"))resume.setBirthDateString(dataMap.getValue());//生日
|
|
|
+ if(personalType.getCode().equals("YYNL"))resume.setLanguageAbility(dataMap.getValue());//语言能力
|
|
|
+ if(personalType.getCode().equals("GW"))resume.setPosition(dataMap.getValue());//岗位
|
|
|
+// if(personalType.getCode().equals("BYSJ"))resume.setUserName(dataMap.getValue());//毕业时间
|
|
|
+
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ //如果姓名为空:取第一行值
|
|
|
+ if(StringUtils.isEmpty(resume.getUserName())){
|
|
|
+ /* 循环需第一行值 */
|
|
|
+ resume.setUserName(surplusMap.get("0"));
|
|
|
+ }
|
|
|
+ //如果手机号为空,通过正则提取
|
|
|
+ if(StringUtils.isEmpty(resume.getMobile())){
|
|
|
+ /**
|
|
|
+ * 处理数据
|
|
|
+ * 手机号
|
|
|
+ */
|
|
|
+ for (Map.Entry<String, String> entry : surplusMap.entrySet()) {
|
|
|
+ String content = entry.getValue().replaceAll("\\s+", "");
|
|
|
+ String mobile = HanLPUtils.StringFilter(HanLPUtils.getHanLPTelephone(content));
|
|
|
+ if(StringUtils.isNotEmpty(mobile)){
|
|
|
+ resume.setMobile(mobile);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ //如果性别提取为空,根据正则提取
|
|
|
+ if(StringUtils.isEmpty(resume.getGender())){
|
|
|
+ /** 处理数据 */
|
|
|
+ for (Map.Entry<String, String> entry : surplusMap.entrySet()) {
|
|
|
+ String content = entry.getValue().replaceAll("\\s+", "");
|
|
|
+ String gender = HanLPUtils.StringFilter(HanLPUtils.getHanLPSex(content));
|
|
|
+ if(StringUtils.isNotEmpty(gender)){
|
|
|
+ resume.setGender(gender);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ //如果邮箱提取为空,则根据正则提取邮箱
|
|
|
+ if(StringUtils.isEmpty(resume.getEmail())){
|
|
|
+ /** 处理数据 */
|
|
|
+ for (Map.Entry<String, String> entry : surplusMap.entrySet()) {
|
|
|
+ String content = entry.getValue().replaceAll("\\s+", "");
|
|
|
+ String email = HanLPUtils.regularAcquisition("^[a-zA-Z0-9_-]+@[a-zA-Z0-9_-]+(\\.[a-zA-Z0-9_-]+)+$",content);
|
|
|
+ if(StringUtils.isNotEmpty(email)){
|
|
|
+ resume.setEmail(email);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ //如果年龄提取为空,则根据正则提取年龄
|
|
|
+ if(StringUtils.isEmpty(resume.getAge())){
|
|
|
+ /** 处理数据 */
|
|
|
+ for (Map.Entry<String, String> entry : surplusMap.entrySet()) {
|
|
|
+ String content = entry.getValue().replaceAll("\\s+", "");
|
|
|
+ String age = HanLPUtils.regularAcquisition("\\d{1,2}(岁)",content);
|
|
|
+ if(StringUtils.isNotEmpty(age)){
|
|
|
+ resume.setAge(age);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ //姓名去除特殊符号
|
|
|
+ if(StringUtils.isNotEmpty(resume.getUserName())){
|
|
|
+ resume.setUserName(HanLPUtils.StringFilter(resume.getUserName()));
|
|
|
+ }
|
|
|
+ //学历去除特殊符号
|
|
|
+ if(StringUtils.isNotEmpty(resume.getDegree())){
|
|
|
+ resume.setDegree(HanLPUtils.StringFilter(resume.getDegree()));
|
|
|
+ }
|
|
|
+ //手机号去除汉字
|
|
|
+ if(StringUtils.isNotEmpty(resume.getMobile())){
|
|
|
+ resume.setMobile(StringUtil.removeStrChinese(resume.getMobile()));
|
|
|
+ }
|
|
|
+ return resume;
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 判断是否为大标题段落
|
|
|
+ * @param docContent
|
|
|
+ * @return
|
|
|
+ */
|
|
|
+ public static Map<String, Map<String,Object>> subsection(List<String> docContent){
|
|
|
+ Map<String, Map<String,Object>> resultMap = new HashMap<>();
|
|
|
+ for (int c = 0; c < docContent.size(); c++) {
|
|
|
+ Map<String, Object> result = new HashMap<String, Object>();
|
|
|
+ String txtContent = docContent.get(c);
|
|
|
+ if (StringUtils.isNotEmpty(txtContent)) {
|
|
|
+ //内容下标
|
|
|
+ result.put("coordinate", c);
|
|
|
+ /**
|
|
|
+ * 先进行主要标识对比
|
|
|
+ */
|
|
|
+ boolean isTitle = isTitle(txtContent);
|
|
|
+ result.put("isTitle", isTitle);
|
|
|
+ result.put("txtContent",txtContent);
|
|
|
+ resultMap.put(c+"",result);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return resultMap;
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ *
|
|
|
+ * @param str
|
|
|
+ * @return
|
|
|
+ */
|
|
|
+ public static boolean isTitle(String str){
|
|
|
+ List<String> list = Arrays.asList("项目经验", "项目经历",
|
|
|
+ "工作经验","工作实习经历", "工作经历","工作履历",
|
|
|
+ "教育经历","培训经历","教育培训经历","教育背景",
|
|
|
+ "专业技能","技能","技能专长","技能特长", "掌握技能","职业技能","相关技能","个人技能",
|
|
|
+ "证书","技能证书","求职意向","推荐评语",
|
|
|
+ "个人评价", "自我评价","个人总结","个人特点","个人优势","自我认知","个人简介","个人简历",
|
|
|
+ "联系方式","个人信息","基本信息","个人资料");
|
|
|
+ for (String title:list) {
|
|
|
+ double titleSimilarity = Similarity.phraseSimilarity(title, HanLPUtils.StringFilter(str));
|
|
|
+ if(titleSimilarity >= 1 )return true;
|
|
|
+ }
|
|
|
+ return false;
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ *
|
|
|
+ * @param resultMap
|
|
|
+ * @return
|
|
|
+ */
|
|
|
+ public static List<Integer> identification(Map<String, Map<String,Object>> resultMap){
|
|
|
+ List<Integer> list = new ArrayList<>();
|
|
|
+ for (Map.Entry<String, Map<String,Object>> entry : resultMap.entrySet()) {
|
|
|
+ Map<String,Object> map = entry.getValue();
|
|
|
+ if(null != map.get("isTitle") && (boolean)map.get("isTitle")){
|
|
|
+ list.add(Integer.valueOf(entry.getKey()));
|
|
|
+ }
|
|
|
+ }
|
|
|
+ Collections.sort(list);
|
|
|
+ return list;
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ *
|
|
|
+ * @param list
|
|
|
+ * @param resultMap
|
|
|
+ * @return
|
|
|
+ */
|
|
|
+ public static List<Map<String,String>> paragraph(List<Integer> list,Map<String, Map<String,Object>> resultMap){
|
|
|
+ List<Map<String,String>> paragraphList = new ArrayList<>();
|
|
|
+ for (int i = 0; i < list.size(); i++) {
|
|
|
+ Integer coordinate = list.get(i);
|
|
|
+ Map<String,String> paragraph = new HashMap<>();
|
|
|
+ for (Map.Entry<String, Map<String,Object>> entry : resultMap.entrySet()) {
|
|
|
+ Map<String,Object> map = entry.getValue();
|
|
|
+ if(null != map.get("isTitle")&&(boolean)map.get("isTitle")){
|
|
|
+ if(Integer.valueOf(coordinate) == map.get("coordinate")){
|
|
|
+ paragraph.put("title",map.get("txtContent").toString());
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if(null != map.get("isTitle")&&!(boolean)map.get("isTitle")){
|
|
|
+ if(i+1 < list.size() &&
|
|
|
+ coordinate<(Integer) map.get("coordinate")&&
|
|
|
+ Integer.valueOf(list.get(i+1))>(Integer) map.get("coordinate")){
|
|
|
+ //获取段落
|
|
|
+ paragraph.put(entry.getKey(),map.get("txtContent").toString());
|
|
|
+ } else if (i+1 == list.size()
|
|
|
+ &&coordinate<(Integer) map.get("coordinate")) {
|
|
|
+ paragraph.put(entry.getKey(),map.get("txtContent").toString());
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ paragraphList.add(paragraph);
|
|
|
+ }
|
|
|
+ return paragraphList;
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 筛选其他信息内容
|
|
|
+ * @param resultMap
|
|
|
+ * @param list
|
|
|
+ * @param paragraphList
|
|
|
+ * @return
|
|
|
+ */
|
|
|
+ public static Map<String,String> surplus(List<String> resultMap,List<Integer> list,List<Map<String,String>> paragraphList){
|
|
|
+ Map<String,String> basicsMap = new HashMap<>();
|
|
|
+ for (int i = 0; i < resultMap.size(); i++) {
|
|
|
+ boolean flag = true;
|
|
|
+ for (Integer j:list) {
|
|
|
+ if(j == i){
|
|
|
+ flag = false;
|
|
|
+ }else{
|
|
|
+ for (Map<String,String> paragraph: paragraphList) {
|
|
|
+ for (Map.Entry<String,String> pMap : paragraph.entrySet()) {
|
|
|
+ if(!pMap.getKey().equals("title")){
|
|
|
+ if(i == Integer.valueOf(pMap.getKey())){
|
|
|
+ flag = false;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if(flag){
|
|
|
+ basicsMap.put(i+"",resultMap.get(i));
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return basicsMap;
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 检查字符串包含多个关键词
|
|
|
+ * @param inputString
|
|
|
+ * @param words
|
|
|
+ * @return
|
|
|
+ */
|
|
|
+ public static boolean containsWordsIndexOf(String inputString, ResumeType words) {
|
|
|
+ boolean found = false;
|
|
|
+ for (String word : words.getCode()) {
|
|
|
+ if (inputString.indexOf(word) != -1) {
|
|
|
+ found = true;
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return found;
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ *
|
|
|
+ * @param mapList
|
|
|
+ * @return
|
|
|
+ */
|
|
|
+ public static Map<Integer,String> toArrangementSort(List<Map<Integer,String>> mapList){
|
|
|
+ Map<Integer,String> resumeMap = new HashMap<>();
|
|
|
+ if(mapList!=null && mapList.size()>0){
|
|
|
+ for (Map<Integer,String> smallMap: mapList) {
|
|
|
+ if(smallMap.size()>0 && smallMap.size()==1){
|
|
|
+ for (Map.Entry<Integer,String> sMap: smallMap.entrySet()) {
|
|
|
+ resumeMap.put(sMap.getKey(),sMap.getValue());
|
|
|
+ }
|
|
|
+ } else if (smallMap.size()>0 && smallMap.size()>1) {
|
|
|
+ TreeMap<Integer, String> paramTreeMap = new TreeMap<>(smallMap);
|
|
|
+ resumeMap.put(paramTreeMap.firstKey(),paramTreeMap.get(paramTreeMap.firstKey()));
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return resumeMap;
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ *
|
|
|
+ * @param resumeMap
|
|
|
+ * @param content
|
|
|
+ * @return
|
|
|
+ */
|
|
|
+ public static Map<String,String> toSplitContent(Map<Integer,String> resumeMap,String content){
|
|
|
+
|
|
|
+ List<Integer> key = new ArrayList<>();
|
|
|
+ List<String> values = new ArrayList<>();
|
|
|
+ Map<String,String> subscriptMap = new HashMap<>();
|
|
|
+ for (Map.Entry<Integer,String> map: resumeMap.entrySet()) {
|
|
|
+ key.add(map.getKey());
|
|
|
+ values.add(map.getValue());
|
|
|
+ }
|
|
|
+ if(key!=null && key.size()>0){
|
|
|
+ Collections.sort(key);
|
|
|
+ for (int i = 0; i < key.size(); i++) {
|
|
|
+ key.get(i);
|
|
|
+ if(i+1 < key.size()){
|
|
|
+ String str = content.substring(key.get(i),key.get(i+1));
|
|
|
+ str = str.replaceAll(values.get(i),"").replaceAll(":","");
|
|
|
+ subscriptMap.put(values.get(i),str);
|
|
|
+ } else if (i+1 == key.size()) {
|
|
|
+ String str = content.substring(key.get(i),content.length());
|
|
|
+ str = str.replaceAll(values.get(i),"").replaceAll(":","");
|
|
|
+ subscriptMap.put(values.get(i),str);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return subscriptMap;
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ /**
|
|
|
+ *
|
|
|
+ * @param List
|
|
|
+ * @param content
|
|
|
+ * @return
|
|
|
+ */
|
|
|
+ public static Map<Integer,String> getSubscript(List<String> List,String content){
|
|
|
+ Map<Integer,String> map = new HashMap<>();
|
|
|
+ for (String words:List) {
|
|
|
+ if(content.contains(words)){
|
|
|
+ int subscript = StrStr(content,words);
|
|
|
+ if(subscript!=-1){
|
|
|
+ map.put(subscript,words);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return map;
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ *
|
|
|
+ * @param List
|
|
|
+ * @param content
|
|
|
+ * @return
|
|
|
+ */
|
|
|
+ public static boolean getVerificationType(List<String> List,String content){
|
|
|
+ Map<Integer,String> map = new HashMap<>();
|
|
|
+ for (String words:List) {
|
|
|
+ if(content.contains(words)){
|
|
|
+ return true;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return false;
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ *
|
|
|
+ * @param pattern
|
|
|
+ * @param n
|
|
|
+ * @return
|
|
|
+ */
|
|
|
+ private static int[] Build_PrefixTable(String pattern,int n)
|
|
|
+ {
|
|
|
+ if (n == 0) return new int[0];
|
|
|
+ int[] prefix = new int[n];
|
|
|
+ int i = 1, j = 0;
|
|
|
+ while(i < n)
|
|
|
+ {
|
|
|
+ while (j > 0 && pattern.charAt(i) != pattern.charAt(j))
|
|
|
+ {
|
|
|
+ j = prefix[j - 1];
|
|
|
+ }
|
|
|
+ if (pattern.charAt(i) == pattern.charAt(j))
|
|
|
+ {
|
|
|
+ j++;
|
|
|
+ }
|
|
|
+ prefix[i] = j;
|
|
|
+ i++;
|
|
|
+ }
|
|
|
+ return prefix;
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ *
|
|
|
+ * @param haystack
|
|
|
+ * @param needle
|
|
|
+ * @return
|
|
|
+ */
|
|
|
+ public static int StrStr(String haystack, String needle)
|
|
|
+ {
|
|
|
+ if(needle.length()>haystack.length()) return -1;
|
|
|
+ int n = needle.length(), m = haystack.length();
|
|
|
+ int[] prefix = Build_PrefixTable(needle,n);
|
|
|
+ for(int i=0,j=0;i<m;i++)
|
|
|
+ {
|
|
|
+ while(j>0 && haystack.charAt(i)!= needle.charAt(j))
|
|
|
+ {
|
|
|
+ j=prefix[j-1];
|
|
|
+ }
|
|
|
+ if(haystack.charAt(i)==needle.charAt(j))
|
|
|
+ {
|
|
|
+ j++;
|
|
|
+ }
|
|
|
+ if(j==n)
|
|
|
+ {
|
|
|
+ return i-n +1;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return -1;
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 分段并排序
|
|
|
+ * @param list
|
|
|
+ */
|
|
|
+ public static List<ResumeProject> toProjectSubsection(List<Map<String,Object>> list){
|
|
|
+ //排序
|
|
|
+ Collections.sort(list, new Comparator<Map<String,Object>>() {
|
|
|
+ @Override
|
|
|
+ public int compare(Map<String,Object> stu1, Map<String,Object> stu2) {
|
|
|
+ return Integer.valueOf(stu1.get("key").toString()) - Integer.valueOf(stu2.get("key").toString());
|
|
|
+ }
|
|
|
+ });
|
|
|
+ /**
|
|
|
+ * 数据结构定义
|
|
|
+ */
|
|
|
+ //当前小标识
|
|
|
+ String type = null;
|
|
|
+ String sign = "";
|
|
|
+ ResumeProject resumeProject = null;
|
|
|
+ List<ResumeProject> projects = new ArrayList<>();
|
|
|
+ //处理数据分析
|
|
|
+ for (int i = 0; i < list.size(); i++) {
|
|
|
+ Map<String,Object> map = list.get(i);
|
|
|
+ Map<String,String> dataList = (Map<String,String>) map.get("dataList");
|
|
|
+ if(dataList != null && dataList.size() > 0){
|
|
|
+ for (Map.Entry<String,String> dataMap: dataList.entrySet()) {
|
|
|
+ ProjectType projectType = ProjectUtils.toJudgeType(dataMap.getKey());
|
|
|
+ type = projectType.getCode();
|
|
|
+ if(projectType.getCode().equals(ProjectType.XMMC.getCode())){//项目名称
|
|
|
+ if(resumeProject!=null&&StringUtils.isNotEmpty(resumeProject.getProjectName())){
|
|
|
+ projects.add(resumeProject);
|
|
|
+ }
|
|
|
+ resumeProject = new ResumeProject();
|
|
|
+ resumeProject.setProjectName(dataMap.getValue());
|
|
|
+ }
|
|
|
+ if(projectType.getCode().equals(ProjectType.XMJJ.getCode()))//项目描述
|
|
|
+ if(resumeProject!=null){
|
|
|
+ if(StringUtils.isNotEmpty(sign)&&sign.equals(ProjectType.XMJJ.getCode())){
|
|
|
+ projects.add(resumeProject);
|
|
|
+ resumeProject = new ResumeProject();
|
|
|
+ resumeProject.setProjectName(list.get(i-1).get("content").toString());
|
|
|
+ }else{
|
|
|
+ resumeProject.setProjectDetail(dataMap.getValue());
|
|
|
+ }
|
|
|
+ }else{
|
|
|
+ if(i!=0){
|
|
|
+ sign = ProjectType.XMJJ.getCode();
|
|
|
+ resumeProject = new ResumeProject();
|
|
|
+ resumeProject.setProjectName(list.get(i-1).get("content").toString());
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if(projectType.getCode().equals(ProjectType.XMZZ.getCode())) //责任描述
|
|
|
+ if(resumeProject!=null){
|
|
|
+ if(StringUtils.isNotEmpty(sign)&&sign.equals(ProjectType.XMZZ.getCode())){
|
|
|
+ projects.add(resumeProject);
|
|
|
+ resumeProject = new ResumeProject();
|
|
|
+ resumeProject.setProjectName(list.get(i-1).get("content").toString());
|
|
|
+ }else{
|
|
|
+ resumeProject.setPartIn(dataMap.getValue());
|
|
|
+ }
|
|
|
+ }else{
|
|
|
+ if(i!=0){
|
|
|
+ sign = ProjectType.XMZZ.getCode();
|
|
|
+ resumeProject = new ResumeProject();
|
|
|
+ resumeProject.setProjectName(list.get(i-1).get("content").toString());
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if(projectType.getCode().equals(ProjectType.XMJS.getCode()))//软件环境
|
|
|
+ if(resumeProject!=null){
|
|
|
+ if(StringUtils.isNotEmpty(sign)&&sign.equals(ProjectType.XMJS.getCode())){
|
|
|
+ projects.add(resumeProject);
|
|
|
+ resumeProject = new ResumeProject();
|
|
|
+ resumeProject.setProjectName(list.get(i-1).get("content").toString());
|
|
|
+ }else{
|
|
|
+ resumeProject.setDevelopEnvironment(dataMap.getValue());
|
|
|
+ }
|
|
|
+ }else{
|
|
|
+ if(i!=0){
|
|
|
+ sign = ProjectType.XMJS.getCode();
|
|
|
+ resumeProject = new ResumeProject();
|
|
|
+ resumeProject.setProjectName(list.get(i-1).get("content").toString());
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if(projectType.getCode().equals(ProjectType.KFGJ.getCode()))
|
|
|
+ if(resumeProject!=null){
|
|
|
+ if(StringUtils.isNotEmpty(sign)&&sign.equals(ProjectType.KFGJ.getCode())){
|
|
|
+ projects.add(resumeProject);
|
|
|
+ resumeProject = new ResumeProject();
|
|
|
+ resumeProject.setProjectName(list.get(i-1).get("content").toString());
|
|
|
+ }else{
|
|
|
+ resumeProject.setDevelopTools(dataMap.getValue());//开发工具
|
|
|
+ }
|
|
|
+ }else{
|
|
|
+ if(i!=0){
|
|
|
+ sign = ProjectType.KFGJ.getCode();
|
|
|
+ resumeProject = new ResumeProject();
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if(projectType.getCode().equals(ProjectType.XMZQ.getCode()));//项目周期
|
|
|
+ }
|
|
|
+ }else {
|
|
|
+ //处理无标识数据问题
|
|
|
+ if (type != null && resumeProject!=null) {
|
|
|
+ if (type.equals(ProjectType.XMMC.getCode()))
|
|
|
+ resumeProject.setProjectName(resumeProject.getProjectName() + map.get("content").toString());
|
|
|
+ if (type.equals(ProjectType.XMJJ.getCode()))
|
|
|
+ resumeProject.setProjectDetail(resumeProject.getProjectDetail() + map.get("content").toString());
|
|
|
+ if (type.equals(ProjectType.XMZZ.getCode()))
|
|
|
+ resumeProject.setPartIn(resumeProject.getPartIn() + map.get("content").toString());
|
|
|
+ if (type.equals(ProjectType.XMJS.getCode()))
|
|
|
+ resumeProject.setDevelopEnvironment(resumeProject.getDevelopEnvironment() + map.get("content").toString());
|
|
|
+ if (type.equals(ProjectType.KFGJ.getCode()))
|
|
|
+ resumeProject.setDevelopTools(resumeProject.getDevelopTools() + map.get("content").toString());
|
|
|
+ if (type.equals(ProjectType.XMZQ.getCode()));
|
|
|
+ }else{
|
|
|
+
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if(resumeProject!=null
|
|
|
+ &&StringUtils.isNotEmpty(resumeProject.getProjectName())
|
|
|
+ &&resumeProject.getProjectName().length()>50){
|
|
|
+ resumeProject.setProjectName(resumeProject.getProjectName().substring(0, 50));
|
|
|
+ }
|
|
|
+ }
|
|
|
+ //判断最后一个是否存在
|
|
|
+ if(resumeProject!=null&&StringUtils.isNotEmpty(resumeProject.getProjectName())){
|
|
|
+ projects.add(resumeProject);
|
|
|
+ }
|
|
|
+ return projects;
|
|
|
+ }
|
|
|
+
|
|
|
+}
|