|
@@ -8,11 +8,15 @@ import org.slf4j.LoggerFactory;
|
|
|
|
|
|
import java.io.UnsupportedEncodingException;
|
|
|
import java.net.URLEncoder;
|
|
|
+import java.util.ArrayList;
|
|
|
+import java.util.List;
|
|
|
+import java.util.regex.Matcher;
|
|
|
+import java.util.regex.Pattern;
|
|
|
|
|
|
/**
|
|
|
* @author koucx
|
|
|
* @version 1.0
|
|
|
- * @descption: TODO
|
|
|
+ * @descption: 中文翻译英文工具类
|
|
|
* @company 神州数码通用软件(洛阳)有限公司
|
|
|
* @copyright (c) 2019 LuoYang DGT Co'Ltd Inc. All rights reserved.
|
|
|
* @date 2020-03-24
|
|
@@ -25,6 +29,62 @@ public class TranslateUtil {
|
|
|
// Youdao web translation endpoint; this is the URL requested by getEnTranslateInfo(String).
// NOTE(review): the "_OLD" suffix suggests a legacy endpoint - confirm whether it is still supported.
private static final String TRANSLATE_URL_OLD = "http://fanyi.youdao.com/translate";
|
|
|
// Youdao open API endpoint. Its callers are not visible in this part of the file.
private static final String TRANSLATE_URL = "http://fanyi.youdao.com/openapi.do";
|
|
|
|
|
|
+ private static final String regEx_script = "<script[^>]*?>[\\s\\S]*?<\\/script>"; // 定义script的正则表达式
|
|
|
+ private static final String regEx_style = "<style[^>]*?>[\\s\\S]*?<\\/style>"; // 定义style的正则表达式
|
|
|
+ private static final String regEx_html = "<[^>]+>"; // 定义HTML标签的正则表达式
|
|
|
+ private static final String regEx_space = "\\s*|\t|\r|\n";//定义空格回车换行符
|
|
|
+
|
|
|
+
|
|
|
+ /**
|
|
|
+ * @param htmlStr
|
|
|
+ * @return 删除Html标签
|
|
|
+ */
|
|
|
+ public static String delHTMLTag(String htmlStr) {
|
|
|
+ Pattern p_script = Pattern.compile(regEx_script, Pattern.CASE_INSENSITIVE);
|
|
|
+ Matcher m_script = p_script.matcher(htmlStr);
|
|
|
+ htmlStr = m_script.replaceAll(""); // 过滤script标签
|
|
|
+
|
|
|
+ Pattern p_style = Pattern.compile(regEx_style, Pattern.CASE_INSENSITIVE);
|
|
|
+ Matcher m_style = p_style.matcher(htmlStr);
|
|
|
+ htmlStr = m_style.replaceAll(""); // 过滤style标签
|
|
|
+
|
|
|
+ Pattern p_html = Pattern.compile(regEx_html, Pattern.CASE_INSENSITIVE);
|
|
|
+ Matcher m_html = p_html.matcher(htmlStr);
|
|
|
+ List<String> matchStrs = new ArrayList<>();
|
|
|
+ while (m_html.find()) { //此处find()每次被调用后,会偏移到下一个匹配
|
|
|
+ matchStrs.add(m_html.group());//获取当前匹配的值
|
|
|
+ }
|
|
|
+
|
|
|
+ for (int i = 0; i < matchStrs.size(); i++) {
|
|
|
+ System.out.println(matchStrs.get(i));
|
|
|
+ }
|
|
|
+
|
|
|
+ htmlStr = m_html.replaceAll(""); // 过滤html标签
|
|
|
+
|
|
|
+ Pattern p_space = Pattern.compile(regEx_space, Pattern.CASE_INSENSITIVE);
|
|
|
+ Matcher m_space = p_space.matcher(htmlStr);
|
|
|
+ htmlStr = m_space.replaceAll(""); // 过滤空格回车标签
|
|
|
+ htmlStr = htmlStr.replaceAll(" ", "");
|
|
|
+ htmlStr = htmlStr.replaceAll("↵", "");
|
|
|
+ return htmlStr.trim(); // 返回文本字符串
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 处理含有html的翻译
|
|
|
+ * @param msg
|
|
|
+ * @param html
|
|
|
+ * @return
|
|
|
+ */
|
|
|
+ public static String getEnTranslateInfo(String msg,String html){
|
|
|
+ //去除所有html标签
|
|
|
+ msg = delHTMLTag(html);
|
|
|
+ //翻译汉字
|
|
|
+ String enTranslateInfo = getEnTranslateInfo(msg);
|
|
|
+ html = html.replaceAll(msg,enTranslateInfo);
|
|
|
+ return html;
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
/**
|
|
|
* @Description: 调用有道翻译接口中文翻译成英文
|
|
|
* @Param: [msg]
|
|
@@ -34,42 +94,48 @@ public class TranslateUtil {
|
|
|
**/
|
|
|
public static String getEnTranslateInfo(String msg){
|
|
|
String params = "doctype=json&type=ZH_CN2EN&i="+msg;
|
|
|
- String enInfo = HttpUtils.sendGet(TRANSLATE_URL_OLD, params);
|
|
|
- /**
|
|
|
- * {
|
|
|
- * "type": "ZH_CN2EN",
|
|
|
- * "errorCode": 0,
|
|
|
- * "elapsedTime": 1,
|
|
|
- * "translateResult": [
|
|
|
- * [
|
|
|
- * {
|
|
|
- * "src": "计算",//需要翻译的中文
|
|
|
- * "tgt": "To calculate"//翻译后的中文
|
|
|
- * }
|
|
|
- * ]
|
|
|
- * ]
|
|
|
- * }
|
|
|
- *
|
|
|
- * 根据以上json串获取相应英文翻译
|
|
|
- *
|
|
|
- */
|
|
|
- JSONObject object = JSONObject.fromObject(enInfo);
|
|
|
- if(object!=null){
|
|
|
- JSONArray array = object.getJSONArray("translateResult");
|
|
|
- if(array!=null && array.size()>0){
|
|
|
- for (Object obj:array) {
|
|
|
- JSONArray arr = JSONArray.fromObject(obj);
|
|
|
- for (Object o:arr) {
|
|
|
- JSONObject json = JSONObject.fromObject(o);
|
|
|
- if(json!=null && json.containsKey("tgt")){
|
|
|
- String tgt = json.getString("tgt");
|
|
|
- System.out.println("'"+json.getString("src")+"'翻译后'"+tgt+"'");
|
|
|
- return tgt;
|
|
|
+ try{
|
|
|
+ String enInfo = HttpUtils.sendGet(TRANSLATE_URL_OLD, params);
|
|
|
+ /**
|
|
|
+ * {
|
|
|
+ * "type": "ZH_CN2EN",
|
|
|
+ * "errorCode": 0,
|
|
|
+ * "elapsedTime": 1,
|
|
|
+ * "translateResult": [
|
|
|
+ * [
|
|
|
+ * {
|
|
|
+ * "src": "计算",//需要翻译的中文
|
|
|
+ * "tgt": "To calculate"//翻译后的中文
|
|
|
+ * }
|
|
|
+ * ]
|
|
|
+ * ]
|
|
|
+ * }
|
|
|
+ *
|
|
|
+ * 根据以上json串获取相应英文翻译
|
|
|
+ *
|
|
|
+ */
|
|
|
+ if(StringUtils.isNotEmpty(enInfo)){
|
|
|
+ JSONObject object = JSONObject.fromObject(enInfo);
|
|
|
+ if(object!=null){
|
|
|
+ JSONArray array = object.getJSONArray("translateResult");
|
|
|
+ if(array!=null && array.size()>0){
|
|
|
+ for (Object obj:array) {
|
|
|
+ JSONArray arr = JSONArray.fromObject(obj);
|
|
|
+ for (Object o:arr) {
|
|
|
+ JSONObject json = JSONObject.fromObject(o);
|
|
|
+ if(json!=null && json.containsKey("tgt")){
|
|
|
+ String tgt = json.getString("tgt");
|
|
|
+ System.out.println("'"+json.getString("src")+"'翻译后'"+tgt+"'");
|
|
|
+ return tgt;
|
|
|
+ }
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
+ }catch (Exception e){
|
|
|
+ log.info("翻译出错");
|
|
|
+ }
|
|
|
return "";
|
|
|
}
|
|
|
|
|
@@ -104,7 +170,13 @@ public class TranslateUtil {
|
|
|
|
|
|
|
|
|
public static void main(String[] args) {
|
|
|
- System.out.println(getEnTranslateInfo("<a>learning</a>"));
|
|
|
+ System.out.println(delHTMLTag("<p>\n" +
|
|
|
+ " </p><p></p>香味小圆蜡烛¥ 14.90 / 30 件<table style=\"width: 196.53pt;\" width=\"262\" height=\"72\" cellspacing=\"0\" cellpadding=\"0\" border=\"0\"><colgroup><col style=\"width:196.50pt;\" width=\"262\">\n" +
|
|
|
+ " </colgroup><tbody><tr style=\"height:54.00pt;\" height=\"72\">\n" +
|
|
|
+ " <td class=\"et2\" x:str=\"\" style=\"height:54.00pt;width:196.50pt;\" width=\"262\" height=\"72\">安装后尺寸<br>直径: 38 毫米<br>燃烧时长: 4 小时<br>包装数量: 30 件</td>\n" +
|
|
|
+ " </tr>\n" +
|
|
|
+ "</tbody><tbody></tbody><tbody></tbody><tbody></tbody></table>"));
|
|
|
+// System.out.println(getEnTranslateInfo("Furniture of the third generation"));
|
|
|
|
|
|
}
|
|
|
|