data_cleaning.sql 6.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272
  1. CREATE OR REPLACE PROCEDURE shanglifeecif.data_cleaning() -- 创建主存储过程
  2. IS
  3. DECLARE
  4. BEGIN
  5. --姓名 sametype:3
  6. INSERT INTO shanglifeecif.samecustomer(
  7. sid,
  8. scustid,
  9. name,
  10. gender,
  11. birthday,
  12. idcard,
  13. OtherIdNumber,
  14. sameType
  15. )
  16. SELECT
  17. reflect("java.util.UUID", "randomUUID"),
  18. t.customer_id,
  19. t.customer_name,
  20. t.gender,
  21. to_char(t.birthdate,"yyyy-MM-dd") birthdate,
  22. CASE t.id_type WHEN '0' THEN t.id_no END AS idcard,
  23. CASE WHEN t.id_type <> '0' THEN t.id_no END AS otherIdnumber,
  24. 3
  25. FROM dsj.t_customer_class t WHERE t.customer_name RLIKE '^.*[!$^|\=`]{1,}.*$' OR t.customer_name RLIKE '^.*[0-9]{1,}.*$' ;
  26. --性别 sametype:4
  27. INSERT INTO shanglifeecif.samecustomer(
  28. sid,
  29. scustid,
  30. name,
  31. gender,
  32. birthday,
  33. idcard,
  34. OtherIdNumber,
  35. sameType
  36. )
  37. SELECT
  38. reflect("java.util.UUID", "randomUUID"),
  39. t.customer_id,
  40. t.customer_name,
  41. t.gender,
  42. to_char(t.birthdate,"yyyy-MM-dd") birthdate,
  43. CASE t.id_type WHEN '0' THEN t.id_no END AS idcard,
  44. CASE WHEN t.id_type <> '0' THEN t.id_no END AS otherIdnumber,
  45. 4
  46. FROM dsj.t_customer_class t WHERE t.gender IS NULL AND t.customer_id IN (SELECT p.customerno FROM dsj.policy_information p WHERE p.sex IS null );
  47. --证件号码 sametype:6
  48. INSERT INTO shanglifeecif.samecustomer(
  49. sid,
  50. scustid,
  51. name,
  52. gender,
  53. birthday,
  54. idcard,
  55. sameType
  56. )
  57. SELECT
  58. reflect("java.util.UUID", "randomUUID"),
  59. si.scustid,
  60. si.name,
  61. si.gender,
  62. si.birthday,
  63. si.idcard,
  64. 6
  65. FROM shanglifeecif.individual si WHERE length(si.idcard)<18 AND si.custtype LIKE '%投保人%' AND si.idcard IS NOT null;
  66. --出生日期 sametype:7
  67. INSERT INTO shanglifeecif.samecustomer(
  68. sid,
  69. scustid,
  70. name,
  71. gender,
  72. birthday,
  73. idcard,
  74. OtherIdNumber,
  75. sameType
  76. )
  77. SELECT
  78. reflect("java.util.UUID", "randomUUID"),
  79. t.customer_id,
  80. t.customer_name,
  81. t.gender,
  82. to_char(t.birthdate,"yyyy-MM-dd") birthdate,
  83. CASE t.id_type WHEN '0' THEN t.id_no END AS idcard,
  84. CASE WHEN t.id_type <> '0' THEN t.id_no END AS otherIdnumber,
  85. 7
  86. FROM dsj.t_customer_class t WHERE t.birthdate IS NULL AND t.customer_id IN (SELECT p.customerno FROM dsj.policy_information p WHERE p.birthday IS null UNION SELECT i.insuredno FROM dsj.policy_information i WHERE i.insuredbirthday IS null);
  87. --手机号 sametype:8
  88. INSERT INTO shanglifeecif.samecustomer(
  89. sid,
  90. scustid,
  91. name,
  92. gender,
  93. birthday,
  94. idcard,
  95. OtherIdNumber,
  96. mobile,
  97. sameType
  98. )
  99. SELECT
  100. reflect("java.util.UUID", "randomUUID"),
  101. t.customer_id,
  102. t.customer_name,
  103. t.gender,
  104. to_char(t.birthdate,"yyyy-MM-dd") birthdate,
  105. CASE t.id_type WHEN '0' THEN t.id_no END AS idcard,
  106. CASE WHEN t.id_type <> '0' THEN t.id_no END AS otherIdnumber,
  107. t.mobile,
  108. 8
  109. FROM dsj.t_customer_class t WHERE t.mobile NOT RLIKE '^1[0-9]{10}$' AND t.customer_id IN (SELECT p.customerno FROM dsj.policy_information p);
  110. --邮编 sametype:9
  111. INSERT INTO shanglifeecif.samecustomer(
  112. sid,
  113. scustid,
  114. name,
  115. gender,
  116. birthday,
  117. idcard,
  118. zipcode,
  119. sameType
  120. )
  121. SELECT
  122. reflect("java.util.UUID", "randomUUID"),
  123. si.scustid,
  124. si.name,
  125. si.gender,
  126. si.birthday,
  127. si.idcard,
  128. si.zipcode,
  129. 9
  130. FROM shanglifeecif.individual si WHERE (length(si.zipcode)>0 and length(si.zipcode)<6) OR length(si.zipcode)>6;
  131. --主被保险人性别异常数据 sametype:11
  132. INSERT INTO shanglifeecif.samecustomer(
  133. sid,
  134. scustid,
  135. name,
  136. gender,
  137. birthday,
  138. idcard,
  139. OtherIdNumber,
  140. sameType
  141. )
  142. SELECT
  143. reflect("java.util.UUID", "randomUUID"),
  144. t.customer_id,
  145. t.customer_name,
  146. t.gender,
  147. to_char(t.birthdate,"yyyy-MM-dd") birthdate,
  148. CASE t.id_type WHEN '0' THEN t.id_no END AS idcard,
  149. CASE WHEN t.id_type <> '0' THEN t.id_no END AS otherIdnumber,
  150. 11
  151. FROM dsj.t_customer_class t WHERE (t.gender IS NULL OR t.gender = '2') AND t.customer_id IN (SELECT p.insuredno FROM dsj.policy_information p WHERE p.insuredsex IS null);
  152. --邮箱 sametype:14
  153. INSERT INTO shanglifeecif.samecustomer(
  154. sid,
  155. scustid,
  156. name,
  157. gender,
  158. birthday,
  159. idcard,
  160. OtherIdNumber,
  161. email,
  162. sameType
  163. )
  164. SELECT
  165. reflect("java.util.UUID", "randomUUID"),
  166. t.customer_id,
  167. t.customer_name,
  168. t.gender,
  169. to_char(t.birthdate,"yyyy-MM-dd") birthdate,
  170. CASE t.id_type WHEN '0' THEN t.id_no END AS idcard,
  171. CASE WHEN t.id_type <> '0' THEN t.id_no END AS otherIdnumber,
  172. t.email,
  173. 14
  174. FROM dsj.t_customer_class t WHERE t.email not RLIKE '^([a-zA-Z0-9._%-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,4})*$' AND t.customer_id IN (SELECT p.customerno FROM dsj.policy_information p UNION SELECT pi.insuredno FROM dsj.policy_information pi);
  175. --主被保险人手机异常数据 sametype:16
  176. INSERT INTO shanglifeecif.samecustomer(
  177. sid,
  178. scustid,
  179. name,
  180. gender,
  181. birthday,
  182. idcard,
  183. OtherIdNumber,
  184. mobile,
  185. sameType
  186. )
  187. SELECT
  188. reflect("java.util.UUID", "randomUUID"),
  189. t.customer_id,
  190. t.customer_name,
  191. t.gender,
  192. to_char(t.birthdate,"yyyy-MM-dd") birthdate,
  193. CASE t.id_type WHEN '0' THEN t.id_no END AS idcard,
  194. CASE WHEN t.id_type <> '0' THEN t.id_no END AS otherIdnumber,
  195. t.mobile,
  196. 16
  197. FROM dsj.t_customer_class t WHERE t.mobile NOT RLIKE '^1[0-9]{10}$' AND t.customer_id IN (SELECT p.insuredno FROM dsj.policy_information p);
  198. --主被保险人证件号码 sametype:17
  199. INSERT INTO shanglifeecif.samecustomer(
  200. sid,
  201. scustid,
  202. name,
  203. gender,
  204. birthday,
  205. idcard,
  206. sameType
  207. )
  208. SELECT
  209. reflect("java.util.UUID", "randomUUID"),
  210. si.scustid,
  211. si.name,
  212. si.gender,
  213. si.birthday,
  214. si.idcard,
  215. 17
  216. FROM shanglifeecif.individual si WHERE length(si.idcard)<18 AND si.custtype LIKE '%被保人%' AND si.idcard IS NOT null;
  217. --业务员没错 sametype:18
  218. INSERT INTO shanglifeecif.samecustomer(
  219. sid,
  220. scustid,
  221. name,
  222. gender,
  223. birthday,
  224. idcard,
  225. OtherIdNumber,
  226. agentname,
  227. sameType
  228. )
  229. SELECT
  230. reflect("java.util.UUID", "randomUUID"),
  231. t.customer_id,
  232. t.customer_name,
  233. t.gender,
  234. to_char(t.birthdate,"yyyy-MM-dd") birthdate,
  235. CASE t.id_type WHEN '0' THEN t.id_no END AS idcard,
  236. CASE WHEN t.id_type <> '0' THEN t.id_no END AS otherIdnumber,
  237. p.agentname,
  238. 18
  239. FROM dsj.t_customer_class t,dsj.policy_information p WHERE t.customer_id = p.customerno and p.agentname not RLIKE '^[\u4e00-\u9fa5]|[^\x00-\xff]$';
  240. --业绩归属 sametype:19
  241. INSERT INTO shanglifeecif.samecustomer(
  242. sid,
  243. scustid,
  244. name,
  245. gender,
  246. birthday,
  247. idcard,
  248. OtherIdNumber,
  249. salecomname,
  250. sameType
  251. )
  252. SELECT
  253. reflect("java.util.UUID", "randomUUID"),
  254. t.customer_id,
  255. t.customer_name,
  256. t.gender,
  257. to_char(t.birthdate,"yyyy-MM-dd") birthdate,
  258. CASE t.id_type WHEN '0' THEN t.id_no END AS idcard,
  259. CASE WHEN t.id_type <> '0' THEN t.id_no END AS otherIdnumber,
  260. p.salecomname,
  261. 19
  262. FROM dsj.t_customer_class t,dsj.policy_information p WHERE t.customer_id = p.customerno and p.salecomname not RLIKE '^[\u4e00-\u9fa5]|[^\x00-\xff]$';
  263. dbms_output.put_line('data_cleaning函数跑批完成!');
  264. END;