0%

自动翻译工具(baidu)

文章字数:935,阅读全文大约需要3分钟

抓取百度翻译接口,将一个国际化文件翻译成若干指定语言的国际化文件

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
package com.colin.util;

import java.io.*;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.security.KeyManagementException;
import java.security.KeyStoreException;
import java.security.NoSuchAlgorithmException;
import java.security.cert.CertificateException;
import java.security.cert.X509Certificate;
import java.util.LinkedList;
import java.util.List;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Collectors;

import javax.net.ssl.SSLContext;
import javax.script.ScriptEngine;
import javax.script.ScriptEngineManager;
import javax.script.ScriptException;

import org.apache.http.conn.ssl.NoopHostnameVerifier;
import org.apache.http.conn.ssl.SSLConnectionSocketFactory;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.ssl.SSLContextBuilder;
import org.apache.http.ssl.TrustStrategy;
import org.springframework.http.HttpEntity;
import org.springframework.http.HttpHeaders;
import org.springframework.http.ResponseEntity;
import org.springframework.http.client.HttpComponentsClientHttpRequestFactory;
import org.springframework.http.converter.HttpMessageConverter;
import org.springframework.http.converter.StringHttpMessageConverter;
import org.springframework.util.LinkedMultiValueMap;
import org.springframework.util.MultiValueMap;
import org.springframework.util.StringUtils;
import org.springframework.web.client.RestTemplate;

import com.alibaba.fastjson.JSONObject;

/**
* @author colin.cheng
* @date 2020-05-19
* @since 1.0.0
*/
public class Test1 {

// Source file: directory holding the resource bundles and the name of the bundle to translate from.
// The file name is expected to look like <model>_<locale>.<suffix>.
private static String basePath = "D:\\test\\translate\\123\\";
private static String fromName = "eventCenter_en_US.properties";

// Baidu translate endpoint settings: request URL prefix, the token sent as the "token" form field,
// and the gtk seed that is spliced verbatim into the JavaScript signing function.
private static String path = "https://fanyi.baidu.com/v2transapi?";
private static String token = "5a9892b6584a2fed08f75bed87736039";
private static String windowgtk = "'320305.131321201'";

// Charset used for both reading the source file and writing the generated files.
private static Charset fileCharset = StandardCharsets.UTF_8;
// Second dot-separated segment of fromName; must be initialised after fromName.
private static String fileSuffix = fromName.split("\\.")[1];

// Whether to overwrite a target-language file that already exists. Existing files are skipped by default.
private static boolean override = false;
// Pause (in seconds) after each translation request; requesting too fast gets the IP banned.
private static int sleepTime = 2;

// Resume point after a network failure: continue the file of lastLanguage starting at line index lastLine.
private static Integer lastLine = 13;
private static LanguageEnum lastLanguage = LanguageEnum.AR;

// Number of translation requests performed so far.
private static AtomicInteger count = new AtomicInteger();

/**
 * Entry point: derives the model name and source language from {@code fromName}, reads the source
 * bundle and generates one translated bundle per target language.
 *
 * <p>Any failure is caught here and reported via a stack trace, so the process always exits normally.
 *
 * @param args unused
 */
public static void main(String[] args) {
    try {
        // "eventCenter_en_US.properties" -> base "eventCenter_en_US" -> model "eventCenter", locale "en_US"
        String baseName = fromName.split("\\.")[0];
        String modelName = baseName.split("_")[0];
        String fromZkName = baseName.replace(modelName + "_", "");

        LanguageEnum from = LanguageEnum.getByZKName(fromZkName);
        if (from == null) {
            // Fail with a clear message instead of an anonymous NullPointerException further down.
            throw new IllegalArgumentException(
                "Unsupported source language [" + fromZkName + "] derived from file name [" + fromName + "]");
        }

        // Target languages: every language except the source one.
        // To translate into selected languages only, build the list by hand instead,
        // e.g. with a single entry LanguageEnum.ZH_TW.
        List<LanguageEnum> to = new LinkedList<>(from.getOther());

        List<String> fromLine = getLineByFile(basePath + fromName);
        for (LanguageEnum toLang : to) {
            createToFile(from, toLang, modelName, fromLine);
        }
        System.out.println("处理完毕,总共翻译 [" + count.get() + "] 次");
    } catch (Exception e) {
        e.printStackTrace();
    }
}

// JavaScript engine holding the signing function; assigned in the static initializer of this class.
private static ScriptEngine scriptEngine;
// HTTP client used for all translate requests; assigned in the static initializer of this class.
private static RestTemplate restTemplate;

/**
 * Supported languages. Each constant pairs the locale suffix used in bundle file names
 * ({@code zkName}) with the language code sent to the Baidu translate endpoint ({@code bdName}).
 */
enum LanguageEnum {
    // Simplified Chinese
    ZH_CN("zh_CN", "zh"),
    // English
    EN("en_US", "en"),
    // Traditional Chinese
    ZH_TW("zh_TW", "cht"),
    // Vietnamese
    VI("vi", "vie"),
    // Thai
    TH("th", "th"),
    // Russian
    RU("ru_RU", "ru"),
    // Romanian
    RO("ro", "rom"),
    // Portuguese
    PT("pt_BR", "pt"),
    // Polish
    PL("pl", "pl"),
    // Korean
    KO("ko_KR", "kor"),
    // Japanese
    JA("ja_JP", "jp"),
    // Italian
    IT("it", "it"),
    // Indonesian
    IN("in_ID", "id"),
    // French
    FR("fr", "fra"),
    // Spanish
    ES("es", "spa"),
    // German
    DE("de", "de"),
    // Arabic
    AR("ar", "ara"),
    // Turkish
    TR("tr", "tr");

    /** Locale suffix as it appears in the bundle file name, e.g. {@code en_US}. */
    public final String zkName;
    /** Language code understood by the Baidu translate endpoint, e.g. {@code en}. */
    public final String bdName;

    LanguageEnum(String zkName, String bdName) {
        this.bdName = bdName;
        this.zkName = zkName;
    }

    /**
     * Looks up a language by its file-name locale suffix.
     *
     * @param zkName locale suffix such as {@code zh_TW}; may be {@code null}
     * @return the matching constant, or {@code null} when no constant uses that suffix
     */
    public static LanguageEnum getByZKName(String zkName) {
        for (LanguageEnum value : LanguageEnum.values()) {
            if (value.zkName.equals(zkName)) {
                return value;
            }
        }
        return null;
    }

    /**
     * Lists every language except this one, in declaration order.
     *
     * @return a new mutable list owned by the caller
     */
    public List<LanguageEnum> getOther() {
        List<LanguageEnum> list = new LinkedList<>();
        for (LanguageEnum value : LanguageEnum.values()) {
            if (value != this) {
                list.add(value);
            }
        }
        return list;
    }
}

/**
 * Translates a single value through the Baidu translate endpoint.
 *
 * <p>Posts the value together with its computed signature and the configured token, logs the
 * result to standard output and increments the global request counter.
 *
 * @param fromEnum language of {@code val}
 * @param toEnmu language to translate into
 * @param val text to translate
 * @return the translated text (field {@code dst} of the first entry in {@code trans_result.data})
 * @throws IllegalStateException if the response carries no translation result (for example an
 *         error payload caused by an expired token or a rejected signature)
 * @throws Exception if signing or the HTTP request fails
 */
public static String getTranslate(LanguageEnum fromEnum, LanguageEnum toEnmu, String val) throws Exception {
    String from = fromEnum.bdName, to = toEnmu.bdName;
    String url = path + "from=" + from + "&to=" + to;
    JSONObject res =
        postFromUrl(url, new Object[] {"from", from}, new Object[] {"to", to}, new Object[] {"query", val},
            new Object[] {"transtype", "translang"}, new Object[] {"simple_means_flag", "3"},
            new Object[] {"sign", sign(val)}, new Object[] {"token", token}, new Object[] {"domain", "common"});

    // An error payload has no "trans_result"; report it with the raw response instead of an NPE.
    JSONObject transResult = res == null ? null : res.getJSONObject("trans_result");
    if (transResult == null || transResult.getJSONArray("data") == null
        || transResult.getJSONArray("data").isEmpty()) {
        throw new IllegalStateException("Baidu translate returned no result for [" + val + "] (" + from
            + " => " + to + "), response: " + res);
    }

    String resStr = transResult.getJSONArray("data").getJSONObject(0).getString("dst");
    System.out
        .println("正在翻译 [" + val + "] , translate = [" + from + " => " + to + "]" + " , 翻译结果 [" + resStr + "]");
    count.getAndIncrement();
    return resStr;
}

/**
 * Computes the request signature for a query string by calling the JavaScript function
 * {@code e} loaded into the shared script engine.
 *
 * <p>The value is handed to the engine as a binding rather than spliced into the script text,
 * so backslashes, quotes and line terminators in {@code v} are signed literally instead of
 * being reinterpreted as JavaScript escape sequences or breaking the generated script.
 *
 * @param v the exact text that will be sent as the {@code query} parameter; must not be null
 * @return the signature as a string
 * @throws NullPointerException if {@code v} is null
 * @throws ScriptException if the script engine fails to evaluate the signing call
 */
public static String sign(String v) throws ScriptException {
    if (v == null) {
        throw new NullPointerException("v");
    }
    scriptEngine.put("signInput", v);
    // String(...) coerces the bound Java value to a native JavaScript string on every engine.
    scriptEngine.eval("var sign = e(String(signInput))");
    return String.valueOf(scriptEngine.get("sign"));
}

/**
 * Posts form parameters to {@code url} and parses the response body as JSON.
 *
 * @param url target URL
 * @param param form fields, each given as a two-element {@code {name, value}} array
 * @return the parsed response body
 * @throws IllegalStateException if the response status is not 2xx
 * @throws Exception if the request or JSON parsing fails
 */
private static JSONObject postFromUrl(String url, Object[]... param) throws Exception {
    final HttpEntity<MultiValueMap<Object, Object>> request = getRequestByParam(param);
    final ResponseEntity<String> entity = restTemplate.postForEntity(url, request, String.class);
    if (!entity.getStatusCode().is2xxSuccessful()) {
        // Say what went wrong and where; a bare "err" is useless when diagnosing a failed run.
        throw new IllegalStateException(
            "Unexpected HTTP status " + entity.getStatusCode() + " from [" + url + "]");
    }
    return JSONObject.parseObject(entity.getBody());
}

/**
 * Writes the translated bundle for one target language.
 *
 * <p>An existing target file is skipped unless {@code override} is set or the file belongs to
 * the language being resumed ({@code lastLanguage}); in the resume case output is appended and
 * lines before index {@code lastLine} are not processed again.
 *
 * <p>Blank lines, comment lines ({@code #} or {@code !}), lines without {@code =} and entries
 * with an empty value are copied unchanged. For every other line only the text after the
 * first {@code =} is translated, so values that themselves contain {@code =} stay intact.
 *
 * @param from source language
 * @param to target language
 * @param modelName bundle base name used to build the target file name
 * @param lines lines of the source bundle
 * @throws Exception if translating or writing fails, or the pause between requests is interrupted
 */
private static void createToFile(LanguageEnum from, LanguageEnum to, String modelName, List<String> lines)
throws Exception {
    String name = basePath + modelName + "_" + to.zkName + "." + fileSuffix;
    boolean append = isAppend(to, lastLanguage);
    if (!override && Files.exists(Paths.get(name))) {
        if (lastLanguage == null || lastLine == null || !append) {
            System.out.println("文件 [" + name + "] 已存在,跳过");
            return;
        }
    }
    System.out.println("当前处理文件为 [" + name + "]");

    // A missing lastLine means "resume from the start" rather than an unboxing NullPointerException.
    int resumeFrom = (append && lastLine != null) ? lastLine : 0;

    try (FileOutputStream outputStream = new FileOutputStream(name, append);
        OutputStreamWriter writer = new OutputStreamWriter(outputStream, fileCharset)) {
        for (int index = 0; index < lines.size(); index++) {
            String line = lines.get(index);
            System.out.println("当前文件进度: " + index + "/" + lines.size());
            if (index < resumeFrom) {
                continue;
            }

            String trimmed = line.trim();
            int separator = line.indexOf('=');
            boolean copyVerbatim = trimmed.isEmpty() || trimmed.startsWith("#") || trimmed.startsWith("!")
                || separator < 0 || line.substring(separator + 1).trim().isEmpty();
            if (copyVerbatim) {
                // Nothing to translate: keep the line as is and do not spend a request on it.
                writer.write(line + "\r\n");
                continue;
            }

            String key = line.substring(0, separator);
            String val = line.substring(separator + 1);
            writer.write(key + "=" + getTranslate(from, to, val) + "\r\n");
            TimeUnit.SECONDS.sleep(sleepTime);
        }
    }
}

/**
 * Tells whether output for {@code to} should be appended to its file, which is the case only
 * when a resume language is set and it is the same language as {@code to}.
 *
 * @param to language currently being written
 * @param lastLanguage language to resume, or {@code null} when not resuming
 * @return {@code true} if {@code lastLanguage} is non-null and equal to {@code to}
 */
private static boolean isAppend(LanguageEnum to, LanguageEnum lastLanguage) {
    if (lastLanguage == null) {
        return false;
    }
    return to.equals(lastLanguage);
}

/**
 * Reads a whole text file into memory, one list element per line, decoding it with
 * {@code fileCharset}.
 *
 * @param path location of the file to read
 * @return all lines of the file in order, without line terminators
 * @throws Exception if the file cannot be opened or read
 */
private static List<String> getLineByFile(String path) throws Exception {
    try (FileInputStream rawBytes = new FileInputStream(path);
        InputStreamReader decoder = new InputStreamReader(rawBytes, fileCharset);
        BufferedReader lineReader = new BufferedReader(decoder)) {
        List<String> allLines = lineReader.lines().collect(Collectors.toList());
        return allLines;
    }
}

/**
 * Builds the form request sent to the translate endpoint: the given fields as the body plus a
 * fixed set of browser-like headers (including a captured session cookie).
 *
 * @param params form fields, each given as a two-element {@code {name, value}} array; may be null
 * @return request entity carrying the form fields and headers
 */
private static HttpEntity<MultiValueMap<Object, Object>> getRequestByParam(Object[]... params) {
    final HttpHeaders headers = new HttpHeaders();
    final LinkedMultiValueMap<Object, Object> map = new LinkedMultiValueMap<>();
    if (params != null) {
        for (Object[] param : params) {
            map.add(param[0], param[1]);
        }
    }
    // Accept-Encoding deliberately omits "br": the Apache HttpClient 4.x client built by this class
    // transparently decodes gzip/deflate only, so a brotli response would reach the JSON parser
    // still compressed.
    String paramStr = "Host: fanyi.baidu.com\n"
        + "User-Agent: Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:93.0) Gecko/20100101 Firefox/93.0\n"
        + "Accept: */*\n" + "Accept-Language: zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2\n"
        + "Accept-Encoding: gzip, deflate\n"
        + "Content-Type: application/x-www-form-urlencoded; charset=UTF-8\n" + "X-Requested-With: XMLHttpRequest\n"
        + "Origin: https://fanyi.baidu.com\n" + "Connection: keep-alive\n" + "Referer: https://fanyi.baidu.com/\n"
        + "Cookie: BAIDUID=1F3AA9B69AA7D53DE31F86572B715FAE:FG=1; BIDUPSID=1F3AA9B69AA7D53DDA50BBE8079D1BC7; PSTM=1630997334; BDORZ=FFFB88E999055A3F8A630C64834BD6D0; td_cookie=2315277960; __yjs_duid=1_580e46fc7808cbbe7243e345c691bd4c1635751328621; Hm_lvt_64ecd82404c51e03dc91cb9e8c025574=1635751323,1635822756,1635904954; REALTIME_TRANS_SWITCH=1; FANYI_WORD_SWITCH=1; HISTORY_SWITCH=1; SOUND_SPD_SWITCH=1; SOUND_PREFER_SWITCH=1; H_PS_PSSID=34889_34068_31253_34862_34599_34584_34505_34916_34606_26350_34972_34868; Hm_lpvt_64ecd82404c51e03dc91cb9e8c025574=1635904954; delPer=0; PSINO=5\n"
        + "Sec-Fetch-Dest: empty\n" + "Sec-Fetch-Mode: cors\n" + "Sec-Fetch-Site: same-origin";
    for (String headerLine : paramStr.split("\n")) {
        // Split on the first ": " only, so a value that itself contains ": " is kept whole.
        final String[] nameAndValue = headerLine.split(": ", 2);
        headers.set(nameAndValue[0], nameAndValue[1].trim());
    }
    return new HttpEntity<>(map, headers);
}

/**
 * Creates a {@link RestTemplate} backed by Apache HttpClient, restricted to TLSv1.2.
 *
 * <p>NOTE(review): the SSL context accepts every certificate chain and hostname verification
 * is turned off, so the server's identity is never checked. Acceptable for a throwaway tool
 * only; do not reuse this client where the connection must be trusted.
 *
 * @return a new RestTemplate using the permissive TLS configuration
 * @throws KeyStoreException if the trust material cannot be loaded
 * @throws NoSuchAlgorithmException if the SSL context cannot be created
 * @throws KeyManagementException if the SSL context cannot be initialised
 */
public static RestTemplate getRestTemplate()
throws KeyStoreException, NoSuchAlgorithmException, KeyManagementException {
    final TrustStrategy acceptEveryChain = (chain, authType) -> true;
    final SSLContext permissiveContext = new SSLContextBuilder().loadTrustMaterial(null, acceptEveryChain).build();

    final String[] enabledProtocols = {"TLSv1.2"};
    final SSLConnectionSocketFactory socketFactory =
        new SSLConnectionSocketFactory(permissiveContext, enabledProtocols, null, NoopHostnameVerifier.INSTANCE);

    final CloseableHttpClient client = HttpClients.custom().setSSLSocketFactory(socketFactory).build();
    final HttpComponentsClientHttpRequestFactory factory = new HttpComponentsClientHttpRequestFactory();
    factory.setHttpClient(client);
    return new RestTemplate(factory);
}

// One-time setup: loads the JavaScript signing routine into a script engine and builds the
// RestTemplate used for all requests.
// NOTE(review): any failure here is only printed; scriptEngine and/or restTemplate may then
// stay null and later calls will fail with a NullPointerException.
static {
try {
ScriptEngineManager sem = new ScriptEngineManager();
// NOTE(review): getEngineByName presumably returns null on JDKs that ship no JavaScript
// engine — confirm for the target runtime.
scriptEngine = sem.getEngineByName("javascript");
// JavaScript source defining e(r) (the signing function) and its helpers a(r) and n(r, o).
// The gtk seed from the windowgtk field is spliced into the script text.
String signFunc = "var window = [];\n" + "\tfunction e(r) {\n" + "\tvar i = null;\n"
+ " var o = r.match(/[\\uD800-\\uDBFF][\\uDC00-\\uDFFF]/g);\n" + " if (null === o) {\n"
+ " var t = r.length;\n"
+ " t > 30 && (r = '' + r.substr(0, 10) + r.substr(Math.floor(t / 2) - 5, 10) + r.substr( - 10, 10))\n"
+ " } else {\n"
+ " for (var e = r.split(/[\\uD800-\\uDBFF][\\uDC00-\\uDFFF]/), C = 0, h = e.length, f = [\n"
+ " ]; h > C; C++) '' !== e[C] && f.push.apply(f, a(e[C].split(''))),\n"
+ " C !== h - 1 && f.push(o[C]);\n" + " var g = f.length;\n"
+ " g > 30 && (r = f.slice(0, 10).join('') + f.slice(Math.floor(g / 2) - 5, Math.floor(g / 2) + 5).join('') + f.slice( - 10).join(''))\n"
+ " }\n" + " var u = void 0,\n"
+ " l = '' + String.fromCharCode(103) + String.fromCharCode(116) + String.fromCharCode(107);\n"
+ " u = null !== i ? i : (i = " + windowgtk + " || '') || '';\n"
+ " for (var d = u.split('.'), m = Number(d[0]) || 0, s = Number(d[1]) || 0, S = [\n"
+ " ], c = 0, v = 0; v < r.length; v++) {\n" + " var A = r.charCodeAt(v);\n"
+ " 128 > A ? S[c++] = A : (2048 > A ? S[c++] = A >> 6 | 192 : (55296 === (64512 & A) && v + 1 < r.length && 56320 === (64512 & r.charCodeAt(v + 1)) ? (A = 65536 + ((1023 & A) << 10) + (1023 & r.charCodeAt(++v)), S[c++] = A >> 18 | 240, S[c++] = A >> 12 & 63 | 128) : S[c++] = A >> 12 | 224, S[c++] = A >> 6 & 63 | 128), S[c++] = 63 & A | 128)\n"
+ " }\n"
+ " for (var p = m, F = '' + String.fromCharCode(43) + String.fromCharCode(45) + String.fromCharCode(97) + ('' + String.fromCharCode(94) + String.fromCharCode(43) + String.fromCharCode(54)), D = '' + String.fromCharCode(43) + String.fromCharCode(45) + String.fromCharCode(51) + ('' + String.fromCharCode(94) + String.fromCharCode(43) + String.fromCharCode(98)) + ('' + String.fromCharCode(43) + String.fromCharCode(45) + String.fromCharCode(102)), b = 0; b < S.length; b++) p += S[b],\n"
+ " p = n(p, F);\n" + " return p = n(p, D),\n" + " p ^= s,\n"
+ " 0 > p && (p = (2147483647 & p) + 2147483648),\n" + " p %= 1000000,\n"
+ " p.toString() + '.' + (p ^ m)\n" + " }\n" + " \n" + " function a(r) {\n"
+ " if (Array.isArray(r)) {\n"
+ " for (var o = 0, t = Array(r.length); o < r.length; o++) t[o] = r[o];\n" + " return t\n"
+ " }\n" + " return Array.from(r)\n" + " }\n" + " function n(r, o) {\n"
+ " for (var t = 0; t < o.length - 2; t += 3) {\n" + " var a = o.charAt(t + 2);\n"
+ " a = a >= 'a' ? a.charCodeAt(0) - 87 : Number(a),\n"
+ " a = '+' === o.charAt(t + 1) ? r >>> a : r << a,\n"
+ " r = '+' === o.charAt(t) ? r + a & 4294967295 : r ^ a\n" + " }\n" + " return r\n" + " }";
// Define the functions in the engine so sign(...) can call e(...) later.
scriptEngine.eval(signFunc);
restTemplate = getRestTemplate();
// Make the first String converter use UTF-8 as its default charset.
List<HttpMessageConverter<?>> list = restTemplate.getMessageConverters();
for (HttpMessageConverter<?> httpMessageConverter : list) {
if (httpMessageConverter instanceof StringHttpMessageConverter) {
((StringHttpMessageConverter)httpMessageConverter).setDefaultCharset(StandardCharsets.UTF_8);
break;
}
}
} catch (Exception e) {
e.printStackTrace();
}
}
}