286 lines
9.4 KiB
JavaScript
286 lines
9.4 KiB
JavaScript
'use strict';
|
|
const qs = require('qs')
|
|
const axios = require('axios')
|
|
const Diff = require('diff')
|
|
|
|
const { createCoreService } = require('@strapi/strapi').factories;
|
|
const { ApplicationError } = require("@strapi/utils").errors
|
|
|
|
// Supported translation languages, keyed by two-letter language code.
//   field      - property of a parole's `traductions` object that holds the text
//   targetLang - value written to `target_lang` in exported translation pairs
//   userPrompt - instruction placed at the start of the user message in "instruct" exports
const LANG_MAP = {
  fr: {
    field: 'francais',
    targetLang: 'fr',
    userPrompt: 'Tradui an fransé',
  },
  en: {
    field: 'anglais',
    targetLang: 'en',
    userPrompt: 'Translate to English',
  },
  es: {
    field: 'espagnol',
    targetLang: 'es',
    userPrompt: 'Traduce al español',
  },
  de: {
    field: 'allemand',
    targetLang: 'de',
    userPrompt: 'Übersetze auf Deutsch',
  },
  it: {
    field: 'italien',
    targetLang: 'it',
    userPrompt: 'Traduci in italiano',
  },
}

// Every supported language code, in declaration order.
const ALL_LANGS = Object.keys(LANG_MAP)
|
|
|
|
/**
 * Reduce a Markdown string to plain text.
 *
 * Removes blockquote/list markers, ATX heading markers, bold/italic delimiters
 * and link syntax (keeping the link label), collapses runs of three or more
 * newlines into one blank line, and trims the result.
 *
 * @param {string} text - Markdown source; any falsy value yields ''.
 * @returns {string} The plain-text rendering.
 */
function stripMarkdown(text) {
  if (!text) return ''

  return text
    // Block-level markers go first: a leading "* " bullet must be removed before
    // the emphasis rules run, otherwise two bullets on consecutive lines are
    // taken for an italic pair spanning the line break.
    .replace(/^[>\-*+]\s+/gm, '')
    // Heading markers only count at the start of a line (up to 3 spaces of
    // indent), so a "# " inside a sentence such as "C# minor" is left alone.
    .replace(/^[ \t]{0,3}#{1,6}\s+/gm, '')
    // Inline emphasis: double delimiters before single ones.
    .replace(/\*\*(.*?)\*\*/gs, '$1')
    .replace(/\*(.*?)\*/gs, '$1')
    .replace(/__(.*?)__/gs, '$1')
    .replace(/_(.*?)_/gs, '$1')
    // [label](url) -> label
    .replace(/\[([^\]]+)\]\([^\)]+\)/g, '$1')
    .replace(/\n{3,}/g, '\n\n')
    .trim()
}
|
|
|
|
// French personal pronouns used by suspectFrench() to spot transcriptions that
// are probably written in French rather than in KA.
const FR_PRONOUNS = new Set(['je', 'tu', 'il', 'elle', 'nous', 'vous', 'ils', 'elles'])

/**
 * Heuristic language check: reports whether French personal pronouns make up
 * more than 4 % of the words of `text`.
 *
 * @param {string} text - Plain-text transcription; falsy values yield false.
 * @returns {boolean} true when the pronoun ratio exceeds 0.04; always false for
 *   texts shorter than 10 words, where the ratio is not meaningful.
 */
function suspectFrench(text) {
  if (!text) return false

  // \p{L}+ matches whole words including accented letters. An ASCII-based
  // `\b[a-z…]+\b` pattern truncates "été" to "t" and drops "à" entirely,
  // which distorts the word count the ratio is based on.
  const words = text.toLowerCase().match(/\p{L}+/gu) || []
  if (words.length < 10) return false

  const pronounCount = words.filter((word) => FR_PRONOUNS.has(word)).length
  return pronounCount / words.length > 0.04
}
|
|
|
|
// Promise-based delay: resolves (with no value) once `ms` milliseconds have elapsed.
const sleep = (ms) =>
  new Promise((done) => {
    setTimeout(done, ms)
  })
|
|
|
|
/**
 * Minimal client for the DeepL `/v2/translate` endpoint.
 *
 * Configuration is read from the environment when an instance is created:
 *   DEEPL_URL - API host name (defaults to 'api-free.deepl.com')
 *   DEEPL_KEY - authentication key, sent in the Authorization header
 */
class Translator {
  constructor() {
    this.deeplApi = process.env.DEEPL_URL || 'api-free.deepl.com'
    this.deeplKey = process.env.DEEPL_KEY
    this.urlRequest = `https://${this.deeplApi}/v2/translate`
  }

  /**
   * Request a translation.
   *
   * @param {string} origin - Source language code, sent as `source_lang`.
   * @param {string} target - Target language code, sent as `target_lang`.
   * @param {string|string[]} text - Text(s) to translate; a single value is wrapped in an array.
   * @returns {Promise<object|undefined>} The response body (`result.data`), or
   *   `undefined` when the request fails. Failures are logged, not thrown, so
   *   callers must check the result.
   */
  async get(origin, target, text) {
    try {
      const payload = {
        text: Array.isArray(text) ? text : [text],
        source_lang: origin,
        target_lang: target,
      }
      // The key travels only in the Authorization header, never in the body.
      const config = {
        headers: {
          Authorization: `DeepL-Auth-Key ${this.deeplKey}`,
          'Content-Type': 'application/json',
        },
      }

      const result = await axios.post(this.urlRequest, payload, config)
      return result.data
    } catch (error) {
      console.error('DeepL error:', error?.response?.data || error)
      return undefined
    }
  }
}
|
|
|
|
module.exports = createCoreService('api::parole.parole', ({strapi}) => ({
|
|
async translate(origin, target, text) {
|
|
const translator = new Translator()
|
|
const data = await translator.get(origin, target, text)
|
|
return data.translations[0].text
|
|
},
|
|
async translateLyrics(parolesFR) {
|
|
const anglais = await this.translate('FR', 'EN', parolesFR)
|
|
const espagnol = await this.translate('FR', 'ES', parolesFR)
|
|
const allemand = await this.translate('FR', 'DE', parolesFR)
|
|
const italien = await this.translate('FR', 'IT', parolesFR)
|
|
|
|
return {
|
|
francais: parolesFR,
|
|
anglais: anglais + '\n\n (Translated by DeepL)',
|
|
espagnol: espagnol + '\n\n (Traducido por DeepL)',
|
|
allemand: allemand + '\n\n (Übersetzt von DeepL)',
|
|
italien: italien + '\n\n (Tradotto da DeepL)'
|
|
}
|
|
},
|
|
validateParoles(titre, transcription) {
|
|
if (!titre || titre.trim().length === 0) {
|
|
throw new ApplicationError('Champ obligatoire. Veuillez choisir un titre.');
|
|
}
|
|
|
|
if (!transcription || transcription.trim().length === 0) {
|
|
throw new ApplicationError('Champ obligatoire. Veuillez renseigner la transcription.')
|
|
}
|
|
|
|
if (transcription.trim().length < 10) {
|
|
throw new ApplicationError('La transcription doit contenir au moins 10 caractères.')
|
|
}
|
|
},
|
|
async fetchAllParoles() {
|
|
const pageSize = 100
|
|
let start = 0
|
|
const all = []
|
|
|
|
while (true) {
|
|
const batch = await strapi.documents('api::parole.parole').findMany({
|
|
status: 'published',
|
|
populate: ['artistes', 'traductions'],
|
|
fields: ['documentId', 'titre', 'slug', 'transcription', 'annee', 'langueSource'],
|
|
limit: pageSize,
|
|
start,
|
|
})
|
|
all.push(...batch)
|
|
if (batch.length < pageSize) break
|
|
start += pageSize
|
|
}
|
|
|
|
return all
|
|
},
|
|
|
|
buildExport(paroles, type, langs) {
|
|
const targetLangs = langs && langs.length ? langs : ALL_LANGS
|
|
const pairs = []
|
|
const missing = []
|
|
const nonKa = []
|
|
const langCounts = {}
|
|
|
|
for (const parole of paroles) {
|
|
const source = stripMarkdown(parole.transcription)
|
|
const sourceLang = parole.langueSource || 'ka'
|
|
const artists = (parole.artistes || []).map(a => a.alias)
|
|
const paroleMeta = { title: parole.titre, artists }
|
|
|
|
if (sourceLang !== 'ka') {
|
|
nonKa.push({ documentId: parole.documentId, slug: parole.slug, ...paroleMeta, suspected_lang: sourceLang })
|
|
} else if (suspectFrench(source)) {
|
|
nonKa.push({ documentId: parole.documentId, slug: parole.slug, ...paroleMeta, suspected_lang: 'fr' })
|
|
}
|
|
|
|
const missingLangs = ALL_LANGS.filter(lang => !parole.traductions?.[LANG_MAP[lang].field])
|
|
if (missingLangs.length > 0) {
|
|
missing.push({ documentId: parole.documentId, slug: parole.slug, ...paroleMeta, missing: missingLangs })
|
|
}
|
|
|
|
for (const lang of targetLangs) {
|
|
const { field, targetLang, userPrompt } = LANG_MAP[lang]
|
|
if (lang === sourceLang) continue
|
|
const target = stripMarkdown(parole.traductions?.[field])
|
|
if (!target) continue
|
|
|
|
langCounts[lang] = (langCounts[lang] || 0) + 1
|
|
|
|
if (type === 'instruct') {
|
|
const systemPrompt = sourceLang === 'ka'
|
|
? 'Tu es un expert en langue KA (créole guadeloupéen/martiniquais). Traduis le texte KA suivant.'
|
|
: `Tu es un expert en traduction. Traduis le texte suivant (langue source : ${sourceLang}).`
|
|
pairs.push({
|
|
messages: [
|
|
{ role: 'system', content: systemPrompt },
|
|
{ role: 'user', content: `${userPrompt} :\n\n${source}` },
|
|
{ role: 'assistant', content: target },
|
|
],
|
|
})
|
|
} else {
|
|
pairs.push({
|
|
source_lang: sourceLang,
|
|
target_lang: targetLang,
|
|
source,
|
|
target,
|
|
...paroleMeta,
|
|
})
|
|
}
|
|
}
|
|
}
|
|
|
|
const metadata = {
|
|
exported_at: new Date().toISOString(),
|
|
total_paroles: paroles.length,
|
|
total_pairs: pairs.length,
|
|
languages: langCounts,
|
|
missing_translations: missing,
|
|
non_ka_transcriptions: nonKa,
|
|
}
|
|
|
|
return { metadata, pairs }
|
|
},
|
|
|
|
async bulkTranslateMissing() {
|
|
const TARGET_LANGS = [
|
|
{ lang: 'en', field: 'anglais', deeplTarget: 'EN', suffix: '\n\n(Translated by DeepL)' },
|
|
{ lang: 'es', field: 'espagnol', deeplTarget: 'ES', suffix: '\n\n(Traducido por DeepL)' },
|
|
{ lang: 'de', field: 'allemand', deeplTarget: 'DE', suffix: '\n\n(Übersetzt von DeepL)' },
|
|
{ lang: 'it', field: 'italien', deeplTarget: 'IT', suffix: '\n\n(Tradotto da DeepL)' },
|
|
]
|
|
|
|
const pageSize = 100
|
|
let start = 0
|
|
const all = []
|
|
while (true) {
|
|
const batch = await strapi.documents('api::parole.parole').findMany({
|
|
status: 'published',
|
|
populate: ['traductions'],
|
|
fields: ['documentId', 'slug', 'titre', 'transcription', 'langueSource'],
|
|
limit: pageSize,
|
|
start,
|
|
})
|
|
all.push(...batch)
|
|
if (batch.length < pageSize) break
|
|
start += pageSize
|
|
}
|
|
|
|
const translator = new Translator()
|
|
const translated = []
|
|
const skipped = []
|
|
const errors = []
|
|
|
|
for (const parole of all) {
|
|
const sourceFR = parole.traductions?.francais
|
|
|| (parole.langueSource === 'fr' ? parole.transcription : null)
|
|
|
|
if (!sourceFR) { skipped.push(parole.slug); continue }
|
|
|
|
const missing = TARGET_LANGS.filter(({ field }) => !parole.traductions?.[field])
|
|
if (missing.length === 0) { skipped.push(parole.slug); continue }
|
|
|
|
const { id: _id, ...tradData } = parole.traductions || {}
|
|
const updatedTrad = { ...tradData }
|
|
const addedLangs = []
|
|
|
|
for (const { lang, field, deeplTarget, suffix } of missing) {
|
|
try {
|
|
await sleep(700)
|
|
const result = await translator.get('FR', deeplTarget, sourceFR)
|
|
const text = result?.translations?.[0]?.text
|
|
if (text) {
|
|
updatedTrad[field] = text + suffix
|
|
addedLangs.push(lang)
|
|
}
|
|
} catch (err) {
|
|
errors.push({ slug: parole.slug, lang: deeplTarget, error: err.message })
|
|
}
|
|
}
|
|
|
|
if (addedLangs.length > 0) {
|
|
await strapi.documents('api::parole.parole').update({
|
|
documentId: parole.documentId,
|
|
data: { traductions: updatedTrad },
|
|
})
|
|
await strapi.documents('api::parole.parole').publish({
|
|
documentId: parole.documentId,
|
|
})
|
|
translated.push({ slug: parole.slug, langs: addedLangs })
|
|
}
|
|
}
|
|
|
|
return { translated, skipped, errors }
|
|
},
|
|
|
|
parolesDiff(titre = '', oldString, newString) {
|
|
const patch = Diff.createPatch(titre, oldString, newString, 'supprimée', 'ajoutée')
|
|
const parsePatch = Diff.parsePatch(patch)
|
|
|
|
if (parsePatch[0].hunks.length > 0) {
|
|
const jsonDiff = Diff.diffWords(oldString, newString)
|
|
|
|
return {
|
|
patch,
|
|
jsonDiff
|
|
}
|
|
}
|
|
}
|
|
}));
|