Files
api.pawol.nu/src/api/parole/services/parole.js
T

286 lines
9.4 KiB
JavaScript
Raw Normal View History

2022-05-12 03:33:24 +04:00
'use strict';
2022-05-20 00:05:34 +04:00
const qs = require('qs')
const axios = require('axios')
2022-12-09 04:42:27 +04:00
const Diff = require('diff')
2022-05-12 03:33:24 +04:00
const { createCoreService } = require('@strapi/strapi').factories;
const { ApplicationError } = require("@strapi/utils").errors
2022-05-12 03:33:24 +04:00
// Supported translation languages, keyed by language code. For each code:
//   field      - name of the matching property on a parole's `traductions` object
//   targetLang - language code written into exported pairs (same as the key)
//   userPrompt - instruction placed in the user message of "instruct" exports
const LANG_MAP = Object.fromEntries(
  [
    ['fr', 'francais', 'Tradui an fransé'],
    ['en', 'anglais', 'Translate to English'],
    ['es', 'espagnol', 'Traduce al español'],
    ['de', 'allemand', 'Übersetze auf Deutsch'],
    ['it', 'italien', 'Traduci in italiano'],
  ].map(([code, field, userPrompt]) => [code, { field, targetLang: code, userPrompt }])
)
// Every supported language code, in declaration order.
const ALL_LANGS = Object.keys(LANG_MAP)
/**
 * Removes common Markdown syntax from a text and returns the plain content.
 *
 * Handles heading markers, line-leading blockquote/list markers, bold and
 * italic emphasis (`*` and `_` variants) and inline links (keeps the label).
 * Runs of three or more newlines are collapsed to two and the result is trimmed.
 *
 * @param {string} text - Markdown text; any falsy value yields ''.
 * @returns {string} The text without Markdown markup.
 */
function stripMarkdown(text) {
  if (!text) return ''
  return text
    .replace(/#{1,6}\s+/g, '')
    // Line-leading list/quote markers must go BEFORE emphasis stripping:
    // otherwise the `*` bullets of two consecutive list items are read as one
    // `*...*` emphasis span and the second item keeps a stray leading space.
    .replace(/^[>\-\*\+]\s+/gm, '')
    .replace(/\*\*(.*?)\*\*/gs, '$1')
    .replace(/\*(.*?)\*/gs, '$1')
    .replace(/__(.*?)__/gs, '$1')
    .replace(/_(.*?)_/gs, '$1')
    .replace(/\[([^\]]+)\]\([^\)]+\)/g, '$1')
    .replace(/\n{3,}/g, '\n\n')
    .trim()
}
// Detects whether a transcription is probably written in French rather than KA.
// Heuristic: French personal pronouns make up more than 4 % of the words.
const FR_PRONOUNS = new Set(['je', 'tu', 'il', 'elle', 'nous', 'vous', 'ils', 'elles'])

/**
 * Tells whether `text` looks like French according to the pronoun heuristic.
 *
 * @param {string} text - Plain text to inspect; falsy values yield false.
 * @returns {boolean} true when the text has at least 10 words and more than
 *   4 % of them are French personal pronouns.
 */
function suspectFrench(text) {
  if (!text) return false
  // Tokenise on Unicode letters (plus combining marks). A `\b`-based pattern
  // without the `u` flag only knows ASCII word characters, so accented words
  // such as "déjà" were split into fragments ("d", "j"), which inflated the
  // word count and pushed the pronoun ratio below the threshold.
  const words = text.toLowerCase().match(/[\p{L}\p{M}]+/gu) || []
  if (words.length < 10) return false
  const frCount = words.filter(w => FR_PRONOUNS.has(w)).length
  return frCount / words.length > 0.04
}
2026-06-15 20:19:30 +04:00
// Returns a promise that resolves (with no value) after `ms` milliseconds.
const sleep = (ms) => new Promise((done) => {
  setTimeout(done, ms)
})
2022-05-20 00:05:34 +04:00
/**
 * Minimal client for the DeepL `/v2/translate` HTTP endpoint.
 *
 * Configuration is read from the environment at construction time:
 *   DEEPL_URL - API host name (defaults to 'api-free.deepl.com')
 *   DEEPL_KEY - API key, sent in the `Authorization` header
 */
class Translator {
  constructor() {
    this.deeplApi = process.env.DEEPL_URL || 'api-free.deepl.com'
    this.deeplKey = process.env.DEEPL_KEY
    this.urlRequest = `https://${this.deeplApi}/v2/translate`
  }

  /**
   * Sends one translation request to DeepL.
   *
   * @param {string} origin - Source language code (e.g. 'FR').
   * @param {string} target - Target language code (e.g. 'EN').
   * @param {string|string[]} text - Text(s) to translate; a single string is
   *   wrapped into a one-element array.
   * @returns {Promise<object>} The response body returned by the API.
   * @throws Rethrows the request error after logging it, so callers can tell
   *   a failed call from a successful one instead of receiving `undefined`.
   */
  async get(origin, target, text) {
    try {
      const result = await axios.post(this.urlRequest, {
        text: Array.isArray(text) ? text : [text],
        source_lang: origin,
        target_lang: target,
      }, {
        headers: {
          Authorization: `DeepL-Auth-Key ${this.deeplKey}`,
          'Content-Type': 'application/json'
        }
      })
      return result.data
    } catch (error) {
      console.error('DeepL error:', error?.response?.data || error);
      // Propagate: swallowing here made every failure surface later as an
      // unrelated TypeError (or not at all) in the callers.
      throw error
    }
  }
}
module.exports = createCoreService('api::parole.parole', ({strapi}) => ({
async translate(origin, target, text) {
const translator = new Translator()
const data = await translator.get(origin, target, text)
return data.translations[0].text
},
async translateLyrics(parolesFR) {
const anglais = await this.translate('FR', 'EN', parolesFR)
const espagnol = await this.translate('FR', 'ES', parolesFR)
const allemand = await this.translate('FR', 'DE', parolesFR)
const italien = await this.translate('FR', 'IT', parolesFR)
2022-12-09 04:42:27 +04:00
return {
francais: parolesFR,
anglais: anglais + '\n\n (Translated by DeepL)',
espagnol: espagnol + '\n\n (Traducido por DeepL)',
allemand: allemand + '\n\n (Übersetzt von DeepL)',
italien: italien + '\n\n (Tradotto da DeepL)'
}
},
validateParoles(titre, transcription) {
if (!titre || titre.trim().length === 0) {
throw new ApplicationError('Champ obligatoire. Veuillez choisir un titre.');
}
2022-12-09 04:42:27 +04:00
if (!transcription || transcription.trim().length === 0) {
throw new ApplicationError('Champ obligatoire. Veuillez renseigner la transcription.')
}
2022-12-09 04:42:27 +04:00
if (transcription.trim().length < 10) {
throw new ApplicationError('La transcription doit contenir au moins 10 caractères.')
}
2022-12-09 04:42:27 +04:00
},
async fetchAllParoles() {
const pageSize = 100
let start = 0
const all = []
while (true) {
const batch = await strapi.documents('api::parole.parole').findMany({
status: 'published',
populate: ['artistes', 'traductions'],
2026-06-12 14:10:05 +04:00
fields: ['documentId', 'titre', 'slug', 'transcription', 'annee', 'langueSource'],
limit: pageSize,
start,
})
all.push(...batch)
if (batch.length < pageSize) break
start += pageSize
}
return all
},
buildExport(paroles, type, langs) {
const targetLangs = langs && langs.length ? langs : ALL_LANGS
const pairs = []
const missing = []
const nonKa = []
const langCounts = {}
for (const parole of paroles) {
const source = stripMarkdown(parole.transcription)
2026-06-12 14:10:05 +04:00
const sourceLang = parole.langueSource || 'ka'
const artists = (parole.artistes || []).map(a => a.alias)
const paroleMeta = { title: parole.titre, artists }
2026-06-12 14:10:05 +04:00
if (sourceLang !== 'ka') {
nonKa.push({ documentId: parole.documentId, slug: parole.slug, ...paroleMeta, suspected_lang: sourceLang })
} else if (suspectFrench(source)) {
nonKa.push({ documentId: parole.documentId, slug: parole.slug, ...paroleMeta, suspected_lang: 'fr' })
}
const missingLangs = ALL_LANGS.filter(lang => !parole.traductions?.[LANG_MAP[lang].field])
if (missingLangs.length > 0) {
missing.push({ documentId: parole.documentId, slug: parole.slug, ...paroleMeta, missing: missingLangs })
}
for (const lang of targetLangs) {
const { field, targetLang, userPrompt } = LANG_MAP[lang]
2026-06-12 14:10:05 +04:00
if (lang === sourceLang) continue
const target = stripMarkdown(parole.traductions?.[field])
if (!target) continue
langCounts[lang] = (langCounts[lang] || 0) + 1
if (type === 'instruct') {
2026-06-12 14:10:05 +04:00
const systemPrompt = sourceLang === 'ka'
? 'Tu es un expert en langue KA (créole guadeloupéen/martiniquais). Traduis le texte KA suivant.'
: `Tu es un expert en traduction. Traduis le texte suivant (langue source : ${sourceLang}).`
pairs.push({
messages: [
2026-06-12 14:10:05 +04:00
{ role: 'system', content: systemPrompt },
{ role: 'user', content: `${userPrompt} :\n\n${source}` },
{ role: 'assistant', content: target },
],
})
} else {
pairs.push({
2026-06-12 14:10:05 +04:00
source_lang: sourceLang,
target_lang: targetLang,
source,
target,
...paroleMeta,
})
}
}
}
const metadata = {
exported_at: new Date().toISOString(),
total_paroles: paroles.length,
total_pairs: pairs.length,
languages: langCounts,
missing_translations: missing,
non_ka_transcriptions: nonKa,
}
return { metadata, pairs }
},
2026-06-15 20:19:30 +04:00
async bulkTranslateMissing() {
const TARGET_LANGS = [
{ lang: 'en', field: 'anglais', deeplTarget: 'EN', suffix: '\n\n(Translated by DeepL)' },
{ lang: 'es', field: 'espagnol', deeplTarget: 'ES', suffix: '\n\n(Traducido por DeepL)' },
{ lang: 'de', field: 'allemand', deeplTarget: 'DE', suffix: '\n\n(Übersetzt von DeepL)' },
{ lang: 'it', field: 'italien', deeplTarget: 'IT', suffix: '\n\n(Tradotto da DeepL)' },
]
const pageSize = 100
let start = 0
const all = []
while (true) {
const batch = await strapi.documents('api::parole.parole').findMany({
status: 'published',
populate: ['traductions'],
fields: ['documentId', 'slug', 'titre', 'transcription', 'langueSource'],
limit: pageSize,
start,
})
all.push(...batch)
if (batch.length < pageSize) break
start += pageSize
}
const translator = new Translator()
const translated = []
const skipped = []
const errors = []
for (const parole of all) {
const sourceFR = parole.traductions?.francais
|| (parole.langueSource === 'fr' ? parole.transcription : null)
if (!sourceFR) { skipped.push(parole.slug); continue }
const missing = TARGET_LANGS.filter(({ field }) => !parole.traductions?.[field])
if (missing.length === 0) { skipped.push(parole.slug); continue }
const { id: _id, ...tradData } = parole.traductions || {}
const updatedTrad = { ...tradData }
const addedLangs = []
for (const { lang, field, deeplTarget, suffix } of missing) {
try {
await sleep(700)
const result = await translator.get('FR', deeplTarget, sourceFR)
const text = result?.translations?.[0]?.text
if (text) {
updatedTrad[field] = text + suffix
addedLangs.push(lang)
}
} catch (err) {
errors.push({ slug: parole.slug, lang: deeplTarget, error: err.message })
}
}
if (addedLangs.length > 0) {
await strapi.documents('api::parole.parole').update({
documentId: parole.documentId,
data: { traductions: updatedTrad },
})
await strapi.documents('api::parole.parole').publish({
documentId: parole.documentId,
})
translated.push({ slug: parole.slug, langs: addedLangs })
}
}
return { translated, skipped, errors }
},
2022-12-09 04:42:27 +04:00
parolesDiff(titre = '', oldString, newString) {
const patch = Diff.createPatch(titre, oldString, newString, 'supprimée', 'ajoutée')
const parsePatch = Diff.parsePatch(patch)
if (parsePatch[0].hunks.length > 0) {
2022-12-12 22:19:59 +04:00
const jsonDiff = Diff.diffWords(oldString, newString)
return {
patch,
jsonDiff
}
2022-12-09 04:42:27 +04:00
}
2022-05-20 00:05:34 +04:00
}
}));