@@ -105,7 +105,9 @@ export class CMSLocalization {
105105 }
106106
107107 /**
108- * Convert Strapi JSON to FTL format
108+ * Convert Strapi JSON to FTL format using hash-only IDs
109+ * Each string field gets a hash ID derived solely from its sanitized content (not from its field path).
110+ * Identical English strings will share the same translation across all contexts.
109111 */
110112 strapiToFtl ( strapiData : any [ ] ) : string {
111113 const ftlLines : string [ ] = [ ] ;
@@ -138,7 +140,7 @@ export class CMSLocalization {
138140 // Convert each string to FTL format
139141 for ( const [ fieldPath , value ] of Object . entries ( strings ) ) {
140142 const sanitizedValue = this . sanitizeContent ( value ) ;
141- const ftlId = this . generateFtlIdWithL10nId ( l10nId , fieldPath , sanitizedValue ) ;
143+ const ftlId = this . generateFtlId ( sanitizedValue ) ;
142144
143145 allEntries . push ( {
144146 l10nId,
@@ -188,35 +190,92 @@ export class CMSLocalization {
188190 ftlLines . push ( `${ entry . ftlId } = ${ entry . value } ` ) ;
189191 }
190192
191- return ftlLines . join ( '\n' ) ;
193+ const ftlContent = ftlLines . join ( '\n' ) ;
194+ return this . sanitizeFtlContent ( ftlContent ) ;
195+ }
196+
197+ /**
198+ * Sanitize the assembled FTL content: delegates to sanitizeContent with line-ending normalization and trailing-newline termination enabled, and with Unicode normalization, control-character removal and trimming disabled
199+ */
200+ private sanitizeFtlContent ( ftlContent : string ) : string {
201+ return this . sanitizeContent ( ftlContent , {
202+ normalizeUnicode : false ,
203+ removeControlChars : false ,
204+ trimWhitespace : false ,
205+ normalizeLineEndings : true ,
206+ ensureFileTermination : true
207+ } ) ;
192208 }
193209
194210 /**
195211 * Sanitize content for FTL format
212+ * @param content - The content to sanitize; falsy or non-string input returns an empty string
213+ * @param options - Sanitization options; normalizeUnicode, removeControlChars and trimWhitespace default to true, normalizeLineEndings and ensureFileTermination default to false
196214 */
197- private sanitizeContent ( content : string ) : string {
215+ private sanitizeContent ( content : string , options : {
216+ normalizeUnicode ?: boolean ;
217+ removeControlChars ?: boolean ;
218+ trimWhitespace ?: boolean ;
219+ normalizeLineEndings ?: boolean ;
220+ ensureFileTermination ?: boolean ;
221+ } = { } ) : string {
198222 if ( ! content || typeof content !== 'string' ) {
199223 return '' ;
200224 }
201225
202- return content
203- // Normalize Unicode characters and remove problematic ones
204- . normalize ( 'NFD' ) // Decompose Unicode characters
226+ const {
227+ normalizeUnicode = true ,
228+ removeControlChars = true ,
229+ trimWhitespace = true ,
230+ normalizeLineEndings = false ,
231+ ensureFileTermination = false
232+ } = options ;
233+
234+ let sanitized = content
235+ . replace ( / ^ \uFEFF / , '' )
236+ . replace ( / [ \u202A - \u202E \u2066 - \u2069 ] / g, '' )
237+ . replace ( / [ \u200B - \u200D \uFEFF ] / g, '' )
238+ . replace ( / [ \u2028 \u2029 ] / g, normalizeLineEndings ? '\n' : '' )
239+ . replace ( / [ \u2060 ] / g, '' ) ;
240+
241+ // Apply the option-controlled steps in order: NFC, control chars, line endings, trim, trailing newline (the invisible-character stripping above always runs)
242+ if ( normalizeUnicode ) {
243+ sanitized = sanitized . normalize ( 'NFC' ) ;
244+ }
245+
246+ if ( removeControlChars ) {
205247 // eslint-disable-next-line no-control-regex
206- . replace ( / [ \u0000 - \u0008 \u000B \u000C \u000E - \u001F \u007F - \u009F ] / g, '' ) // Remove control characters (preserve tab, newline, carriage return)
207- . replace ( / [ \u200B - \u200D \uFEFF ] / g, '' ) // Remove zero-width spaces and BOM
208- . trim ( ) ; // Remove leading/trailing whitespace
248+ sanitized = sanitized . replace ( / [ \u0000 - \u0008 \u000B \u000C \u000E - \u001F \u007F - \u009F ] / g, '' ) ;
249+ }
250+
251+ if ( normalizeLineEndings ) {
252+ sanitized = sanitized
253+ . replace ( / \r \n / g, '\n' )
254+ . replace ( / \r / g, '\n' ) ;
255+ }
256+
257+ if ( trimWhitespace ) {
258+ sanitized = sanitized . trim ( ) ;
259+ }
260+
261+ if ( ensureFileTermination ) {
262+ sanitized = sanitized . replace ( / \n * $ / , '\n' ) ;
263+ }
264+
265+ return sanitized ;
209266 }
210267
211268 /**
212- * Generate FTL ID with l10nId prefix, component name, field name, and hash
269+ * Generate FTL ID of the form "fxa-<hash>" using only the hash of the string content
270+ * Identical English strings will share the same translation across all contexts
271+ *
272+ * Note: Uses the first 8 hex characters of the MD5 digest (substring(0, 8)). Collisions are unlikely for small catalogs,
273+ * but if they become an issue, the hash length can be increased. NOTE(review): confirm 8 (rather than 12) is the intended length.
213274 */
214- private generateFtlIdWithL10nId ( l10nId : string , fieldPath : string , value : string ) : string {
215- // Create a hash of the value for versioning
275+ private generateFtlId ( value : string ) : string {
276+ // Create a hash of the value and prefix with "fxa-"
216277 const hash = crypto . createHash ( 'md5' ) . update ( value ) . digest ( 'hex' ) . substring ( 0 , 8 ) ;
217- // Replace dots with dashes in the field path to create proper FTL ID format
218- const fieldPathWithDashes = fieldPath . replace ( / \. / g, '-' ) ;
219- return `${ l10nId } -${ fieldPathWithDashes } -${ hash } ` ;
278+ return `fxa-${ hash } ` ;
220279 }
221280
222281 /**
@@ -242,106 +301,6 @@ export class CMSLocalization {
242301 return `${ readableFieldName } for ${ readableComponentName } ` ;
243302 }
244303
245- /**
246- * Parse FTL ID back to field path
247- */
248- parseFtlIdToFieldPath ( ftlId : string ) : string | null {
249- // Handle format: l10nId-componentName-fieldName-hash
250- const parts = ftlId . split ( '-' ) ;
251-
252- if ( parts . length >= 4 ) {
253- // Remove the l10nId (first part) and hash (last part)
254- const fieldParts = parts . slice ( 1 , - 1 ) ;
255- return fieldParts . join ( '.' ) ;
256- }
257-
258- return null ;
259- }
260-
261- /**
262- * Convert FTL content to Strapi format
263- */
264- convertFtlToStrapiFormat (
265- l10nId : string ,
266- ftlContent : string ,
267- baseData : any
268- ) : any {
269- const strapiFormat : any = {
270- clientId : baseData . clientId ,
271- entrypoint : baseData . entrypoint ,
272- name : baseData . name ,
273- l10nId : baseData . l10nId ,
274- } ;
275-
276- const lines = ftlContent . split ( '\n' ) ;
277-
278- for ( const line of lines ) {
279- const trimmedLine = line . trim ( ) ;
280-
281- if ( ! trimmedLine || trimmedLine . startsWith ( '#' ) ) {
282- continue ;
283- }
284-
285- const match = trimmedLine . match ( / ^ ( [ a - z A - Z 0 - 9 _ - ] + ) \s * = \s * ( .* ) $ / ) ;
286- if ( match ) {
287- const [ , ftlId , value ] = match ;
288-
289- // Extract l10nId from the FTL ID (format: l10nId-componentName-fieldName-hash)
290- const ftlIdParts = ftlId . split ( '-' ) ;
291- if ( ftlIdParts . length >= 3 ) {
292- const ftlL10nId = ftlIdParts [ 0 ] ;
293- if ( ftlL10nId === l10nId ) {
294- const fieldPath = this . parseFtlIdToFieldPath ( ftlId ) ;
295-
296- if ( fieldPath ) {
297- const parts = fieldPath . split ( '.' ) ;
298- if ( parts . length >= 2 ) {
299- const componentName = parts [ 0 ] ;
300- const fieldName = parts [ 1 ] ;
301-
302- // Initialize component if it doesn't exist
303- if ( ! strapiFormat [ componentName ] ) {
304- strapiFormat [ componentName ] = { } ;
305- }
306-
307- // Set the localized value (unescape quotes and newlines)
308- const unescapedValue = value
309- . replace ( / \\ " / g, '"' )
310- . replace ( / \\ n / g, '\n' )
311- . replace ( / \\ r / g, '\r' ) ;
312-
313- strapiFormat [ componentName ] [ fieldName ] = unescapedValue ;
314-
315- this . log . debug ( 'cms.localization.convert.ftlToStrapi' , {
316- ftlId,
317- ftlL10nId,
318- targetL10nId : l10nId ,
319- fieldPath,
320- componentName,
321- fieldName,
322- value : unescapedValue ,
323- } ) ;
324- }
325- }
326- } else {
327- this . log . debug ( 'cms.localization.convert.ftlToStrapi.skipped' , {
328- ftlId,
329- ftlL10nId,
330- targetL10nId : l10nId ,
331- } ) ;
332- }
333- }
334- }
335- }
336-
337- this . log . info ( 'cms.localization.convert.ftlToStrapi.complete' , {
338- l10nId,
339- totalLines : lines . length ,
340- processedComponents : Object . keys ( strapiFormat ) . filter ( key => key !== 'clientId' && key !== 'entrypoint' && key !== 'name' && key !== 'l10nId' ) . length ,
341- } ) ;
342-
343- return strapiFormat ;
344- }
345304
346305 /**
347306 * Validate GitHub configuration
@@ -727,26 +686,120 @@ export class CMSLocalization {
727686 }
728687
729688 /**
730- * Merge base config with localized FTL content
689+ * Build a translation map from FTL content
690+ * Parses single-line `id = value` entries into an id -> translation mapping; keys are the full IDs as written in the file, and lines that do not match are ignored
691+ */
692+ private buildTranslationMap ( ftlContent : string ) : Record < string , string > {
693+ const translationMap : Record < string , string > = { } ;
694+ const lines = ftlContent . split ( '\n' ) ;
695+
696+ for ( const line of lines ) {
697+ const trimmedLine = line . trim ( ) ;
698+
699+ // Skip empty lines and comments (lines starting with '#')
700+ if ( ! trimmedLine || trimmedLine . startsWith ( '#' ) ) {
701+ continue ;
702+ }
703+
704+ // Match a single-line FTL entry `id = value`; the id may contain letters, digits and hyphens
705+ const match = trimmedLine . match ( / ^ ( [ a - z A - Z 0 - 9 - ] + ) \s * = \s * ( .* ) $ / ) ;
706+ if ( match ) {
707+ const [ , hash , translation ] = match ;
708+
709+ // Unescape backslash-escaped quotes, \n and \r sequences in the translation
710+ const unescapedTranslation = translation
711+ . replace ( / \\ " / g, '"' )
712+ . replace ( / \\ n / g, '\n' )
713+ . replace ( / \\ r / g, '\r' ) ;
714+
715+ translationMap [ hash ] = unescapedTranslation ;
716+ }
717+ }
718+
719+ this . log . debug ( 'cms.localization.buildTranslationMap' , {
720+ totalTranslations : Object . keys ( translationMap ) . length ,
721+ hashes : Object . keys ( translationMap )
722+ } ) ;
723+
724+ return translationMap ;
725+ }
726+
727+ /**
728+ * Apply translations to a config object recursively, mutating it in place
729+ * Walks through the config and replaces English strings with translations where available; recurses into nested non-array objects only (arrays are not descended into)
730+ */
731+ private applyTranslations ( config : Record < string , unknown > , translationMap : Record < string , string > ) : void {
732+ for ( const [ key , value ] of Object . entries ( config ) ) {
733+ if ( value === null || value === undefined ) {
734+ continue ;
735+ }
736+
737+ if ( typeof value === 'object' && ! Array . isArray ( value ) ) {
738+ // Recursively apply translations to nested objects
739+ this . applyTranslations ( value as Record < string , unknown > , translationMap ) ;
740+ } else if ( this . shouldIncludeField ( key , value ) ) {
741+ // This is a localizable string field. NOTE(review): the ID is hashed from the raw value here, while strapiToFtl hashes the sanitized value - confirm the IDs match when sanitization changes the string
742+ const englishValue = value as string ;
743+ const hash = this . generateFtlId ( englishValue ) ;
744+
745+ if ( translationMap [ hash ] ) {
746+ // Replace with translation if available (truthiness check: an empty-string translation is treated as missing)
747+ config [ key ] = translationMap [ hash ] ;
748+
749+ this . log . debug ( 'cms.localization.applyTranslations.replaced' , {
750+ key,
751+ hash,
752+ englishValue,
753+ translatedValue : translationMap [ hash ]
754+ } ) ;
755+ } else {
756+ // Keep English value if no translation exists
757+ this . log . debug ( 'cms.localization.applyTranslations.keptEnglish' , {
758+ key,
759+ hash,
760+ englishValue
761+ } ) ;
762+ }
763+ }
764+ }
765+ }
766+
767+ /**
768+ * Merge base config with localized FTL content using translation lookup
769+ *
770+ * This method implements a new hash-based translation system:
771+ * 1. Parses FTL content to build a hash -> translation mapping
772+ * 2. Walks through the base config recursively
773+ * 3. For each localizable string field, computes its content hash
774+ * 4. If a translation exists for that hash, replaces the English text
775+ * 5. Otherwise, keeps the original English text as fallback
776+ *
731777 */
732778 public async mergeConfigs ( baseConfig : Record < string , unknown > , ftlContent : string , clientId : string , entrypoint : string ) : Promise < Record < string , unknown > > {
733779 if ( ! ftlContent || ! baseConfig ) {
734780 return baseConfig ;
735781 }
736782
737783 try {
738- // Generate l10nId for this client/entrypoint combination
739- const l10nId = baseConfig . l10nId as string ;
740-
741- // Convert FTL to Strapi format using existing utility
742- const localizedData = this . convertFtlToStrapiFormat (
743- l10nId ,
744- ftlContent ,
745- baseConfig
746- ) ;
784+ // 1. Parse FTL content into hash -> translation map
785+ const translationMap = this . buildTranslationMap ( ftlContent ) ;
747786
748- // Deep merge with base config (localized data takes precedence)
749- return this . deepMerge ( baseConfig , localizedData ) ;
787+ // 2. Clone base config to avoid mutations
788+ const result = JSON . parse ( JSON . stringify ( baseConfig ) ) ;
789+
790+ // 3. Walk config and apply translations where they exist
791+ this . applyTranslations ( result , translationMap ) ;
792+
793+ this . log . info ( 'cms.getLocalizedConfig.merge.success' , {
794+ clientId,
795+ entrypoint,
796+ totalTranslations : Object . keys ( translationMap ) . length ,
797+ appliedTranslations : Object . keys ( translationMap ) . filter ( hash =>
798+ JSON . stringify ( result ) . includes ( translationMap [ hash ] )
799+ ) . length
800+ } ) ;
801+
802+ return result ;
750803 } catch ( error ) {
751804 this . log . error ( 'cms.getLocalizedConfig.merge.error' , {
752805 error : error . message ,
@@ -758,22 +811,6 @@ export class CMSLocalization {
758811 }
759812 }
760813
761- /**
762- * Deep merge utility for combining base and localized configs
763- */
764- private deepMerge ( base : Record < string , unknown > , localized : Record < string , unknown > ) : Record < string , unknown > {
765- const result = { ...base } ;
766-
767- for ( const [ key , value ] of Object . entries ( localized ) ) {
768- if ( value && typeof value === 'object' && ! Array . isArray ( value ) ) {
769- result [ key ] = this . deepMerge ( ( result [ key ] as Record < string , unknown > ) || { } , value as Record < string , unknown > ) ;
770- } else {
771- result [ key ] = value ;
772- }
773- }
774-
775- return result ;
776- }
777814
778815 /**
779816 * Generate FTL content from Strapi entries
0 commit comments