diff --git a/goss-correctencoding.rb b/goss-correctencoding.rb
index 7133461..07b071a 100644
--- a/goss-correctencoding.rb
+++ b/goss-correctencoding.rb
@@ -31,27 +31,29 @@ class GossamerForumsCorrectEncoding < ImportScripts::Base
 
   # Method to detect and fix text encoding
+  #
+  # Detects the encoding of +text+ with CharlockHolmes and converts it to
+  # UTF-8. Returns +text+ unchanged when no encoding is detected or when the
+  # conversion raises.
+  #
+  # @param text [String] content whose encoding should be detected
+  # @return [String] the converted content, or +text+ itself on fallback
   def fix_text_encoding(text)
-    begin
-      # Detect encoding
-      detection = CharlockHolmes::EncodingDetector.detect(text)
-      original_encoding = detection[:encoding]
-      puts "Original encoding detected: #{original_encoding}"
+    # Detect encoding
+    detection = CharlockHolmes::EncodingDetector.detect(text)
+    # Guard against a nil detection result as well as a missing :encoding
+    original_encoding = detection && detection[:encoding]
+    puts "Original encoding detected: #{original_encoding}"
 
-      # Force the encoding to the detected one, then covnert to UTF-8
-      if original_encoding == 'ISO-8859-1' || original_encoding == 'windows-1252'
-        # For Windows-1252 or ISO-8859-1, force the encoding and convert to UTF-8
-        # text.force_encoding('ISO-8859-1').encode('UTF-8')
-        text.force_encoding(original_encoding).encode('UTF-8', invalid: :replace, undef: :replace, replace: '?')
-      else
-        # Try to convert from detected encoding to UTF-8
-        text.encode('UTF-8', original_encoding, invalid: :replace, undef: :replace, replace: '?')
-      end
-    rescue StandardError => e
-      puts "Error during encoding conversion: #{e.message}"
-      puts e.backtrace.join("\n") # Print the full stack trace
-      text
+    # Nothing to convert when detection failed; hand the input back untouched
+    return text unless original_encoding
+
+    begin
+      CharlockHolmes::Converter.convert(text, original_encoding, 'UTF-8')
+    rescue StandardError => e
+      puts "Error during encoding conversion: #{e.message}"
+      text # Fall back to the raw content if decoding fails
     end
   end
 
   # Method to fix encoding issues in post content
   def fix_encoding