v0.7 Further attempt to get this reverse double encoding right now

This commit is contained in:
David Sainty 2024-09-09 22:39:28 +10:00
parent 736d0afe06
commit de0ce06cd7

View File

@ -36,15 +36,15 @@ class GossamerForumsCorrectEncoding < ImportScripts::Base
# # Treat as Windows-1252 (cp1252) and then decode into UTF-8 # # Treat as Windows-1252 (cp1252) and then decode into UTF-8
# corrected_content = content.encode('CP1252').force_encoding('UTF-8') # corrected_content = content.encode('CP1252').force_encoding('UTF-8')
# Step 1: Treat content as CP1252 and convert it back to UTF-8 # Step 1: Treat content as CP1252 and convert it back to UTF-8
corrected_content = content.encode('CP1252').force_encoding('UTF-8') corrected_content = content.encode('CP1252').force_encoding('UTF-8').force_encoding('UTF-8')
# Step 2: Check if there's still a problem (if still corrupted, apply the second pass) # # Step 2: Check if there's still a problem (if still corrupted, apply the second pass)
if corrected_content.valid_encoding? # if corrected_content.valid_encoding?
return corrected_content # return corrected_content
else # else
# Step 3: If it's not valid UTF-8, re-encode and try to fix remaining issues # # Step 3: If it's not valid UTF-8, re-encode and try to fix remaining issues
corrected_content.encode('UTF-8', invalid: :replace, undef: :replace) # corrected_content.encode('UTF-8', invalid: :replace, undef: :replace)
end # end
rescue Encoding::UndefinedConversionError => e rescue Encoding::UndefinedConversionError => e
puts "Error during encoding conversion: #{e.message}" puts "Error during encoding conversion: #{e.message}"
puts e.backtrace.join("\n") # Print the full stack trace puts e.backtrace.join("\n") # Print the full stack trace