v0.6 Further attempt to get this reverse double encoding right now
This commit is contained in:
parent
535adfebba
commit
3cb70bb799
@ -44,26 +44,6 @@ class GossamerForumsCorrectEncoding < ImportScripts::Base
|
|||||||
corrected_content
|
corrected_content
|
||||||
end
|
end
|
||||||
|
|
||||||
# Ensure the content is treated as UTF-8 (even if incorrectly encoded)
|
|
||||||
content.force_encoding('UTF-8')
|
|
||||||
|
|
||||||
# Continue decoding until no more invalid sequences are found
|
|
||||||
previous_content = ""
|
|
||||||
while previous_content != content
|
|
||||||
previous_content = content.dup
|
|
||||||
|
|
||||||
# Step 1: First attempt to convert from ISO-8859-1 to UTF-8
|
|
||||||
if content.valid_encoding?
|
|
||||||
# Decode from ISO-8859-1 (or Windows-1252) to UTF-8
|
|
||||||
content = content.force_encoding('ISO-8859-1').encode('UTF-8', invalid: :replace, undef: :replace, replace: '?')
|
|
||||||
else
|
|
||||||
content = previous_content # Stop if invalid encoding issues arise
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
return content
|
|
||||||
end
|
|
||||||
|
|
||||||
# # Step 1: Try to detect encoding of the corrupted (double-encoded) content
|
# # Step 1: Try to detect encoding of the corrupted (double-encoded) content
|
||||||
# detection = CharlockHolmes::EncodingDetector.detect(broken_content)
|
# detection = CharlockHolmes::EncodingDetector.detect(broken_content)
|
||||||
# original_encoding = detection[:encoding]
|
# original_encoding = detection[:encoding]
|
||||||
|
Loading…
Reference in New Issue
Block a user