v0.4 - Add debugging
This commit is contained in:
parent
655a2619f8
commit
fae7ef730a
@ -31,28 +31,43 @@ class GossamerForumsCorrectEncoding < ImportScripts::Base
|
|||||||
|
|
||||||
# Method to detect and fix text encoding
|
# Method to detect and fix text encoding
|
||||||
def fix_text_encoding(text)
|
def fix_text_encoding(text)
|
||||||
begin
|
# Detect encoding
|
||||||
# Detect encoding
|
detection = CharlockHolmes::EncodingDetector.detect(text)
|
||||||
detection = CharlockHolmes::EncodingDetector.detect(text)
|
original_encoding = detection[:encoding]
|
||||||
original_encoding = detection[:encoding]
|
puts "Original encoding detected: #{original_encoding}"
|
||||||
puts "Original encoding detected: #{original_encoding}"
|
|
||||||
|
|
||||||
# Force the encoding to the detected one, then convert to UTF-8
|
# # Force the encoding to the detected one, then convert to UTF-8
|
||||||
if original_encoding == 'ISO-8859-1' || original_encoding == 'windows-1252'
|
# if original_encoding == 'ISO-8859-1' || original_encoding == 'windows-1252'
|
||||||
# For Windows-1252 or ISO-8859-1, force the encoding and convert to UTF-8
|
# # For Windows-1252 or ISO-8859-1, force the encoding and convert to UTF-8
|
||||||
# text.force_encoding('ISO-8859-1').encode('UTF-8')
|
# # text.force_encoding('ISO-8859-1').encode('UTF-8')
|
||||||
text.force_encoding(original_encoding).encode('UTF-8', invalid: :replace, undef: :replace, replace: '?')
|
# text.force_encoding(original_encoding).encode('UTF-8', invalid: :replace, undef: :replace, replace: '?')
|
||||||
else
|
# else
|
||||||
# Try to convert from detected encoding to UTF-8
|
# # Try to convert from detected encoding to UTF-8
|
||||||
text.encode('UTF-8', original_encoding, invalid: :replace, undef: :replace, replace: '?')
|
# text.encode('UTF-8', original_encoding, invalid: :replace, undef: :replace, replace: '?')
|
||||||
|
# end
|
||||||
|
|
||||||
|
if original_encoding
|
||||||
|
begin
|
||||||
|
decoded_content = CharlockHolmes::Converter.convert(raw_content, original_encoding, 'UTF-8')
|
||||||
|
rescue => e
|
||||||
|
puts "Error during encoding conversion: #{e.message}"
|
||||||
|
decoded_content = raw_content # Fall back to raw content if decoding fails
|
||||||
end
|
end
|
||||||
rescue StandardError => e
|
else
|
||||||
puts "Error during encoding conversion: #{e.message}"
|
decoded_content = raw_content # Fallback if encoding detection fails
|
||||||
puts e.backtrace.join("\n") # Print the full stack trace
|
|
||||||
text
|
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Step 3: Ensure the content is now correctly in UTF-8 (no need to encode again)
|
||||||
|
return decoded_content
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# rescue StandardError => e
|
||||||
|
# puts "Error during encoding conversion: #{e.message}"
|
||||||
|
# puts e.backtrace.join("\n") # Print the full stack trace
|
||||||
|
# text
|
||||||
|
# end
|
||||||
|
# end
|
||||||
|
|
||||||
# Method to fix encoding issues in post content
|
# Method to fix encoding issues in post content
|
||||||
def fix_encoding
|
def fix_encoding
|
||||||
offset = 0
|
offset = 0
|
||||||
|
Loading…
Reference in New Issue
Block a user