v0.2 - Add debugging
This commit is contained in:
parent
85c93af281
commit
538185095d
@ -1,7 +1,7 @@
|
||||
# Federated Computer, Inc.
|
||||
# David Sainty <saint@federated.computer> 2024 A.D.
|
||||
# Gossamer Threads to Discourse -- Correct Encoding
|
||||
# v0.1 New script
|
||||
# v0.2 Debugging
|
||||
|
||||
require 'mysql2'
|
||||
require 'active_record'
|
||||
@ -31,20 +31,22 @@ class GossamerForumsCorrectEncoding < ImportScripts::Base
|
||||
|
||||
# Detect the encoding of +text+ and return a copy transcoded to UTF-8.
#
# Detection is delegated to CharlockHolmes::Detect.detect. The detected
# encoding name is printed for debugging. Text detected as ISO-8859-1 is
# re-tagged and transcoded strictly; any other detected encoding is
# transcoded with invalid/undefined byte sequences replaced by '?'.
#
# @param text [String] the raw text to convert; never modified in place
# @return [String] the UTF-8 version of +text+, or +text+ itself when no
#   encoding was detected or the conversion raised
def fix_text_encoding(text)
  detection = CharlockHolmes::Detect.detect(text)
  # NOTE(review): assumes the detector may return nil when it cannot guess;
  # guard here so that case does not have to go through the rescue below.
  original_encoding = detection && detection[:encoding]
  puts "Original encoding detected: #{original_encoding}"

  # Nothing to convert from, so hand the input back untouched.
  return text unless original_encoding

  if original_encoding == 'ISO-8859-1'
    # force_encoding re-tags its receiver in place; work on a copy so the
    # caller's string keeps its encoding (and frozen strings are accepted).
    text.dup.force_encoding('ISO-8859-1').encode('UTF-8')
  else
    # Try to convert from detected encoding to UTF-8
    text.encode('UTF-8', original_encoding, invalid: :replace, undef: :replace, replace: '?')
  end
rescue StandardError => e
  # Best effort: report the failure and fall back to the unconverted text.
  puts "Error during encoding conversion: #{e.message}"
  puts e.backtrace.join("\n") # Print the full stack trace
  text
end
|
||||
|
||||
# Method to fix encoding issues in post content
|
||||
@ -52,25 +54,32 @@ class GossamerForumsCorrectEncoding < ImportScripts::Base
|
||||
offset = 0
|
||||
|
||||
loop do
|
||||
posts = Post.limit(@batch_size).offset(offset)
|
||||
break if posts.empty?
|
||||
puts "OFFSET: #{offset}"
|
||||
begin
|
||||
posts = Post.limit(@batch_size).offset(offset)
|
||||
break if posts.empty?
|
||||
|
||||
posts.each do |post|
|
||||
raw_content = post.raw
|
||||
fixed_content = fix_text_encoding(raw_content)
|
||||
if fixed_content != raw_content
|
||||
puts "Updating post ##{post.id}"
|
||||
puts "------- raw_content:\n#{raw_content}"
|
||||
puts "+++++++ fixed_content:\n#{fixed_content}"
|
||||
puts "---------------------------------------------------------------------------------------------"
|
||||
# post.update(raw: fixed_content)
|
||||
# post.raw = fixed_content
|
||||
# if post.save
|
||||
# puts "Post ##{post.id} updated successfully."
|
||||
# else
|
||||
# puts "Failed to update Post ##{post.id}: #{post.errors.full_messages.join(', ')}"
|
||||
# end
|
||||
posts.each do |post|
|
||||
raw_content = post.raw
|
||||
puts "--> NEXT POST: post.id: #{post.id}"
|
||||
fixed_content = fix_text_encoding(raw_content)
|
||||
if fixed_content != raw_content
|
||||
puts "Updating post #{post.id}"
|
||||
puts "------- raw_content:\n#{raw_content}"
|
||||
puts "+++++++ fixed_content:\n#{fixed_content}"
|
||||
puts "---------------------------------------------------------------------------------------------"
|
||||
# post.update(raw: fixed_content)
|
||||
# post.raw = fixed_content
|
||||
# if post.save
|
||||
# puts "Post ##{post.id} updated successfully."
|
||||
# else
|
||||
# puts "Failed to update Post ##{post.id}: #{post.errors.full_messages.join(', ')}"
|
||||
# end
|
||||
end
|
||||
end
|
||||
rescue
|
||||
puts "Error: #{e.message}"
|
||||
puts e.backtrace.join("\n") # Print the full stack trace
|
||||
end
|
||||
|
||||
offset += @batch_size
|
||||
|
Loading…
Reference in New Issue
Block a user