# Federated Computer, Inc.
# David Sainty 2024 A.D.
# Gossamer Threads to Discourse -- Correct Encoding
# v0.3 Debugging

require 'mysql2'
require 'active_record'
require 'charlock_holmes'
# require 'concurrent-ruby'

require File.expand_path("../../../../config/environment", __FILE__)
require File.expand_path("../../../../script/import_scripts/base", __FILE__)

# Walks every Discourse Post in batches, runs encoding detection on its raw
# content and prints a before/after report for posts whose content would
# change. The actual write-back is currently commented out, so running this
# script only reports; it does not modify posts.
class GossamerForumsCorrectEncoding < ImportScripts::Base
  # Connects to the Gossamer Forums MySQL database and sets the batch size.
  # Exits the process with status 1 when the MySQL connection fails.
  def initialize
    super

    begin
      # Initialize MySQL client to connect to Gossamer Forums database.
      # FIXME(security): credentials are committed to source. Each setting can
      # be overridden through the environment; the literals remain only as
      # backward-compatible fallbacks and should be rotated and removed.
      @mysql_client = Mysql2::Client.new(
        host: ENV.fetch("GOSSAMER_DB_HOST", "slowtwitch.northend.network"),
        username: ENV.fetch("GOSSAMER_DB_USERNAME", "admin"),
        password: ENV.fetch("GOSSAMER_DB_PASSWORD", "yxnh93Ybbz2Nm8#mp28zCVv"),
        database: ENV.fetch("GOSSAMER_DB_NAME", "slowtwitch")
      )
    rescue Mysql2::Error => e
      puts "Error connecting to MySQL: #{e.message}"
      exit 1
    end

    @batch_size = 1000 # Number of posts to process in each batch
  end

  # Detects the encoding of +text+ and returns a UTF-8 version of it.
  #
  # The argument is never mutated. nil or empty input, and input for which the
  # detector reports no encoding, is returned unchanged. If detection or
  # conversion raises, the error and backtrace are printed and the original
  # +text+ is returned.
  #
  # @param text [String, nil] content to inspect
  # @return [String, nil] converted content, or +text+ itself on no-op/failure
  def fix_text_encoding(text)
    # Nothing to detect or convert.
    return text if text.nil? || text.empty?

    # Detect encoding
    detection = CharlockHolmes::Detect.detect(text)
    original_encoding = detection && detection[:encoding]
    puts "Original encoding detected: #{original_encoding}"

    # Without a detected encoding there is no source encoding to convert from.
    return text if original_encoding.nil?

    if original_encoding == 'ISO-8859-1'
      # force_encoding retags the receiver in place, so work on a copy to
      # leave the caller's string (and any comparison against it) intact.
      text.dup.force_encoding('ISO-8859-1').encode('UTF-8')
    else
      # Try to convert from detected encoding to UTF-8
      text.encode('UTF-8', original_encoding, invalid: :replace, undef: :replace, replace: '?')
    end
  rescue StandardError => e
    puts "Error during encoding conversion: #{e.message}"
    puts e.backtrace.join("\n") # Print the full stack trace
    text
  end

  # Iterates over all posts in batches of @batch_size and reports every post
  # whose raw content changes after encoding correction.
  #
  # Errors raised while handling a single post are logged and the run
  # continues with the next post. Errors raised while fetching a batch are
  # deliberately not rescued: retrying with an ever-growing offset against a
  # failing query would never reach the empty-batch exit condition.
  def fix_encoding
    offset = 0

    loop do
      puts "OFFSET: #{offset}"

      # A stable ordering is required for offset pagination; without it the
      # database is free to return rows in any order, so pages could overlap
      # or skip posts.
      posts = Post.order(:id).limit(@batch_size).offset(offset).to_a
      break if posts.empty?

      posts.each { |post| report_encoding_fix(post) }

      offset += @batch_size
    end
  end

  # Entry point: runs the encoding correction pass with start/end banners.
  def perform_encoding_correction
    puts "Encoding Correction beginning!"
    fix_encoding
    puts "Encoding Correction complete!"
  end

  private

  # Runs encoding correction for one post and prints a before/after report
  # when the content differs. Any error is logged with its backtrace so that
  # a single bad post does not abort the rest of the batch.
  def report_encoding_fix(post)
    raw_content = post.raw
    puts "--> NEXT POST: post.id: #{post.id}"

    fixed_content = fix_text_encoding(raw_content)
    return if fixed_content == raw_content

    puts "Updating post #{post.id}"
    puts "------- raw_content:\n#{raw_content}"
    puts "+++++++ fixed_content:\n#{fixed_content}"
    puts "---------------------------------------------------------------------------------------------"
    # Write-back is intentionally disabled while debugging:
    # post.update(raw: fixed_content)
    # post.raw = fixed_content
    # if post.save
    #   puts "Post ##{post.id} updated successfully."
    # else
    #   puts "Failed to update Post ##{post.id}: #{post.errors.full_messages.join(', ')}"
    # end
  rescue StandardError => e
    puts "Error: #{e.message}"
    puts e.backtrace.join("\n") # Print the full stack trace
  end
end

GossamerForumsCorrectEncoding.new.perform_encoding_correction