v0.41 Further improve FULL concurrency support, for both MySQL/MariaDB _and_ importantly, the PostgreSQL Discourse DB additions and changes with ActiveRecord connection pooling and Mutex

This commit is contained in:
David Sainty 2024-08-17 15:55:22 +10:00
parent 168dcc9db7
commit a005cda0ae

View File

@ -1,7 +1,7 @@
# Federated Computer, Inc. # Federated Computer, Inc.
# David Sainty <saint@federated.computer> 2024 A.D. # David Sainty <saint@federated.computer> 2024 A.D.
# Gossamer Threads to Discourse -- Migration-Import Script # Gossamer Threads to Discourse -- Migration-Import Script
# v0.40 Move to per thread MySQL/MariaDB connection # v0.41 Further improve FULL concurrency support, for both MySQL/MariaDB _and_ importantly, the PostgreSQL Discourse DB additions and changes with ActiveRecord connection pooling and Mutex
require 'mysql2' require 'mysql2'
require 'open-uri' require 'open-uri'
@ -1072,16 +1072,20 @@ class GossamerForumsImporter < ImportScripts::Base
current_post_batch = 0 # Set our current batch number. This tracks the current batch of posts being processed. current_post_batch = 0 # Set our current batch number. This tracks the current batch of posts being processed.
is_complete = false # Flag to indicate whether the import process is complete. is_complete = false # Flag to indicate whether the import process is complete.
# Mutex to control access to shared resources
mutex = Mutex.new
# Run until all posts have been processed. # Run until all posts have been processed.
until is_complete until is_complete
# Query in batches, create pool, wait for termination, do it again # Query in batches, create pool, wait for termination, do it again
current_post_batch_max = current_post_batch + batch_size current_post_batch_max = current_post_batch + batch_size
## Static pool size based on number of CPUs # # Static pool size based on number of CPUs
# pool = Concurrent::FixedThreadPool.new(Concurrent.processor_count) #create thread pool that is bounded by processors available, however play with the number to see what works best # # pool = Concurrent::FixedThreadPool.new(Concurrent.processor_count) # Create a thread pool that is bounded by processors available
# # pool = Concurrent::FixedThreadPool.new(8) # Create a thread pool of 8 pool members
# Dynamically calculate the pool size based on system load to optimise performance # Dynamically calculate the pool size based on system load to optimise performance
pool_size = calculate_dynamic_pool_size pool_size = calculate_dynamic_pool_size # Dynamically calculate what the pool size "ought" to be.
pool = Concurrent::FixedThreadPool.new(pool_size) # Create a thread pool with the calculated size pool = Concurrent::FixedThreadPool.new(pool_size) # Create a thread pool with the calculated size
# Process each post in the current batch # Process each post in the current batch
@ -1102,6 +1106,10 @@ class GossamerForumsImporter < ImportScripts::Base
password: "yxnh93Ybbz2Nm8#mp28zCVv", password: "yxnh93Ybbz2Nm8#mp28zCVv",
database: "slowtwitch" database: "slowtwitch"
) )
# Use connection pooling for PostgreSQL and synchronize access to shared resources
ActiveRecord::Base.connection_pool.with_connection do
mutex.synchronize do
begin begin
puts "Processing post ID: #{post_id}" puts "Processing post ID: #{post_id}"
topic_import_job(post_id, mysql_client) # Import topic and its replies topic_import_job(post_id, mysql_client) # Import topic and its replies
@ -1110,9 +1118,12 @@ class GossamerForumsImporter < ImportScripts::Base
puts "Error processing post ID #{post_id}: #{e.message}" puts "Error processing post ID #{post_id}: #{e.message}"
mark_post_as_failed(post_id) mark_post_as_failed(post_id)
ensure ensure
# Ensure the MariaDB connection is closed after processing
mysql_client.close if mysql_client mysql_client.close if mysql_client
end end
end end
end
end
else else
puts "Skipping post_id #{post_id}, already processed." puts "Skipping post_id #{post_id}, already processed."
end end
@ -1172,6 +1183,7 @@ class GossamerForumsImporter < ImportScripts::Base
# Check if the topic has already been imported using the custom field 'original_gossamer_id' # Check if the topic has already been imported using the custom field 'original_gossamer_id'
unless TopicCustomField.exists?(name: 'original_gossamer_id', value: row['post_id']) unless TopicCustomField.exists?(name: 'original_gossamer_id', value: row['post_id'])
ActiveRecord::Base.transaction do
# Create the new topic in Discourse # Create the new topic in Discourse
begin begin
suffix = 1 suffix = 1
@ -1243,7 +1255,7 @@ class GossamerForumsImporter < ImportScripts::Base
new_url = "https://new/t/#{topic.slug}/#{topic.id}" new_url = "https://new/t/#{topic.slug}/#{topic.id}"
insert_url_mapping(row['post_id'], new_url, unique_title) insert_url_mapping(row['post_id'], new_url, unique_title)
# Now fetch and import all replies to this topic # Fetch and import all replies to this topic
replies = execute_query("SELECT post_id, user_id_fk, post_message, post_time, FROM gforum_Post WHERE post_root_id = #{post_id} ORDER BY post_time ASC") replies = execute_query("SELECT post_id, user_id_fk, post_message, post_time, FROM gforum_Post WHERE post_root_id = #{post_id} ORDER BY post_time ASC")
# Import each reply sequentially # Import each reply sequentially
@ -1289,7 +1301,8 @@ class GossamerForumsImporter < ImportScripts::Base
# # Update the highest processed post_id in the database (thread-safe) # # Update the highest processed post_id in the database (thread-safe)
# update_highest_processed_post_id_thread_safe(reply_row['post_id']) # update_highest_processed_post_id_thread_safe(reply_row['post_id'])
rescue ActiveRecord::RecordInvalid => e # rescue ActiveRecord::RecordInvalid => e
rescue => e
puts "Error importing reply with post_id #{reply_row['post_id']}: #{e.message}" puts "Error importing reply with post_id #{reply_row['post_id']}: #{e.message}"
end end
end end
@ -1299,8 +1312,12 @@ class GossamerForumsImporter < ImportScripts::Base
rescue ActiveRecord::RecordInvalid => e rescue ActiveRecord::RecordInvalid => e
puts "Error importing topic with post_id #{row['post_id']}: #{e.message}" puts "Error importing topic with post_id #{row['post_id']}: #{e.message}"
raise ActiveRecord::Rollback
end end
end end
else
puts "Topic for post_id #{row['post_id']} already exists, skipping creation."
end
end end