v0.42.1 Rewrite and simplication of concurrent-ruby support
This commit is contained in:
parent
b44e9baf57
commit
f90d0fb6ae
@ -1,7 +1,7 @@
|
|||||||
# Federated Computer, Inc.
|
# Federated Computer, Inc.
|
||||||
# David Sainty <saint@federated.computer> 2024 A.D.
|
# David Sainty <saint@federated.computer> 2024 A.D.
|
||||||
# Gossamer Threads to Discourse -- Migration-Import Script
|
# Gossamer Threads to Discourse -- Migration-Import Script
|
||||||
# v0.42 Mutex addition for SQLite (which may be very important)
|
# v0.42.1 Rewrite and simplication of concurrent-ruby support
|
||||||
|
|
||||||
require 'mysql2'
|
require 'mysql2'
|
||||||
require 'open-uri'
|
require 'open-uri'
|
||||||
@ -1077,7 +1077,9 @@ class GossamerForumsImporter < ImportScripts::Base
|
|||||||
|
|
||||||
parent_post_count = parent_post_ids.count
|
parent_post_count = parent_post_ids.count
|
||||||
batch_size = 100 # Set our batch size for number of posts to import in a single batch
|
batch_size = 100 # Set our batch size for number of posts to import in a single batch
|
||||||
current_post_batch = 0 # Set our current batch number. This tracks the current batch of posts being processed.
|
|
||||||
|
#### current_post_batch = 0 # Set our current batch number. This tracks the current batch of posts being processed.
|
||||||
|
|
||||||
is_complete = false # Flag to indicate whether the import process is complete.
|
is_complete = false # Flag to indicate whether the import process is complete.
|
||||||
|
|
||||||
# Mutex to control access to shared resources
|
# Mutex to control access to shared resources
|
||||||
@ -1086,25 +1088,33 @@ class GossamerForumsImporter < ImportScripts::Base
|
|||||||
|
|
||||||
# Run until all posts have been processed.
|
# Run until all posts have been processed.
|
||||||
until is_complete
|
until is_complete
|
||||||
# Query in batches, create pool, wait for termination, do it again
|
|
||||||
current_post_batch_max = current_post_batch + batch_size
|
#### # Query in batches, create pool, wait for termination, do it again
|
||||||
|
#### current_post_batch_max = current_post_batch + batch_size
|
||||||
|
|
||||||
|
# Get the next batch of posts
|
||||||
|
current_post_batch = parent_post_ids.shift(batch_size)
|
||||||
|
|
||||||
|
break if current_post_batch.empty?
|
||||||
|
|
||||||
|
# Process each post in the current batch
|
||||||
|
current_post_batch.each do |post_id|
|
||||||
|
|
||||||
|
|
||||||
####### # Static pool size based on number of CPUs
|
####### # Static pool size based on number of CPUs
|
||||||
# # pool = Concurrent::FixedThreadPool.new(Concurrent.processor_count) # Create a thread pool that is bounded by processors avaialable
|
# # pool = Concurrent::FixedThreadPool.new(Concurrent.processor_count) # Create a thread pool that is bounded by processors avaialable
|
||||||
# # pool = Concurrent::FixedThreadPool.new(8) # Create a thread pool of 8 pool members
|
# # pool = Concurrent::FixedThreadPool.new(8) # Create a thread pool of 8 pool members
|
||||||
|
|
||||||
# Dynamically calculate the pool size based on system load to optimise performance
|
#### # Dynamically calculate the pool size based on system load to optimise performance
|
||||||
pool_size = calculate_dynamic_pool_size # Dynamically calculate what the pool size "ought" to be.
|
#### pool_size = calculate_dynamic_pool_size # Dynamically calculate what the pool size "ought" to be.
|
||||||
pool = Concurrent::FixedThreadPool.new(pool_size) # Create a thread pool with the calculated size
|
#### pool = Concurrent::FixedThreadPool.new(pool_size) # Create a thread pool with the calculated size
|
||||||
|
|
||||||
# Process each post in the current batch
|
#### # Process each post in the current batch
|
||||||
while current_post_batch < current_post_batch_max
|
#### while current_post_batch < current_post_batch_max
|
||||||
post_id = parent_post_ids[current_post_batch] # Fetch the post_id for the current post
|
#### post_id = parent_post_ids[current_post_batch] # Fetch the post_id for the current post
|
||||||
|
|
||||||
# Check if the post has already been processed or is incomplete
|
#### # Check if the post has already been processed or is incomplete
|
||||||
post_status = post_status(post_id)
|
#### post_status = post_status(post_id)
|
||||||
if post_status.nil? || post_status == 0
|
|
||||||
puts "Starting import for post_id #{post_id} in batch #{current_post_batch / batch_size + 1} with #{pool_size} threads"
|
|
||||||
|
|
||||||
# Submit the import job for the current post_id to the thread pool
|
# Submit the import job for the current post_id to the thread pool
|
||||||
pool.post do
|
pool.post do
|
||||||
@ -1116,17 +1126,22 @@ class GossamerForumsImporter < ImportScripts::Base
|
|||||||
password: "yxnh93Ybbz2Nm8#mp28zCVv",
|
password: "yxnh93Ybbz2Nm8#mp28zCVv",
|
||||||
database: "slowtwitch"
|
database: "slowtwitch"
|
||||||
)
|
)
|
||||||
|
|
||||||
puts "PP 22 -- ${post_id}"
|
puts "PP 22 -- ${post_id}"
|
||||||
|
|
||||||
|
begin
|
||||||
# Use connection pooling for PostgreSQL and synchronize access to shared resources
|
# Use connection pooling for PostgreSQL and synchronize access to shared resources
|
||||||
ActiveRecord::Base.connection_pool.with_connection do
|
ActiveRecord::Base.connection_pool.with_connection do
|
||||||
### mutex.synchronize do
|
post_status = post_status(post_id)
|
||||||
begin
|
if post_status.nil? || post_status == 0
|
||||||
puts "Processing post ID: #{post_id}"
|
puts "Starting import for post_id #{post_id}"
|
||||||
topic_import_job(post_id, mysql_client, sqlite_mutex) # Import topic and its replies
|
topic_import_job(post_id, mysql_client, sqlite_mutex) # Import topic and its replies
|
||||||
sqlite_mutex.sychronize do
|
sqlite_mutex.sychronize do
|
||||||
mark_post_as_complete(post_id) # Mark as complete in SQLite table
|
mark_post_as_complete(post_id) # Mark as complete in SQLite table
|
||||||
end
|
end
|
||||||
|
else
|
||||||
|
puts "Skipping post_id #{post_id}, already processed."
|
||||||
|
end
|
||||||
|
end
|
||||||
rescue => e
|
rescue => e
|
||||||
puts "Error processing post ID #{post_id}: #{e.message}"
|
puts "Error processing post ID #{post_id}: #{e.message}"
|
||||||
sqlite_mutex.sychronize do
|
sqlite_mutex.sychronize do
|
||||||
@ -1136,22 +1151,19 @@ class GossamerForumsImporter < ImportScripts::Base
|
|||||||
# Ensure the MariaDB connection is closed after processing
|
# Ensure the MariaDB connection is closed after processing
|
||||||
mysql_client.close if mysql_client
|
mysql_client.close if mysql_client
|
||||||
end
|
end
|
||||||
### end
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
else
|
|
||||||
puts "Skipping post_id #{post_id}, already processed."
|
|
||||||
end
|
|
||||||
|
|
||||||
current_post_batch += 1 # Increment, moving to next post in the batch
|
#### current_post_batch += 1 # Increment, moving to next post in the batch
|
||||||
break if current_post_batch >= parent_post_count
|
#### break if current_post_batch >= parent_post_count
|
||||||
end
|
|
||||||
|
|
||||||
|
# Wait for all jobs in the current batch to finish before proceeding
|
||||||
pool.shutdown # Initiate thread pool shutdown after all jobs submitted
|
pool.shutdown # Initiate thread pool shutdown after all jobs submitted
|
||||||
pool.wait_for_termination # Wait for all threads to finish exec
|
pool.wait_for_termination # Wait for all threads to finish exec
|
||||||
|
|
||||||
# Set when all posts have been processed
|
# Check if all posts have been processed
|
||||||
is_complete = true if current_post_batch >= parent_post_count
|
#### is_complete = true if current_post_batch >= parent_post_count
|
||||||
|
is_complete = parent_post_ids.empty?
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user