v0.42.1 Rewrite and simplication of concurrent-ruby support
This commit is contained in:
		@@ -1,7 +1,7 @@
 | 
				
			|||||||
# Federated Computer, Inc.
 | 
					# Federated Computer, Inc.
 | 
				
			||||||
# David Sainty <saint@federated.computer>  2024 A.D.
 | 
					# David Sainty <saint@federated.computer>  2024 A.D.
 | 
				
			||||||
# Gossamer Threads to Discourse -- Migration-Import Script
 | 
					# Gossamer Threads to Discourse -- Migration-Import Script
 | 
				
			||||||
# v0.42 Mutex addition for SQLite (which may be very important)
 | 
					# v0.42.1 Rewrite and simplication of concurrent-ruby support
 | 
				
			||||||
 | 
					
 | 
				
			||||||
require 'mysql2'
 | 
					require 'mysql2'
 | 
				
			||||||
require 'open-uri'
 | 
					require 'open-uri'
 | 
				
			||||||
@@ -1077,7 +1077,9 @@ class GossamerForumsImporter < ImportScripts::Base
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    parent_post_count = parent_post_ids.count
 | 
					    parent_post_count = parent_post_ids.count
 | 
				
			||||||
    batch_size = 100 # Set our batch size for number of posts to import in a single batch
 | 
					    batch_size = 100 # Set our batch size for number of posts to import in a single batch
 | 
				
			||||||
    current_post_batch = 0 # Set our current batch number. This tracks the current batch of posts being processed.
 | 
					
 | 
				
			||||||
 | 
					####    current_post_batch = 0 # Set our current batch number. This tracks the current batch of posts being processed.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    is_complete = false # Flag to indicate whether the import process is complete.
 | 
					    is_complete = false # Flag to indicate whether the import process is complete.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    # Mutex to control access to shared resources
 | 
					    # Mutex to control access to shared resources
 | 
				
			||||||
@@ -1086,25 +1088,33 @@ class GossamerForumsImporter < ImportScripts::Base
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    # Run until all posts have been processed.
 | 
					    # Run until all posts have been processed.
 | 
				
			||||||
    until is_complete
 | 
					    until is_complete
 | 
				
			||||||
      # Query in batches, create pool, wait for termination, do it again
 | 
					
 | 
				
			||||||
      current_post_batch_max = current_post_batch + batch_size
 | 
					####       # Query in batches, create pool, wait for termination, do it again
 | 
				
			||||||
 | 
					####      current_post_batch_max = current_post_batch + batch_size
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      # Get the next batch of posts
 | 
				
			||||||
 | 
					      current_post_batch = parent_post_ids.shift(batch_size)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      break if current_post_batch.empty?
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      # Process each post in the current batch
 | 
				
			||||||
 | 
					      current_post_batch.each do |post_id|
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#######    # Static pool size based on number of CPUs
 | 
					#######    # Static pool size based on number of CPUs
 | 
				
			||||||
#      # pool = Concurrent::FixedThreadPool.new(Concurrent.processor_count) # Create a thread pool that is bounded by processors avaialable
 | 
					#      # pool = Concurrent::FixedThreadPool.new(Concurrent.processor_count) # Create a thread pool that is bounded by processors avaialable
 | 
				
			||||||
#      # pool = Concurrent::FixedThreadPool.new(8)                          # Create a thread pool of 8 pool members
 | 
					#      # pool = Concurrent::FixedThreadPool.new(8)                          # Create a thread pool of 8 pool members
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      # Dynamically calculate the pool size based on system load to optimise performance
 | 
					####      # Dynamically calculate the pool size based on system load to optimise performance
 | 
				
			||||||
      pool_size = calculate_dynamic_pool_size            # Dynamically calculate what the pool size "ought" to be.
 | 
					####      pool_size = calculate_dynamic_pool_size            # Dynamically calculate what the pool size "ought" to be.
 | 
				
			||||||
      pool = Concurrent::FixedThreadPool.new(pool_size)  # Create a thread pool with the calculated size
 | 
					####      pool = Concurrent::FixedThreadPool.new(pool_size)  # Create a thread pool with the calculated size
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      # Process each post in the current batch
 | 
					####      # Process each post in the current batch
 | 
				
			||||||
      while current_post_batch < current_post_batch_max
 | 
					####      while current_post_batch < current_post_batch_max
 | 
				
			||||||
        post_id = parent_post_ids[current_post_batch]  # Fetch the post_id for the current post
 | 
					####        post_id = parent_post_ids[current_post_batch]  # Fetch the post_id for the current post
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        # Check if the post has already been processed or is incomplete
 | 
					####        # Check if the post has already been processed or is incomplete
 | 
				
			||||||
        post_status = post_status(post_id)
 | 
					####        post_status = post_status(post_id)
 | 
				
			||||||
        if post_status.nil? || post_status == 0
 | 
					 | 
				
			||||||
          puts "Starting import for post_id #{post_id} in batch #{current_post_batch / batch_size + 1} with #{pool_size} threads"
 | 
					 | 
				
			||||||
  
 | 
					  
 | 
				
			||||||
      # Submit the import job for the current post_id to the thread pool
 | 
					      # Submit the import job for the current post_id to the thread pool
 | 
				
			||||||
        pool.post do
 | 
					        pool.post do
 | 
				
			||||||
@@ -1116,17 +1126,22 @@ class GossamerForumsImporter < ImportScripts::Base
 | 
				
			|||||||
            password: "yxnh93Ybbz2Nm8#mp28zCVv",
 | 
					            password: "yxnh93Ybbz2Nm8#mp28zCVv",
 | 
				
			||||||
            database: "slowtwitch"
 | 
					            database: "slowtwitch"
 | 
				
			||||||
          )
 | 
					          )
 | 
				
			||||||
 | 
					 | 
				
			||||||
    puts "PP 22 --   ${post_id}"
 | 
					    puts "PP 22 --   ${post_id}"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					          begin
 | 
				
			||||||
            # Use connection pooling for PostgreSQL and synchronize access to shared resources
 | 
					            # Use connection pooling for PostgreSQL and synchronize access to shared resources
 | 
				
			||||||
            ActiveRecord::Base.connection_pool.with_connection do
 | 
					            ActiveRecord::Base.connection_pool.with_connection do
 | 
				
			||||||
###              mutex.synchronize do
 | 
					              post_status = post_status(post_id)
 | 
				
			||||||
                begin
 | 
					              if post_status.nil? || post_status == 0
 | 
				
			||||||
                  puts "Processing post ID: #{post_id}"
 | 
					                puts "Starting import for post_id #{post_id}"
 | 
				
			||||||
                topic_import_job(post_id, mysql_client, sqlite_mutex)  # Import topic and its replies
 | 
					                topic_import_job(post_id, mysql_client, sqlite_mutex)  # Import topic and its replies
 | 
				
			||||||
                sqlite_mutex.sychronize do
 | 
					                sqlite_mutex.sychronize do
 | 
				
			||||||
                  mark_post_as_complete(post_id)  # Mark as complete in SQLite table
 | 
					                  mark_post_as_complete(post_id)  # Mark as complete in SQLite table
 | 
				
			||||||
                end
 | 
					                end
 | 
				
			||||||
 | 
					              else
 | 
				
			||||||
 | 
					                puts "Skipping post_id #{post_id}, already processed."
 | 
				
			||||||
 | 
					              end
 | 
				
			||||||
 | 
					            end
 | 
				
			||||||
          rescue => e
 | 
					          rescue => e
 | 
				
			||||||
            puts "Error processing post ID #{post_id}: #{e.message}"
 | 
					            puts "Error processing post ID #{post_id}: #{e.message}"
 | 
				
			||||||
            sqlite_mutex.sychronize do
 | 
					            sqlite_mutex.sychronize do
 | 
				
			||||||
@@ -1136,22 +1151,19 @@ class GossamerForumsImporter < ImportScripts::Base
 | 
				
			|||||||
            # Ensure the MariaDB connection is closed after processing
 | 
					            # Ensure the MariaDB connection is closed after processing
 | 
				
			||||||
            mysql_client.close if mysql_client
 | 
					            mysql_client.close if mysql_client
 | 
				
			||||||
          end
 | 
					          end
 | 
				
			||||||
###              end
 | 
					 | 
				
			||||||
        end
 | 
					        end
 | 
				
			||||||
      end
 | 
					      end
 | 
				
			||||||
        else
 | 
					 | 
				
			||||||
          puts "Skipping post_id #{post_id}, already processed."
 | 
					 | 
				
			||||||
        end
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
        current_post_batch += 1  # Increment, moving to next post in the batch
 | 
					####        current_post_batch += 1  # Increment, moving to next post in the batch
 | 
				
			||||||
        break if current_post_batch >= parent_post_count
 | 
					####        break if current_post_batch >= parent_post_count
 | 
				
			||||||
      end
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      # Wait for all jobs in the current batch to finish before proceeding
 | 
				
			||||||
      pool.shutdown  # Initiate thread pool shutdown after all jobs submitted
 | 
					      pool.shutdown  # Initiate thread pool shutdown after all jobs submitted
 | 
				
			||||||
      pool.wait_for_termination  # Wait for all threads to finish exec
 | 
					      pool.wait_for_termination  # Wait for all threads to finish exec
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      # Set when all posts have been processed
 | 
					      # Check if all posts have been processed
 | 
				
			||||||
      is_complete = true if current_post_batch >= parent_post_count
 | 
					####      is_complete = true if current_post_batch >= parent_post_count
 | 
				
			||||||
 | 
					      is_complete = parent_post_ids.empty?
 | 
				
			||||||
    end
 | 
					    end
 | 
				
			||||||
  end
 | 
					  end
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user