concurrency outline
This commit is contained in:
		@@ -14,6 +14,8 @@ require 'fileutils'
 | 
				
			|||||||
require 'csv'
 | 
					require 'csv'
 | 
				
			||||||
require 'time'
 | 
					require 'time'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					require 'concurrent'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
require File.expand_path("../../../../config/environment", __FILE__)
 | 
					require File.expand_path("../../../../config/environment", __FILE__)
 | 
				
			||||||
# require_relative '../base'
 | 
					# require_relative '../base'
 | 
				
			||||||
require File.expand_path("../../../../script/import_scripts/base", __FILE__)
 | 
					require File.expand_path("../../../../script/import_scripts/base", __FILE__)
 | 
				
			||||||
@@ -925,6 +927,50 @@ class GossamerForumsImporter < ImportScripts::Base
 | 
				
			|||||||
    result ? result['post_views'] : 0
 | 
					    result ? result['post_views'] : 0
 | 
				
			||||||
  end
 | 
					  end
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  #THREADING OUTLINE HERE --------------------------------------------
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  def threaded_topic_import
 | 
				
			||||||
 | 
					    # Get list of IDS that have no parent ID - SELECT post_id FROM gforum_Post WHERE post_root_id = 0;
 | 
				
			||||||
 | 
					    parent_post_ids = execute_query("SELECT post_id FROM gforum_Post WHERE post_root_id = 0")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    parent_post_count = parent_post_ids.count
 | 
				
			||||||
 | 
					    batch_size = 100 #set our batch size
 | 
				
			||||||
 | 
					    current_post_batch = 0 #set our current batch number
 | 
				
			||||||
 | 
					    is_complete = false
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    until is_complete
 | 
				
			||||||
 | 
					      # Query in batches, create pool, wait for termination, do it again
 | 
				
			||||||
 | 
					    # SELECT post_id FROM gforum_Post WHERE post_root_id = 0 ORDER BY post_id  
 | 
				
			||||||
 | 
					      current_post_batch_max = current_post_batch + batch_size
 | 
				
			||||||
 | 
					      pool = Concurrent::FixedThreadPool.new(Concurrent.processor_count) #create thread pool that is bounded by processors avaialable
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      while current_post_batch < current_post_batch_max
 | 
				
			||||||
 | 
					        post_id = parent_post_ids[current_post_batch]
 | 
				
			||||||
 | 
					        pool.post do
 | 
				
			||||||
 | 
					          topic_import_job(post_id)
 | 
				
			||||||
 | 
					        end
 | 
				
			||||||
 | 
					        current_post_batch += 1
 | 
				
			||||||
 | 
					      end
 | 
				
			||||||
 | 
					      
 | 
				
			||||||
 | 
					      pool.shutdown
 | 
				
			||||||
 | 
					      pool.wait_for_termination
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      if current_post_batch >= parent_post_count
 | 
				
			||||||
 | 
					        is_complete = true
 | 
				
			||||||
 | 
					      end
 | 
				
			||||||
 | 
					    end
 | 
				
			||||||
 | 
					  end
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  def topic_import_job(post_id)
 | 
				
			||||||
 | 
					    #Here is where you can import the entire topic
 | 
				
			||||||
 | 
					    #Get post -- SELECT post_id, user_id_fk, forum_id_fk, post_root_id, post_subject, post_time, post_message, post_father_id, post_replies FROM gforum_Post WHERE post_id = post_id
 | 
				
			||||||
 | 
					    #check if exists, create if not
 | 
				
			||||||
 | 
					    #get children, create -- SELECT post_id, user_id_fk, forum_id_fk, post_root_id, post_subject, post_time, post_message, post_father_id, post_replies FROM gforum_Post WHERE post_root_id = post_id
 | 
				
			||||||
 | 
					    #this parts needs to be synchronously to avoid race conditions
 | 
				
			||||||
 | 
					  end
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  #-------------------------------------------------------------------
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  # Import topics and posts from Gossamer Forums to Discourse
 | 
					  # Import topics and posts from Gossamer Forums to Discourse
 | 
				
			||||||
  def import_topics_and_posts_with_attachments
 | 
					  def import_topics_and_posts_with_attachments
 | 
				
			||||||
    puts "Importing topics and posts with attachments..."
 | 
					    puts "Importing topics and posts with attachments..."
 | 
				
			||||||
@@ -946,7 +992,7 @@ class GossamerForumsImporter < ImportScripts::Base
 | 
				
			|||||||
#   Attachment example:  highest_processed_post_id = 1359862 
 | 
					#   Attachment example:  highest_processed_post_id = 1359862 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    # Execute the query to get all posts ordered by post_id
 | 
					    # Execute the query to get all posts ordered by post_id
 | 
				
			||||||
    execute_query("SELECT post_id, user_id_fk, forum_id_fk, post_root_id, post_subject, post_time, post_message, post_likes, post_father_id, post_replies FROM gforum_Post ORDER BY post_id").each do |row|
 | 
					    execute_query("SELECT post_id, user_id_fk, forum_id_fk, post_root_id, post_subject, post_time, post_message, post_father_id, post_replies FROM gforum_Post ORDER BY post_id").each do |row|
 | 
				
			||||||
      post_id = row['post_id'].to_i
 | 
					      post_id = row['post_id'].to_i
 | 
				
			||||||
 | 
					
 | 
				
			||||||
     # Skip posts that have already been processed
 | 
					     # Skip posts that have already been processed
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user