concurrency outline

This commit is contained in:
Ross Trottier 2024-08-15 11:49:52 -06:00 committed by David Sainty
parent 0ad1482f95
commit b73be6d27f

View File

@ -14,6 +14,8 @@ require 'fileutils'
require 'csv' require 'csv'
require 'time' require 'time'
require 'concurrent'
require File.expand_path("../../../../config/environment", __FILE__) require File.expand_path("../../../../config/environment", __FILE__)
# require_relative '../base' # require_relative '../base'
require File.expand_path("../../../../script/import_scripts/base", __FILE__) require File.expand_path("../../../../script/import_scripts/base", __FILE__)
@ -925,6 +927,50 @@ class GossamerForumsImporter < ImportScripts::Base
result ? result['post_views'] : 0 result ? result['post_views'] : 0
end end
#THREADING OUTLINE HERE --------------------------------------------
def threaded_topic_import
# Get list of IDS that have no parent ID - SELECT post_id FROM gforum_Post WHERE post_root_id = 0;
parent_post_ids = execute_query("SELECT post_id FROM gforum_Post WHERE post_root_id = 0")
parent_post_count = parent_post_ids.count
batch_size = 100 #set our batch size
current_post_batch = 0 #set our current batch number
is_complete = false
until is_complete
# Query in batches, create pool, wait for termination, do it again
# SELECT post_id FROM gforum_Post WHERE post_root_id = 0 ORDER BY post_id
current_post_batch_max = current_post_batch + batch_size
pool = Concurrent::FixedThreadPool.new(Concurrent.processor_count) #create thread pool that is bounded by processors avaialable
while current_post_batch < current_post_batch_max
post_id = parent_post_ids[current_post_batch]
pool.post do
topic_import_job(post_id)
end
current_post_batch += 1
end
pool.shutdown
pool.wait_for_termination
if current_post_batch >= parent_post_count
is_complete = true
end
end
end
def topic_import_job(post_id)
#Here is where you can import the entire topic
#Get post -- SELECT post_id, user_id_fk, forum_id_fk, post_root_id, post_subject, post_time, post_message, post_father_id, post_replies FROM gforum_Post WHERE post_id = post_id
#check if exists, create if not
#get children, create -- SELECT post_id, user_id_fk, forum_id_fk, post_root_id, post_subject, post_time, post_message, post_father_id, post_replies FROM gforum_Post WHERE post_root_id = post_id
#this parts needs to be synchronously to avoid race conditions
end
#-------------------------------------------------------------------
# Import topics and posts from Gossamer Forums to Discourse # Import topics and posts from Gossamer Forums to Discourse
def import_topics_and_posts_with_attachments def import_topics_and_posts_with_attachments
puts "Importing topics and posts with attachments..." puts "Importing topics and posts with attachments..."
@ -946,7 +992,7 @@ class GossamerForumsImporter < ImportScripts::Base
# Attachment example: highest_processed_post_id = 1359862 # Attachment example: highest_processed_post_id = 1359862
# Execute the query to get all posts ordered by post_id # Execute the query to get all posts ordered by post_id
execute_query("SELECT post_id, user_id_fk, forum_id_fk, post_root_id, post_subject, post_time, post_message, post_likes, post_father_id, post_replies FROM gforum_Post ORDER BY post_id").each do |row| execute_query("SELECT post_id, user_id_fk, forum_id_fk, post_root_id, post_subject, post_time, post_message, post_father_id, post_replies FROM gforum_Post ORDER BY post_id").each do |row|
post_id = row['post_id'].to_i post_id = row['post_id'].to_i
# Skip posts that have already been processed # Skip posts that have already been processed