v0.41.2 Several bug fixes and improvements for fully concurrent topic-post import

This commit is contained in:
David Sainty 2024-08-17 18:52:51 +10:00
parent 4f608ce87f
commit 29f66c1e60

View File

@ -1,7 +1,7 @@
# Federated Computer, Inc. # Federated Computer, Inc.
# David Sainty <saint@federated.computer> 2024 A.D. # David Sainty <saint@federated.computer> 2024 A.D.
# Gossamer Threads to Discourse -- Migration-Import Script # Gossamer Threads to Discourse -- Migration-Import Script
# v0.41.1 Fixes for FULL concurrency support # v0.41.2 Several bug fixes and improvements for fully concurrent topic-post import
require 'mysql2' require 'mysql2'
require 'open-uri' require 'open-uri'
@ -45,6 +45,15 @@ class GossamerForumsImporter < ImportScripts::Base
def initialize_sqlite_id_name_url_db def initialize_sqlite_id_name_url_db
@db = SQLite3::Database.new '/bitnami/discourse/sqlite/id_name_url_map.db' @db = SQLite3::Database.new '/bitnami/discourse/sqlite/id_name_url_map.db'
###### ONLY when we need to clear the url_map and topic_import_status .... e.g. if reimporting topics-posts from scratch
# @db.execute <<-SQL
# DROP TABLE IF EXISTS url_map;
# SQL
# @db.execute <<-SQL
# DROP TABLE IF EXISTS topic_import_status;
# SQL
@db.execute <<-SQL @db.execute <<-SQL
CREATE TABLE IF NOT EXISTS user_id_map ( CREATE TABLE IF NOT EXISTS user_id_map (
old_user_id INTEGER PRIMARY KEY, old_user_id INTEGER PRIMARY KEY,
@ -66,12 +75,6 @@ class GossamerForumsImporter < ImportScripts::Base
real_name TEXT real_name TEXT
); );
SQL SQL
# ONLY when we need to clear the url_map e.g. if reimporting topics-posts from scratch
# @db.execute <<-SQL
# DROP TABLE IF EXISTS url_map;
# SQL
@db.execute <<-SQL @db.execute <<-SQL
CREATE TABLE IF NOT EXISTS url_map ( CREATE TABLE IF NOT EXISTS url_map (
old_post_id INTEGER PRIMARY KEY, old_post_id INTEGER PRIMARY KEY,
@ -171,7 +174,7 @@ class GossamerForumsImporter < ImportScripts::Base
# Insert a URL mapping into the SQLite database # Insert a URL mapping into the SQLite database
def insert_url_mapping(old_post_id, new_url, title) def insert_url_mapping(old_post_id, new_url, title)
@db.execute "INSERT INTO url_map (old_post_id, new_url, title) VALUES (?, ?, ?)", [old_post_id, new_url, title] @db.execute "INSERT OR REPLACE INTO url_map (old_post_id, new_url, title) VALUES (?, ?, ?)", [old_post_id, new_url, title]
end end
# Export the URL mappings to a CSV file # Export the URL mappings to a CSV file
@ -1021,7 +1024,7 @@ class GossamerForumsImporter < ImportScripts::Base
result ? result['post_views'] : 0 result ? result['post_views'] : 0
end end
#THREADING OUTLINE HERE -------------------------------------------- ########## THREADING START --------------------------------------------
# Method to dynamically calculate the optimal thread pool size based on system load # Method to dynamically calculate the optimal thread pool size based on system load
def calculate_dynamic_pool_size def calculate_dynamic_pool_size
@ -1085,7 +1088,7 @@ class GossamerForumsImporter < ImportScripts::Base
# Query in batches, create pool, wait for termination, do it again # Query in batches, create pool, wait for termination, do it again
current_post_batch_max = current_post_batch + batch_size current_post_batch_max = current_post_batch + batch_size
# # Static pool size based on number of CPUs ####### # Static pool size based on number of CPUs
# # pool = Concurrent::FixedThreadPool.new(Concurrent.processor_count) # Create a thread pool that is bounded by processors available # # pool = Concurrent::FixedThreadPool.new(Concurrent.processor_count) # Create a thread pool that is bounded by processors available
# # pool = Concurrent::FixedThreadPool.new(8) # Create a thread pool of 8 pool members # # pool = Concurrent::FixedThreadPool.new(8) # Create a thread pool of 8 pool members
@ -1235,7 +1238,7 @@ class GossamerForumsImporter < ImportScripts::Base
# Sanitize and prepare the post message for Discourse # Sanitize and prepare the post message for Discourse
sanitized_post_message = sanitize_post_message(row['post_message']) sanitized_post_message = sanitize_post_message(row['post_message'])
puts "CREATE POST topic.id #{topic.id} discourse_user_id #{discourse_user_id}" puts "CREATE TOPIC POST topic.id #{topic.id} discourse_user_id #{discourse_user_id}"
# Increment the post count for the topic # Increment the post count for the topic
post_number = fetch_db_topic_post_numbers(topic.id).to_i + 1 post_number = fetch_db_topic_post_numbers(topic.id).to_i + 1
@ -1268,7 +1271,7 @@ class GossamerForumsImporter < ImportScripts::Base
insert_url_mapping(row['post_id'], new_url, unique_title) insert_url_mapping(row['post_id'], new_url, unique_title)
# Fetch and import all replies to this topic # Fetch and import all replies to this topic
replies = execute_query_concurrent("SELECT post_id, user_id_fk, post_message, post_time, FROM gforum_Post WHERE post_root_id = #{post_id} ORDER BY post_time ASC", mysql_client) replies = execute_query_concurrent("SELECT post_id, user_id_fk, post_message, post_time FROM gforum_Post WHERE post_root_id = #{post_id} ORDER BY post_time ASC", mysql_client)
# Import each reply sequentially # Import each reply sequentially
replies.each do |reply_row| replies.each do |reply_row|
@ -1285,6 +1288,9 @@ class GossamerForumsImporter < ImportScripts::Base
post_number = fetch_db_topic_post_numbers(topic.id).to_i + 1 post_number = fetch_db_topic_post_numbers(topic.id).to_i + 1
update_db_topic_post_numbers(topic.id, post_number) update_db_topic_post_numbers(topic.id, post_number)
# Fetch the number of views the post has had
reply_post_views = fetch_post_views(reply_row['post_id'])
# Create the reply post in the existing topic # Create the reply post in the existing topic
post = Post.create!( post = Post.create!(
topic_id: topic.id, topic_id: topic.id,
@ -1292,6 +1298,7 @@ class GossamerForumsImporter < ImportScripts::Base
raw: sanitized_reply_message, raw: sanitized_reply_message,
created_at: Time.at(reply_row['post_time']), created_at: Time.at(reply_row['post_time']),
updated_at: Time.at(reply_row['post_time']), updated_at: Time.at(reply_row['post_time']),
reads: reply_post_views || 0,
post_number: post_number post_number: post_number
) )
post.custom_fields['original_gossamer_id'] = reply_row['post_id'] post.custom_fields['original_gossamer_id'] = reply_row['post_id']
@ -1333,7 +1340,8 @@ class GossamerForumsImporter < ImportScripts::Base
end end
#------------------------------------------------------------------- ########## THREADING END --------------------------------------------
# Import topics and posts from Gossamer Forums to Discourse # Import topics and posts from Gossamer Forums to Discourse
def import_topics_and_posts_with_attachments def import_topics_and_posts_with_attachments