From 29f66c1e60953039ca67837a9b8a62107cd2f39c Mon Sep 17 00:00:00 2001 From: saint Date: Sat, 17 Aug 2024 18:52:51 +1000 Subject: [PATCH] v0.41.2 Several bug fixes and improvements for fully concurrent topic-post import --- gossamer_forums.rb | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/gossamer_forums.rb b/gossamer_forums.rb index d9357a2..7a6cb0a 100644 --- a/gossamer_forums.rb +++ b/gossamer_forums.rb @@ -1,7 +1,7 @@ # Federated Computer, Inc. # David Sainty 2024 A.D. # Gossamer Threads to Discourse -- Migration-Import Script -# v0.41.1 Fixes for FULL concurrency support +# v0.41.2 Several bug fixes and improvements for fully concurrent topic-post import require 'mysql2' require 'open-uri' @@ -45,6 +45,15 @@ class GossamerForumsImporter < ImportScripts::Base def initialize_sqlite_id_name_url_db @db = SQLite3::Database.new '/bitnami/discourse/sqlite/id_name_url_map.db' + +###### ONLY when we need to clear the url_map and topic_import_status .... e.g. if reimporting topics-posts from scratch +# @db.execute <<-SQL +# DROP TABLE IF EXISTS url_map; +# SQL +# @db.execute <<-SQL +# DROP TABLE IF EXISTS topic_import_status; +# SQL + @db.execute <<-SQL CREATE TABLE IF NOT EXISTS user_id_map ( old_user_id INTEGER PRIMARY KEY, @@ -66,12 +75,6 @@ class GossamerForumsImporter < ImportScripts::Base real_name TEXT ); SQL - - # ONLY when we need to clear the url_map e.g. if reimporting topics-posts from scratch -# @db.execute <<-SQL -# DROP TABLE IF EXISTS url_map; -# SQL - @db.execute <<-SQL CREATE TABLE IF NOT EXISTS url_map ( old_post_id INTEGER PRIMARY KEY, @@ -171,7 +174,7 @@ class GossamerForumsImporter < ImportScripts::Base # Insert a URL mapping into the SQLite database def insert_url_mapping(old_post_id, new_url, title) - @db.execute "INSERT INTO url_map (old_post_id, new_url, title) VALUES (?, ?, ?)", [old_post_id, new_url, title] + @db.execute "INSERT OR REPLACE INTO url_map (old_post_id, new_url, title) VALUES (?, ?, ?)", [old_post_id, new_url, title] end # Export the URL mappings to a CSV file @@ -1021,7 +1024,7 @@ class GossamerForumsImporter < ImportScripts::Base result ? result['post_views'] : 0 end - #THREADING OUTLINE HERE -------------------------------------------- +########## THREADING START -------------------------------------------- # Method to dynamically calculate the optimal thread pool size based on system load def calculate_dynamic_pool_size @@ -1085,7 +1088,7 @@ class GossamerForumsImporter < ImportScripts::Base # Query in batches, create pool, wait for termination, do it again current_post_batch_max = current_post_batch + batch_size -# # Static pool size based on number of CPUs +####### # Static pool size based on number of CPUs # # pool = Concurrent::FixedThreadPool.new(Concurrent.processor_count) # Create a thread pool that is bounded by processors avaialable # # pool = Concurrent::FixedThreadPool.new(8) # Create a thread pool of 8 pool members @@ -1235,7 +1238,7 @@ class GossamerForumsImporter < ImportScripts::Base # Sanitize and prepare the post message for Discourse sanitized_post_message = sanitize_post_message(row['post_message']) - puts "CREATE POST topic.id #{topic.id} discourse_user_id #{discourse_user_id}" + puts "CREATE TOPIC POST topic.id #{topic.id} discourse_user_id #{discourse_user_id}" # Increment the post count for the topic post_number = fetch_db_topic_post_numbers(topic.id).to_i + 1 @@ -1268,7 +1271,7 @@ class GossamerForumsImporter < ImportScripts::Base insert_url_mapping(row['post_id'], new_url, unique_title) # Fetch and import all replies to this topic - replies = execute_query_concurrent("SELECT post_id, user_id_fk, post_message, post_time, FROM gforum_Post WHERE post_root_id = #{post_id} ORDER BY post_time ASC", mysql_client) + replies = execute_query_concurrent("SELECT post_id, user_id_fk, post_message, post_time FROM gforum_Post WHERE post_root_id = #{post_id} ORDER BY post_time ASC", mysql_client) # Import each reply sequentially replies.each do |reply_row| @@ -1285,6 +1288,9 @@ class GossamerForumsImporter < ImportScripts::Base post_number = fetch_db_topic_post_numbers(topic.id).to_i + 1 update_db_topic_post_numbers(topic.id, post_number) + # Fetch the number of views the post has had + reply_post_views = fetch_post_views(reply_row['post_id']) + # Create the reply post in the existing topic post = Post.create!( topic_id: topic.id, @@ -1292,6 +1298,7 @@ class GossamerForumsImporter < ImportScripts::Base raw: sanitized_reply_message, created_at: Time.at(reply_row['post_time']), updated_at: Time.at(reply_row['post_time']), + reads: reply_post_views || 0, post_number: post_number ) post.custom_fields['original_gossamer_id'] = reply_row['post_id'] @@ -1333,7 +1340,8 @@ class GossamerForumsImporter < ImportScripts::Base end - #------------------------------------------------------------------- +########## THREADING END -------------------------------------------- + # Import topics and posts from Gossamer Forums to Discourse def import_topics_and_posts_with_attachments