v0.48.2: batch size raised to 1000; moving to a local MariaDB instance for the slowtwitch DB

This commit is contained in:
David Sainty 2024-08-19 10:48:40 +10:00
parent f3c91267af
commit bcac1f8881
2 changed files with 40 additions and 11 deletions

View File

@ -102,8 +102,8 @@ class GossamerForumsCleaner
def perform_cleanup def perform_cleanup
puts "Cleanup beginning!" puts "Cleanup beginning!"
# cleanup_messages # cleanup_messages
cleanup_topics # cleanup_topics
cleanup_posts_parallel # cleanup_posts_parallel
# cleanup_categories # cleanup_categories
# cleanup_users # cleanup_users
puts "Cleanup complete!" puts "Cleanup complete!"

View File

@ -1,7 +1,7 @@
# Federated Computer, Inc. # Federated Computer, Inc.
# David Sainty <saint@federated.computer> 2024 A.D. # David Sainty <saint@federated.computer> 2024 A.D.
# Gossamer Threads to Discourse -- Migration-Import Script # Gossamer Threads to Discourse -- Migration-Import Script
# v0.48.1 Further attempts to address MariaDB challenges and make things as foolproof as possible. # v0.48.2 batch size 1000, moving to local instance of MariaDB for slowtwitch db
require 'mysql2' require 'mysql2'
require 'open-uri' require 'open-uri'
@ -30,24 +30,44 @@ class GossamerForumsImporter < ImportScripts::Base
super super
begin begin
# # Database configuration for ActiveRecord
# ActiveRecord::Base.establish_connection(
# adapter: 'mysql2',
# database: 'slowtwitch',
# username: 'admin',
# password: "yxnh93Ybbz2Nm8#mp28zCVv",
# host: 'slowtwitch.northend.network',
# pool: 20, # Adjust based on concurrency needs
# timeout: 5000
# )
# # Initialize MySQL client to connect to Gossamer Forums database
# @mysql_client = Mysql2::Client.new(
# host: "slowtwitch.northend.network",
# username: "admin",
# password: "yxnh93Ybbz2Nm8#mp28zCVv",
# database: "slowtwitch"
# )
# Database configuration for ActiveRecord # Database configuration for ActiveRecord
ActiveRecord::Base.establish_connection( ActiveRecord::Base.establish_connection(
adapter: 'postgresql', adapter: 'mysql2',
database: 'slowtwitch', database: 'slowtwitch',
username: 'admin', username: 'admin',
password: "yxnh93Ybbz2Nm8#mp28zCVv", password: 'x0YGLA9252iiTFQuqaM0ROX8FmQzZuUu',
host: 'slowtwitch.northend.network', host: '172.99.0.10',
pool: 20, # Adjust based on concurrency needs pool: 20, # Adjust based on concurrency needs
timeout: 5000 timeout: 5000
) )
# Initialize MySQL client to connect to Gossamer Forums database # Initialize MySQL client to connect to Gossamer Forums database
@mysql_client = Mysql2::Client.new( @mysql_client = Mysql2::Client.new(
host: "slowtwitch.northend.network", host: "172.99.0.10",
username: "admin", username: "admin",
password: "yxnh93Ybbz2Nm8#mp28zCVv", password: "x0YGLA9252iiTFQuqaM0ROX8FmQzZuUu",
database: "slowtwitch" database: "slowtwitch"
) )
rescue Mysql2::Error => e rescue Mysql2::Error => e
puts "Error connecting to MySQL: #{e.message}" puts "Error connecting to MySQL: #{e.message}"
exit 1 exit 1
@ -1130,11 +1150,19 @@ class GossamerForumsImporter < ImportScripts::Base
# Define the connection pool inside the method # Define the connection pool inside the method
###### mariadb_pool = ConnectionPool.new(size: 14, timeout: 100) do ###### mariadb_pool = ConnectionPool.new(size: 14, timeout: 100) do
# mariadb_pool = ConnectionPool.new(size: 24, timeout: 100) do
# Mysql2::Client.new(
# host: "slowtwitch.northend.network",
# username: "admin",
# password: "yxnh93Ybbz2Nm8#mp28zCVv",
# database: "slowtwitch"
# )
# end
mariadb_pool = ConnectionPool.new(size: 24, timeout: 100) do mariadb_pool = ConnectionPool.new(size: 24, timeout: 100) do
Mysql2::Client.new( Mysql2::Client.new(
host: "slowtwitch.northend.network", host: "172.99.0.10",
username: "admin", username: "admin",
password: "yxnh93Ybbz2Nm8#mp28zCVv", password: "x0YGLA9252iiTFQuqaM0ROX8FmQzZuUu",
database: "slowtwitch" database: "slowtwitch"
) )
end end
@ -1148,7 +1176,8 @@ class GossamerForumsImporter < ImportScripts::Base
parent_post_ids = result.map { |row| row['post_id'] } parent_post_ids = result.map { |row| row['post_id'] }
# parent_post_count = parent_post_ids.count # parent_post_count = parent_post_ids.count
batch_size = 10 # Set our batch size for number of posts to import in a single batch #
batch_size = 1000 # Set our batch size for number of posts to import in a single batch
#### current_post_batch = 0 # Set our current batch number. This tracks the current batch of posts being processed. #### current_post_batch = 0 # Set our current batch number. This tracks the current batch of posts being processed.