Post-Migrator/services/slowtwitch/get-images-and-post-html.go
2024-05-19 15:00:33 -06:00

65 lines
1.4 KiB
Go

package slowtwitch
import (
	"fmt"
	"io"
	"net/http"
	"strings"
	"time"

	"github.com/PuerkitoBio/goquery"
)
// GetImagesAndPostHtml downloads the page at url and extracts the article
// content found under the ".detail_text" selector.
//
// Results:
//   - imagePaths holds the "src" attribute of every <img> inside
//     ".detail_text", collected before any node is removed, so the first
//     (featured) image is included.
//   - htmlBody is the inner HTML of the first ".detail_text" element with the
//     first <img> removed, because WordPress handles that image as the
//     featured image. It is empty when the page has no ".detail_text" element.
//   - err is non-nil when the request fails or times out, the server answers
//     with a non-2xx status, the body cannot be read, or the HTML cannot be
//     parsed or rendered. On error imagePaths is nil and htmlBody is empty.
func GetImagesAndPostHtml(url string) (imagePaths []string, htmlBody string, err error) {
	// Bound the whole exchange (connect, headers and body) so a stalled server
	// cannot hang the caller forever. A zero Transport means the shared
	// http.DefaultTransport is used, so connections are still pooled.
	const fetchTimeout = 30 * time.Second
	client := http.Client{Timeout: fetchTimeout}

	res, err := client.Get(url)
	if err != nil {
		return nil, "", fmt.Errorf("fetching %q: %w", url, err)
	}
	defer res.Body.Close()

	// An error page must not be parsed as if it were the article.
	if res.StatusCode < http.StatusOK || res.StatusCode >= http.StatusMultipleChoices {
		return nil, "", fmt.Errorf("fetching %q: unexpected status %s", url, res.Status)
	}

	// Read the body in full before parsing so transport errors are reported
	// separately from HTML parse errors.
	htmlContent, err := io.ReadAll(res.Body)
	if err != nil {
		return nil, "", fmt.Errorf("reading body of %q: %w", url, err)
	}

	doc, err := goquery.NewDocumentFromReader(strings.NewReader(string(htmlContent)))
	if err != nil {
		return nil, "", fmt.Errorf("parsing HTML of %q: %w", url, err)
	}

	// Collect every image source before the document is modified below.
	doc.Find(".detail_text img").Each(func(_ int, img *goquery.Selection) {
		if src, ok := img.Attr("src"); ok {
			imagePaths = append(imagePaths, src)
		}
	})

	// Remove only the first image. The previous ":first-child" selector matched
	// every descendant that is the first child of its parent, which stripped
	// unrelated nested content from the article.
	blog := doc.Find(".detail_text")
	blog.Find("img").First().Remove()

	htmlBody, err = blog.Html()
	if err != nil {
		return nil, "", fmt.Errorf("rendering article HTML of %q: %w", url, err)
	}
	return imagePaths, htmlBody, nil
}
/*
Example of how to swap each <img> node for a newly built one (html is golang.org/x/net/html):
blog.Find("img").Each(func(i int, img *goquery.Selection) {
imgUrl, exists := img.Attr("src")
if exists {
newEle := goquery.NewDocumentFromNode(&html.Node{
Type: html.ElementNode,
Data: "img",
Attr: []html.Attribute{
html.Attribute{
Key: "src",
Val: "www.slowtwitch.cloud" + imgUrl,
},
html.Attribute{
Key: "class",
Val: "class1 class2 class3",
},
},
})
img.AfterSelection(newEle.Selection)
}
img.Remove()
})*/