Post-Migrator/services/slowtwitch/get-images-and-post-html.go

69 lines
1.5 KiB
Go
Raw Permalink Normal View History

2024-05-19 21:00:33 +00:00
package slowtwitch
import (
"github.com/PuerkitoBio/goquery"
"io"
"net/http"
"strings"
)
func GetImagesAndPostHtml(url string) (imagePaths []string, htmlBody string, err error) {
//EXPERIMENT START
res, err := http.Get(url)
if err != nil {
return
}
defer res.Body.Close()
2024-05-20 01:35:53 +00:00
imagePaths = make([]string, 0)
2024-05-19 21:00:33 +00:00
// Read the HTML content
htmlContent, err := io.ReadAll(res.Body)
if err != nil {
return
}
doc, err := goquery.NewDocumentFromReader(strings.NewReader(string(htmlContent)))
if err != nil {
return
}
// Find all image tags and extract their 'src' attributes
doc.Find(".detail_text img").Each(func(i int, img *goquery.Selection) {
imgUrl, exists := img.Attr("src")
if exists {
imagePaths = append(imagePaths, imgUrl)
}
})
// Get blog html, remove first image because wordpress will handle that as a featured image
blog := doc.Find(".detail_text")
2024-05-28 23:45:47 +00:00
blog.Find("a").Each(func(i int, a *goquery.Selection) {
if a.Text() == "Slideshow" {
a.Remove()
}
})
2024-05-19 21:00:33 +00:00
htmlBody, err = blog.Html()
return
}
/*
example of how to switch out nodes
blog.Find("img").Each(func(i int, img *goquery.Selection) {
imgUrl, exists := img.Attr("src")
if exists {
newEle := goquery.NewDocumentFromNode(&html.Node{
Type: html.ElementNode,
Data: "img",
Attr: []html.Attribute{
html.Attribute{
Key: "src",
Val: "www.slowtwitch.cloud" + imgUrl,
},
html.Attribute{
Key: "class",
Val: "class1 class2 class3",
},
},
})
img.AfterSelection(newEle.Selection)
}
img.Remove()
})*/