feat: keep only one method to get title of a page

This commit is contained in:
2025-11-11 16:19:09 +01:00
parent 325cbe9c2c
commit c2d4357262

View File

@@ -262,28 +262,7 @@ func (s *URLMetadataService) FetchTitle(ctx context.Context, rawURL string) (str
}
// ExtractTitleFromHTML returns the first non-empty title that any of the
// service's extractors finds in html, or "" when none of them finds one.
//
// Extractors are tried in this order: ExtractFromTitleTag,
// ExtractFromOpenGraph, ExtractFromJSONLD, ExtractFromTwitterCard and
// extractFromMetaTags. The first non-empty result wins and later extractors
// are not called.
func (s *URLMetadataService) ExtractTitleFromHTML(html string) string {
	// Order matters: earlier entries take precedence over later ones.
	extractors := []func(string) string{
		s.ExtractFromTitleTag,
		s.ExtractFromOpenGraph,
		s.ExtractFromJSONLD,
		s.ExtractFromTwitterCard,
		s.extractFromMetaTags,
	}
	for _, extract := range extractors {
		if title := extract(html); title != "" {
			return title
		}
	}
	return ""
}
func (s *URLMetadataService) ExtractFromTitleTag(htmlContent string) string {
@@ -313,109 +292,6 @@ func (s *URLMetadataService) ExtractFromTitleTag(htmlContent string) string {
}
}
// ExtractFromOpenGraph returns the cleaned Open Graph title found in
// htmlContent, or "" when there is none.
//
// The document is scanned line by line. A line qualifies when it contains
// property="og:title" (matched case-insensitively). The title is the text
// between content=" (matched case-sensitively) and the next double quote,
// passed through optimizedTitleClean. A candidate whose cleaned value is empty
// is skipped and the scan continues.
//
// Several tags can share one line (for example in minified HTML), so the
// content attribute is looked up first inside the tag that carries the
// og:title marker. Only when that yields nothing does the search fall back to
// the first content attribute anywhere on the line.
func (s *URLMetadataService) ExtractFromOpenGraph(htmlContent string) string {
	const (
		marker    = `property="og:title"`
		attrStart = `content="`
	)

	// contentOf returns the text between the first content=" in fragment and
	// the double quote that closes it.
	contentOf := func(fragment string) (string, bool) {
		start := strings.Index(fragment, attrStart)
		if start < 0 {
			return "", false
		}
		start += len(attrStart)
		end := strings.IndexByte(fragment[start:], '"')
		if end < 0 {
			return "", false
		}
		return fragment[start : start+end], true
	}

	for _, raw := range strings.Split(htmlContent, "\n") {
		line := strings.TrimSpace(raw)
		if !strings.Contains(strings.ToLower(line), marker) {
			continue
		}

		// Splitting on "<" yields one fragment per tag, so the content
		// attribute is read from the same tag as the marker rather than from
		// whichever tag happens to come first on the line.
		for _, tag := range strings.Split(line, "<") {
			if !strings.Contains(strings.ToLower(tag), marker) {
				continue
			}
			if value, ok := contentOf(tag); ok {
				if cleaned := s.optimizedTitleClean(value); cleaned != "" {
					return cleaned
				}
			}
		}

		// Fallback: first content attribute on the whole line. This covers
		// values that themselves contain "<" and were split apart above.
		if value, ok := contentOf(line); ok {
			if cleaned := s.optimizedTitleClean(value); cleaned != "" {
				return cleaned
			}
		}
	}
	return ""
}
// ExtractFromJSONLD returns the cleaned "name" value of a JSON-LD object found
// in htmlContent, or "" when there is none.
//
// The document is scanned line by line. A line qualifies when it contains the
// exact text "@type":"VideoObject" or "@type":"WebPage" (no whitespace around
// the colon) together with a "name": key. The value is the text between the
// first double quote after that key and the next double quote, passed through
// optimizedTitleClean. A candidate whose cleaned value is empty is skipped.
//
// NOTE: the value ends at the next double quote, so a name containing an
// escaped quote (\") is cut short at that point.
func (s *URLMetadataService) ExtractFromJSONLD(htmlContent string) string {
	const nameKey = `"name":`

	for _, raw := range strings.Split(htmlContent, "\n") {
		line := strings.TrimSpace(raw)

		isVideo := strings.Contains(line, `"@type":"VideoObject"`)
		isPage := strings.Contains(line, `"@type":"WebPage"`)
		if !isVideo && !isPage {
			continue
		}

		keyAt := strings.Index(line, nameKey)
		if keyAt < 0 {
			continue
		}

		// Skip ahead to the quote that opens the value.
		afterKey := line[keyAt+len(nameKey):]
		opening := strings.IndexByte(afterKey, '"')
		if opening < 0 {
			continue
		}

		value := afterKey[opening+1:]
		closing := strings.IndexByte(value, '"')
		if closing < 0 {
			continue
		}

		if cleaned := s.optimizedTitleClean(value[:closing]); cleaned != "" {
			return cleaned
		}
	}
	return ""
}
// ExtractFromTwitterCard returns the cleaned Twitter Card title found in
// htmlContent, or "" when there is none.
//
// The document is scanned line by line. A line qualifies when it contains
// name="twitter:title" (matched case-insensitively). The title is the text
// between content=" (matched case-sensitively) and the next double quote,
// passed through optimizedTitleClean. A candidate whose cleaned value is empty
// is skipped and the scan continues.
//
// Several tags can share one line (for example in minified HTML), so the
// content attribute is looked up first inside the tag that carries the
// twitter:title marker. Only when that yields nothing does the search fall
// back to the first content attribute anywhere on the line.
func (s *URLMetadataService) ExtractFromTwitterCard(htmlContent string) string {
	const (
		marker    = `name="twitter:title"`
		attrStart = `content="`
	)

	// contentOf returns the text between the first content=" in fragment and
	// the double quote that closes it.
	contentOf := func(fragment string) (string, bool) {
		start := strings.Index(fragment, attrStart)
		if start < 0 {
			return "", false
		}
		start += len(attrStart)
		end := strings.IndexByte(fragment[start:], '"')
		if end < 0 {
			return "", false
		}
		return fragment[start : start+end], true
	}

	for _, raw := range strings.Split(htmlContent, "\n") {
		line := strings.TrimSpace(raw)
		if !strings.Contains(strings.ToLower(line), marker) {
			continue
		}

		// Splitting on "<" yields one fragment per tag, so the content
		// attribute is read from the same tag as the marker rather than from
		// whichever tag happens to come first on the line.
		for _, tag := range strings.Split(line, "<") {
			if !strings.Contains(strings.ToLower(tag), marker) {
				continue
			}
			if value, ok := contentOf(tag); ok {
				if cleaned := s.optimizedTitleClean(value); cleaned != "" {
					return cleaned
				}
			}
		}

		// Fallback: first content attribute on the whole line. This covers
		// values that themselves contain "<" and were split apart above.
		if value, ok := contentOf(line); ok {
			if cleaned := s.optimizedTitleClean(value); cleaned != "" {
				return cleaned
			}
		}
	}
	return ""
}
// extractFromMetaTags returns the cleaned value of a generic title meta tag
// found in htmlContent, or "" when there is none.
//
// The document is scanned line by line. A line qualifies when it contains
// name="title" (matched case-insensitively). The title is the text between
// content=" (matched case-sensitively) and the next double quote, passed
// through optimizedTitleClean. A candidate whose cleaned value is empty is
// skipped and the scan continues.
//
// Several tags can share one line (for example in minified HTML), so the
// content attribute is looked up first inside the tag that carries the
// name="title" marker. Only when that yields nothing does the search fall back
// to the first content attribute anywhere on the line.
func (s *URLMetadataService) extractFromMetaTags(htmlContent string) string {
	const (
		marker    = `name="title"`
		attrStart = `content="`
	)

	// contentOf returns the text between the first content=" in fragment and
	// the double quote that closes it.
	contentOf := func(fragment string) (string, bool) {
		start := strings.Index(fragment, attrStart)
		if start < 0 {
			return "", false
		}
		start += len(attrStart)
		end := strings.IndexByte(fragment[start:], '"')
		if end < 0 {
			return "", false
		}
		return fragment[start : start+end], true
	}

	for _, raw := range strings.Split(htmlContent, "\n") {
		line := strings.TrimSpace(raw)
		if !strings.Contains(strings.ToLower(line), marker) {
			continue
		}

		// Splitting on "<" yields one fragment per tag, so the content
		// attribute is read from the same tag as the marker rather than from
		// whichever tag happens to come first on the line.
		for _, tag := range strings.Split(line, "<") {
			if !strings.Contains(strings.ToLower(tag), marker) {
				continue
			}
			if value, ok := contentOf(tag); ok {
				if cleaned := s.optimizedTitleClean(value); cleaned != "" {
					return cleaned
				}
			}
		}

		// Fallback: first content attribute on the whole line. This covers
		// values that themselves contain "<" and were split apart above.
		if value, ok := contentOf(line); ok {
			if cleaned := s.optimizedTitleClean(value); cleaned != "" {
				return cleaned
			}
		}
	}
	return ""
}
func (s *URLMetadataService) optimizedTitleClean(title string) string {
if title == "" {
return ""