chore: Improve email parsing using email trimmer gem (#3611)

Email parsing using email_trimmer gem

Fixes: #3539, #2954, #3572
This commit is contained in:
Tejaswini Chile 2021-12-22 18:16:40 +05:30 committed by GitHub
parent 009abc1948
commit 44486fc8e1
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 1232 additions and 32 deletions

View file

@ -0,0 +1,31 @@
# Reduces an HTML body to plain text after stripping quoted and tabular
# content: every <blockquote> is swapped for the literal text '> ' and every
# <table> is dropped before the markup is handed to Html2Text.
class HtmlParser
  class << self
    # Convenience entry point: builds a parser for +raw_body+ and returns
    # its filtered plain-text rendering.
    def parse_reply(raw_body)
      new(raw_body).filtered_text
    end
  end

  # The HTML string exactly as it was given to the constructor.
  attr_reader :raw_body

  def initialize(raw_body)
    @raw_body = raw_body
  end

  # Parsed Nokogiri document for +raw_body+; parsed once and then reused.
  def document
    return @document if @document

    @document = Nokogiri::HTML(raw_body)
  end

  # Mutates the parsed document in place: blockquotes first, tables second.
  def filter_replies!
    document.xpath('//blockquote').each do |quote|
      quote.replace('> ')
    end
    document.xpath('//table').each { |table| table.remove }
  end

  # Inner HTML of the document after filtering. The filtering pass runs only
  # on the first call; later calls return the cached string.
  def filtered_html
    return @filtered_html if @filtered_html

    filter_replies!
    @filtered_html = document.inner_html
  end

  # Result of Html2Text.convert applied to the filtered HTML (memoized).
  def filtered_text
    return @filtered_text if @filtered_text

    @filtered_text = Html2Text.convert(filtered_html)
  end
end