You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

343 lines
10 KiB

4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
  1. defmodule Linkify.Parser do
  2. @moduledoc """
  3. Module to handle parsing the input string.
  4. """
  5. alias Linkify.Builder
  # Rejects candidates that contain two or more consecutive dots, or that consist
  # solely of two or three dot-separated numbers (e.g. `1.2` or `1.2.3`).
  @invalid_url ~r/(\.\.+)|(^(\d+\.){1,2}\d+$)/

  # YouTube watch / short links; the first capture group is the video id.
  # Invisible zero-width characters that had been pasted into the trailing
  # query-string group were removed: they had to be matched literally, so the
  # group could never match ordinary input.
  @match_youtube ~r{http(?:s?):\/\/(?:www\.)?youtu(?:be\.com\/watch\?v=|\.be\/)([\w\-\_]*)(&(amp;)?[\w\?=]*)?}u

  # A whole token that looks like a URL (scheme optional). Leading non-word
  # characters are skipped; the named group `url` holds the URL itself.
  @match_url ~r{^(?:\W*)?(?<url>(?:https?:\/\/)?[\w.-]+(?:\.[\w\.-]+)+[\w\-\._~%:\/?#[\]@!\$&'\(\)\*\+,;=.]+$)}u

  # Extracts the optional `scheme` and the `host` part of a candidate.
  # NOTE(review): `\\w@` is an escaped backslash followed by a literal `w@`, not
  # the `\w` class - confirm whether the userinfo part was meant to be `\w@`.
  @match_hostname ~r{^\W*(?<scheme>https?:\/\/)?(?:[^@\n]+\\w@)?(?<host>[^:#~\/\n?]+)}u

  # Dotted-quad IPv4 address with every octet in 0..255.
  @match_ip ~r"^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])$"

  # @user
  # @user@example.com
  @match_mention ~r"^@[a-zA-Z\d_-]+@[a-zA-Z0-9_-](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*|@[a-zA-Z\d_-]+"u

  # https://www.w3.org/TR/html5/forms.html#valid-e-mail-address
  @match_email ~r"^[a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$"u

  # `#tag` at the start of the token; the named group `tag` includes the `#`.
  @match_hashtag ~r/^(?<tag>\#[[:word:]_]*[[:alpha:]_·][[:word:]_·\p{M}]*)/u
  # Non-HTTP URI prefixes that the `:extra` pass links when a token starts with one.
  @prefix_extra [
    "magnet:?",
    "dweb://",
    "dat://",
    "gopher://",
    "ipfs://",
    "ipns://",
    "irc://",
    "ircs://",
    "irc6://",
    "mumble://",
    "ssb://"
  ]

  # The TLD list is read once at compile time. Registering the file as an
  # external resource makes the compiler rebuild this module when it changes,
  # instead of silently keeping a stale set.
  @external_resource "./priv/tlds.txt"
  @tlds "./priv/tlds.txt" |> File.read!() |> String.split("\n", trim: true) |> MapSet.new()

  # Options merged underneath whatever the caller passes to `parse/2`.
  @default_opts %{
    url: true,
    validate_tld: true
  }
  @doc """
  Parse the given string, identifying items to link.

  Parses the string, replacing the matching urls with an html link.

  `input` is either a binary, in which case only the rewritten binary is
  returned, or an `{input, user_acc}` tuple, in which case the
  `{output, user_acc}` tuple produced by the parsing passes is returned.

  `opts` may be a map or a keyword list; it is merged over the defaults
  (`url: true, validate_tld: true`). One parsing pass is run for every
  known type whose option value is exactly `true`.

  ## Examples

      iex> Linkify.Parser.parse("Check out google.com")
      ~s{Check out <a href="http://google.com">google.com</a>}

  """

  @types [:url, :email, :hashtag, :mention, :extra, :youtube]

  def parse(input, opts \\ %{})
  def parse(input, opts) when is_binary(input), do: {input, %{}} |> parse(opts) |> elem(0)
  def parse(input, list) when is_list(list), do: parse(input, Enum.into(list, %{}))

  def parse(input, opts) do
    opts = Map.merge(@default_opts, opts)

    Enum.reduce(opts, input, fn
      {type, true}, input when type in @types ->
        do_parse(input, opts, {"", "", :parsing}, type)

      # Options that are not an enabled link type (e.g. `validate_tld`, handlers,
      # or a type set to something other than `true`) leave the input untouched.
      _other_option, input ->
        input
    end)
  end
  # Byte-wise scanner that runs one linking pass of kind `type` over the input.
  #
  # Arguments:
  #   * `{text, user_acc}` - remaining input and the caller's accumulator
  #   * `opts`             - merged options map, passed through to `link/4`
  #   * `{buffer, acc, state}` - current unflushed token, output built so far,
  #     and the scanner state; states used below are `:parsing`, `:skip`,
  #     `{:open, level}`, `{:attrs, level}`, `{:html, level}`, `{:close, level}`
  #   * `type`             - the link type handed to `link/4`
  #
  # Returns `{output, user_acc}`. Clause order is significant.

  # End of input with nothing buffered: done.
  defp do_parse({"", user_acc}, _opts, {"", acc, _}, _handler),
    do: {acc, user_acc}

  # Text starting with `<a`, `<pre` or `<code` switches to `:skip`; the pending
  # buffer is flushed to the output without being linked.
  # NOTE(review): these are plain prefix matches, so e.g. `<abbr` also matches
  # the `<a` clause - confirm that is intended.
  defp do_parse({"<a" <> text, user_acc}, opts, {buffer, acc, :parsing}, type),
    do: do_parse({text, user_acc}, opts, {"", acc <> buffer <> "<a", :skip}, type)

  defp do_parse({"<pre" <> text, user_acc}, opts, {buffer, acc, :parsing}, type),
    do: do_parse({text, user_acc}, opts, {"", acc <> buffer <> "<pre", :skip}, type)

  defp do_parse({"<code" <> text, user_acc}, opts, {buffer, acc, :parsing}, type),
    do: do_parse({text, user_acc}, opts, {"", acc <> buffer <> "<code", :skip}, type)

  # Any of the three closing tags ends `:skip` (regardless of which tag opened it).
  defp do_parse({"</a>" <> text, user_acc}, opts, {buffer, acc, :skip}, type),
    do: do_parse({text, user_acc}, opts, {"", acc <> buffer <> "</a>", :parsing}, type)

  defp do_parse({"</pre>" <> text, user_acc}, opts, {buffer, acc, :skip}, type),
    do: do_parse({text, user_acc}, opts, {"", acc <> buffer <> "</pre>", :parsing}, type)

  defp do_parse({"</code>" <> text, user_acc}, opts, {buffer, acc, :skip}, type),
    do: do_parse({text, user_acc}, opts, {"", acc <> buffer <> "</code>", :parsing}, type)

  # Any other `<` seen with an empty buffer opens a tag at nesting level 1 ...
  defp do_parse({"<" <> text, user_acc}, opts, {"", acc, :parsing}, type),
    do: do_parse({text, user_acc}, opts, {"<", acc, {:open, 1}}, type)

  # ... or one level deeper when already inside tag content.
  defp do_parse({"<" <> text, user_acc}, opts, {"", acc, {:html, level}}, type) do
    do_parse({text, user_acc}, opts, {"<", acc, {:open, level + 1}}, type)
  end

  # `>` ends the opening tag's attributes; what follows is tag content.
  defp do_parse({">" <> text, user_acc}, opts, {buffer, acc, {:attrs, level}}, type),
    do:
      do_parse(
        {text, user_acc},
        opts,
        {"", acc <> buffer <> ">", {:html, level}},
        type
      )

  # Bytes inside the opening tag are copied straight to the output, never linked.
  defp do_parse({<<ch::8>> <> text, user_acc}, opts, {"", acc, {:attrs, level}}, type) do
    do_parse({text, user_acc}, opts, {"", acc <> <<ch::8>>, {:attrs, level}}, type)
  end

  # `</` inside tag content: link whatever is buffered, then start the closing tag.
  defp do_parse({"</" <> text, user_acc}, opts, {buffer, acc, {:html, level}}, type) do
    {buffer, user_acc} = link(type, buffer, opts, user_acc)

    do_parse(
      {text, user_acc},
      opts,
      {"", acc <> buffer <> "</", {:close, level}},
      type
    )
  end

  # `>` of the outermost closing tag returns to plain-text parsing.
  defp do_parse({">" <> text, user_acc}, opts, {buffer, acc, {:close, 1}}, type),
    do: do_parse({text, user_acc}, opts, {"", acc <> buffer <> ">", :parsing}, type)

  # `>` of a nested closing tag pops one nesting level.
  defp do_parse({">" <> text, user_acc}, opts, {buffer, acc, {:close, level}}, type),
    do:
      do_parse(
        {text, user_acc},
        opts,
        {"", acc <> buffer <> ">", {:html, level - 1}},
        type
      )

  # Transition step: flush the buffered `<` and move to `:attrs` without
  # consuming any input.
  defp do_parse({text, user_acc}, opts, {buffer, acc, {:open, level}}, type) do
    do_parse({text, user_acc}, opts, {"", acc <> buffer, {:attrs, level}}, type)
  end

  # Space, CR or LF ends the current token: try to link it, then emit it
  # followed by the whitespace byte. The state is unconstrained, so this applies
  # to every state not handled by an earlier clause.
  defp do_parse(
         {<<char::bytes-size(1), text::binary>>, user_acc},
         opts,
         {buffer, acc, state},
         type
       )
       when char in [" ", "\r", "\n"] do
    {buffer, user_acc} = link(type, buffer, opts, user_acc)

    do_parse(
      {text, user_acc},
      opts,
      {"", acc <> buffer <> char, state},
      type
    )
  end

  # Last byte of the input: append it to the token, try to link, and recurse
  # with empty input so the terminating clause returns the result.
  defp do_parse({<<ch::8>>, user_acc}, opts, {buffer, acc, state}, type) do
    {buffer, user_acc} = link(type, buffer <> <<ch::8>>, opts, user_acc)

    do_parse(
      {"", user_acc},
      opts,
      {"", acc <> buffer, state},
      type
    )
  end

  # Any other byte extends the current token.
  defp do_parse({<<ch::8>> <> text, user_acc}, opts, {buffer, acc, state}, type),
    do: do_parse({text, user_acc}, opts, {buffer <> <<ch::8>>, acc, state}, type)
  @doc """
  Checks whether `buffer` (one token) qualifies as a link of the given `type`
  and, if so, returns it with the link markup applied.

  For `:youtube`, `:url`, `:email` and `:extra` the result is a binary: the
  linked text, or `buffer` unchanged when it does not qualify. For `:mention`
  and `:hashtag` the result is whatever `link_mention/4` / `link_hashtag/4`
  return.
  """
  def check_and_link(:youtube, buffer, opts, _user_acc) do
    if youtube?(buffer, opts), do: link_youtube(buffer, opts), else: buffer
  end

  def check_and_link(:url, buffer, opts, _user_acc) do
    # Test the token without a leading "(" and anything from the first ")" on.
    str = strip_parens(buffer)

    if url?(str, opts) do
      case @match_url |> Regex.run(str, capture: [:url]) |> hd() do
        # The whole token is the URL.
        ^buffer -> link_url(buffer, opts)
        # Only part of the token is the URL: link that part in place.
        url -> String.replace(buffer, url, link_url(url, opts))
      end
    else
      buffer
    end
  end

  def check_and_link(:email, buffer, opts, _user_acc) do
    if email?(buffer, opts), do: link_email(buffer, opts), else: buffer
  end

  def check_and_link(:mention, buffer, opts, user_acc) do
    buffer
    |> match_mention
    |> link_mention(buffer, opts, user_acc)
  end

  def check_and_link(:hashtag, buffer, opts, user_acc) do
    buffer
    |> match_hashtag
    |> link_hashtag(buffer, opts, user_acc)
  end

  # `xmpp:` URIs are linked only when the part after the scheme is a valid
  # address. Otherwise the token is returned untouched, including its `xmpp:`
  # prefix (previously the prefix was dropped from the output).
  def check_and_link(:extra, "xmpp:" <> handle = buffer, opts, _user_acc) do
    if email?(handle, opts), do: link_extra(buffer, opts), else: buffer
  end

  def check_and_link(:extra, buffer, opts, _user_acc) do
    if String.starts_with?(buffer, @prefix_extra), do: link_extra(buffer, opts), else: buffer
  end
  # For a token that starts with "(", returns the text between that "(" and the
  # first ")" (or the rest of the token when there is no ")"). Any other token
  # is returned unchanged.
  defp strip_parens("(" <> rest) do
    [inside | _after_paren] = String.split(rest, ")", parts: 2)
    inside
  end

  defp strip_parens(buffer), do: buffer
  # True when the token passes the generic URL sanity check, matches the
  # YouTube pattern, and passes TLD validation.
  def youtube?(buffer, opts) do
    cond do
      not valid_url?(buffer) -> false
      not Regex.match?(@match_youtube, buffer) -> false
      true -> valid_tld?(buffer, opts)
    end
  end
  # True when the token passes the generic URL sanity check, matches the URL
  # pattern, and passes TLD validation.
  def url?(buffer, opts) do
    cond do
      not valid_url?(buffer) -> false
      not Regex.match?(@match_url, buffer) -> false
      true -> valid_tld?(buffer, opts)
    end
  end
  # True when the token passes the generic URL sanity check, matches the
  # e-mail pattern, and passes TLD validation.
  def email?(buffer, opts) do
    cond do
      not valid_url?(buffer) -> false
      not Regex.match?(@match_email, buffer) -> false
      true -> valid_tld?(buffer, opts)
    end
  end
  # A candidate is acceptable unless it matches the `@invalid_url` pattern.
  defp valid_url?(url) do
    not (url =~ @invalid_url)
  end
  @doc """
  Validates a URL's TLD. Returns a boolean.

  Will return `true` if `:validate_tld` option set to `false`.
  Will skip validation and return `true` if `:validate_tld` set to `:no_scheme` and the url has a scheme.
  Will return `true` if the host is an IPv4 address.
  Returns `false` (instead of raising) when no host can be extracted from `url`.
  """
  def valid_tld?(url, opts) do
    if opts[:validate_tld] == false do
      # Validation disabled: no need to look at the url at all.
      true
    else
      case Regex.run(@match_hostname, url, capture: [:scheme, :host]) do
        [scheme, host] ->
          cond do
            ip?(host) ->
              true

            # don't validate if scheme is present
            opts[:validate_tld] == :no_scheme and scheme != "" ->
              true

            true ->
              tld = host |> String.split(".") |> List.last()
              MapSet.member?(@tlds, tld)
          end

        # `Regex.run/3` returns nil when the pattern finds no host (e.g. "").
        nil ->
          false
      end
    end
  end
  # True when the buffer matches the `@match_ip` pattern.
  def ip?(buffer) do
    buffer =~ @match_ip
  end
  # Returns the mention matched by `@match_mention` in the buffer, or nil.
  def match_mention(buffer) do
    with [mention] <- Regex.run(@match_mention, buffer) do
      mention
    else
      _no_match -> nil
    end
  end
  # Returns the `tag` capture of `@match_hashtag` for the buffer, or nil.
  def match_hashtag(buffer) do
    with [hashtag] <- Regex.run(@match_hashtag, buffer, capture: [:tag]) do
      hashtag
    else
      _no_match -> nil
    end
  end
  # Builds the link for a matched hashtag.
  #
  # * no match (nil): the buffer is returned unchanged
  # * `:hashtag_handler` present in opts: the handler is called with
  #   `(hashtag, buffer, opts, user_acc)`
  # * otherwise: `Builder.create_hashtag_link/3` is used
  #
  # In the last two cases the result goes through `maybe_update_buffer/3`.
  def link_hashtag(nil, buffer, _opts, _user_acc), do: buffer

  def link_hashtag(hashtag, buffer, %{hashtag_handler: handler} = opts, user_acc) do
    handled = handler.(hashtag, buffer, opts, user_acc)
    maybe_update_buffer(handled, hashtag, buffer)
  end

  def link_hashtag(hashtag, buffer, opts, _user_acc) do
    linked = Builder.create_hashtag_link(hashtag, buffer, opts)
    maybe_update_buffer(linked, hashtag, buffer)
  end
  # Builds the link for a matched mention.
  #
  # * no match (nil): `{buffer, user_acc}` is returned unchanged
  # * `:mention_handler` present in opts: the handler is called with
  #   `(mention, buffer, opts, user_acc)`
  # * otherwise: `Builder.create_mention_link/3` is used
  #
  # In the last two cases the result goes through `maybe_update_buffer/3`.
  def link_mention(nil, buffer, _opts, user_acc), do: {buffer, user_acc}

  def link_mention(mention, buffer, %{mention_handler: handler} = opts, user_acc) do
    handled = handler.(mention, buffer, opts, user_acc)
    maybe_update_buffer(handled, mention, buffer)
  end

  def link_mention(mention, buffer, opts, _user_acc) do
    linked = Builder.create_mention_link(mention, buffer, opts)
    maybe_update_buffer(linked, mention, buffer)
  end
  # Re-embeds a linked match into the token it was found in.
  #
  # A binary result is first wrapped as `{out, nil}`. For an `{out, user_acc}`
  # pair, when the match is only part of the buffer and the result differs from
  # the buffer, every occurrence of `match` in `buffer` is replaced by `out`;
  # otherwise the pair is returned as-is. Anything else is passed through.
  defp maybe_update_buffer(out, match, buffer) when is_binary(out),
    do: maybe_update_buffer({out, nil}, match, buffer)

  defp maybe_update_buffer({out, user_acc} = pair, match, buffer) do
    if match != buffer and out != buffer do
      {String.replace(buffer, match, out), user_acc}
    else
      pair
    end
  end

  defp maybe_update_buffer(other, _match, _buffer), do: other
  # Thin wrapper: forwards to `Builder.create_youtube/2`.
  @doc false
  defdelegate link_youtube(buffer, opts), to: Builder, as: :create_youtube
  # Thin wrapper: forwards to `Builder.create_link/2`.
  @doc false
  defdelegate link_url(buffer, opts), to: Builder, as: :create_link
  # Thin wrapper: forwards to `Builder.create_email_link/2`.
  @doc false
  defdelegate link_email(buffer, opts), to: Builder, as: :create_email_link
  # Thin wrapper: forwards to `Builder.create_extra_link/2`.
  defdelegate link_extra(buffer, opts), to: Builder, as: :create_extra_link
  # Runs `check_and_link/4` and normalises its result to `{buffer, user_acc}`:
  # a two-element tuple is returned as-is, any other value is paired with the
  # incoming `user_acc`.
  defp link(type, buffer, opts, user_acc) do
    case check_and_link(type, buffer, opts, user_acc) do
      {_linked, _new_acc} = pair -> pair
      linked -> {linked, user_acc}
    end
  end
  265. end