You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

453 lines
13 KiB

5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
6 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
  1. defmodule AutoLinker.Parser do
  2. @moduledoc """
  3. Module to handle parsing the input string.
  4. """
  5. alias AutoLinker.Builder
  6. @invalid_url ~r/(\.\.+)|(^(\d+\.){1,2}\d+$)/
  7. @match_url ~r{^[\w\.-]+(?:\.[\w\.-]+)+[\w\-\._~%:/?#[\]@!\$&'\(\)\*\+,;=.]+$}
  8. @match_scheme ~r{^(?:\W*)?(?<url>(?:https?:\/\/)?[\w.-]+(?:\.[\w\.-]+)+[\w\-\._~%:\/?#[\]@!\$&'\(\)\*\+,;=.]+$)}u
  9. @match_phone ~r"((?:x\d{2,7})|(?:(?:\+?1\s?(?:[.-]\s?)?)?(?:\(\s?(?:[2-9]1[02-9]|[2-9][02-8]1|[2-9][02-8][02-9])\s?\)|(?:[2-9]1[02-9]|[2-9][02-8]1|[2-9][02-8][02-9]))\s?(?:[.-]\s?)?)(?:[2-9]1[02-9]|[2-9][02-9]1|[2-9][02-9]{2})\s?(?:[.-]\s?)?(?:[0-9]{4}))"
  10. @match_hostname ~r{^(?:\W*https?:\/\/)?(?:[^@\n]+\\w@)?(?<host>[^:#~\/\n?]+)}u
  11. @match_ip ~r"^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])$"
  12. # @user
  13. # @user@example.com
  14. @match_mention ~r"^@[a-zA-Z\d_-]+@[a-zA-Z0-9_-](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*|@[a-zA-Z\d_-]+"u
  15. # https://www.w3.org/TR/html5/forms.html#valid-e-mail-address
  16. @match_email ~r"^[a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$"u
  17. @match_hashtag ~r/^(?<tag>\#[[:word:]_]*[[:alpha:]_·][[:word:]_·\p{M}]*)/u
  18. @prefix_extra [
  19. "magnet:?",
  20. "dweb://",
  21. "dat://",
  22. "gopher://",
  23. "ipfs://",
  24. "ipns://",
  25. "irc://",
  26. "ircs://",
  27. "irc6://",
  28. "mumble://",
  29. "ssb://"
  30. ]
  31. @tlds "./priv/tlds.txt" |> File.read!() |> String.split("\n", trim: true) |> MapSet.new()
  32. @default_opts ~w(url)a
  @doc """
  Parse the given string, identifying items to link.

  Parses the string, replacing the matching urls and phone numbers with an html link.

  `input` is either a binary or a `{text, user_acc}` tuple. A binary input
  returns only the linked text; a tuple input returns whatever the linking
  passes produce for the tuple. `opts` may be a map or a keyword list. Options
  are merged over the `:opts` application environment of `:auto_linker`, and
  `:url` linking is switched on unless it is set in either place.

  ## Examples

      iex> AutoLinker.Parser.parse("Check out google.com")
      ~s{Check out <a href="http://google.com" class="auto-linker" target="_blank" rel="noopener noreferrer">google.com</a>}

      iex> AutoLinker.Parser.parse("call me at x9999", phone: true)
      ~s{call me at <a href="#" class="phone-number" data-phone="9999">x9999</a>}

      iex> AutoLinker.Parser.parse("or at home on 555.555.5555", phone: true)
      ~s{or at home on <a href="#" class="phone-number" data-phone="5555555555">555.555.5555</a>}

      iex> AutoLinker.Parser.parse(", work (555) 555-5555", phone: true)
      ~s{, work <a href="#" class="phone-number" data-phone="5555555555">(555) 555-5555</a>}
  """
  def parse(input, opts \\ %{})

  # Binary input: wrap it with a nil accumulator and hand back only the text.
  def parse(input, opts) when is_binary(input), do: elem(parse({input, nil}, opts), 0)

  # Keyword-list options are normalised to a map first.
  def parse(input, list) when is_list(list), do: parse(input, Map.new(list))

  def parse(input, opts) do
    attributes = Application.get_env(:auto_linker, :attributes, [])

    config =
      :auto_linker
      |> Application.get_env(:opts, [])
      |> Map.new()
      |> Map.put(:attributes, attributes)

    # A default option is enabled only when neither the caller nor the
    # application config gave it a value.
    defaults =
      for opt <- @default_opts, is_nil(opts[opt]), is_nil(config[opt]), into: %{} do
        {opt, true}
      end

    merged = config |> Map.merge(opts) |> Map.merge(defaults)
    do_parse(input, merged)
  end
  # Pass driver. Each clause runs one linking pass over the input, removes the
  # option that selected it and recurses, so the passes run in clause order:
  # phone, hashtag, extra, markdown, email, url, mention. When no pass option
  # is left the input is returned as is.
  defp do_parse(input, %{phone: false} = opts), do: do_parse(input, Map.delete(opts, :phone))
  defp do_parse(input, %{url: false} = opts), do: do_parse(input, Map.delete(opts, :url))

  defp do_parse(input, %{phone: _} = opts),
    do: run_pass(input, opts, :phone, &check_and_link_phone/3)

  defp do_parse(input, %{hashtag: true} = opts),
    do: run_pass(input, opts, :hashtag, &check_and_link_hashtag/3)

  defp do_parse(input, %{extra: true} = opts),
    do: run_pass(input, opts, :extra, &check_and_link_extra/3)

  # Markdown links are built by the Builder on the whole text, not by the scanner.
  defp do_parse({text, user_acc}, %{markdown: true} = opts) do
    linked = Builder.create_markdown_links(text, opts)
    do_parse({linked, user_acc}, Map.delete(opts, :markdown))
  end

  defp do_parse(input, %{email: true} = opts),
    do: run_pass(input, opts, :email, &check_and_link_email/3)

  defp do_parse({text, user_acc}, %{url: _} = opts) do
    exclude = Map.get(opts, :exclude_patterns)

    # The URL pass is skipped when the text starts with one of the
    # configured `:exclude_patterns`.
    scanned =
      if is_list(exclude) and String.starts_with?(text, exclude) do
        {text, user_acc}
      else
        do_parse({text, user_acc}, opts, {"", "", :parsing}, &check_and_link/3)
      end

    do_parse(scanned, Map.delete(opts, :url))
  end

  defp do_parse(input, %{mention: true} = opts),
    do: run_pass(input, opts, :mention, &check_and_link_mention/3)

  defp do_parse(input, _), do: input

  # Scans `input` with `handler`, then continues with the option `key` removed.
  defp run_pass(input, opts, key, handler) do
    scanned = do_parse(input, opts, {"", "", :parsing}, handler)
    do_parse(scanned, Map.delete(opts, key))
  end
  # Byte-level scanner behind every linking pass.
  #
  # Arguments:
  #   * `{text, user_acc}` - remaining input and the caller's accumulator
  #   * `opts`             - option map of the current pass
  #   * `{buffer, acc, state}` - the token being collected, the output built so
  #     far, and the scanner state (`:parsing`, `:skip`, `{:open, level}`,
  #     `{:attrs, level}`, `{:html, level}` or `{:close, level}`)
  #   * `handler`          - 3-arity function applied to completed tokens
  #
  # Returns `{output, user_acc}`. Clause order is significant.

  # Input exhausted with nothing pending.
  defp do_parse({"", user_acc}, _opts, {"", acc, _}, _handler),
    do: {acc, user_acc}

  # `<a`, `<pre` and `<code` switch to :skip; the pending buffer is copied to
  # the output without going through the handler.
  defp do_parse({"<a" <> text, user_acc}, opts, {buffer, acc, :parsing}, handler),
    do: do_parse({text, user_acc}, opts, {"", acc <> buffer <> "<a", :skip}, handler)

  defp do_parse({"<pre" <> text, user_acc}, opts, {buffer, acc, :parsing}, handler),
    do: do_parse({text, user_acc}, opts, {"", acc <> buffer <> "<pre", :skip}, handler)

  defp do_parse({"<code" <> text, user_acc}, opts, {buffer, acc, :parsing}, handler),
    do: do_parse({text, user_acc}, opts, {"", acc <> buffer <> "<code", :skip}, handler)

  # The matching closing tags switch from :skip back to :parsing.
  defp do_parse({"</a>" <> text, user_acc}, opts, {buffer, acc, :skip}, handler),
    do: do_parse({text, user_acc}, opts, {"", acc <> buffer <> "</a>", :parsing}, handler)

  defp do_parse({"</pre>" <> text, user_acc}, opts, {buffer, acc, :skip}, handler),
    do: do_parse({text, user_acc}, opts, {"", acc <> buffer <> "</pre>", :parsing}, handler)

  defp do_parse({"</code>" <> text, user_acc}, opts, {buffer, acc, :skip}, handler),
    do: do_parse({text, user_acc}, opts, {"", acc <> buffer <> "</code>", :parsing}, handler)

  # Any other `<` seen with an empty buffer opens a tag; `level` counts nesting.
  defp do_parse({"<" <> text, user_acc}, opts, {"", acc, :parsing}, handler),
    do: do_parse({text, user_acc}, opts, {"<", acc, {:open, 1}}, handler)

  defp do_parse({"<" <> text, user_acc}, opts, {"", acc, {:html, level}}, handler) do
    do_parse({text, user_acc}, opts, {"<", acc, {:open, level + 1}}, handler)
  end

  # `>` ends the attribute section of an opening tag.
  defp do_parse({">" <> text, user_acc}, opts, {buffer, acc, {:attrs, level}}, handler),
    do:
      do_parse(
        {text, user_acc},
        opts,
        {"", acc <> buffer <> ">", {:html, level}},
        handler
      )

  # Bytes inside the attribute section are copied straight to the output.
  defp do_parse({<<ch::8>> <> text, user_acc}, opts, {"", acc, {:attrs, level}}, handler) do
    do_parse({text, user_acc}, opts, {"", acc <> <<ch::8>>, {:attrs, level}}, handler)
  end

  # `</` inside element content: the pending token goes through the handler
  # before the closing tag starts.
  defp do_parse({"</" <> text, user_acc}, opts, {buffer, acc, {:html, level}}, handler) do
    {buffer, user_acc} = run_handler(handler, buffer, opts, user_acc)

    do_parse(
      {text, user_acc},
      opts,
      {"", acc <> buffer <> "</", {:close, level}},
      handler
    )
  end

  # `>` finishing a closing tag: the outermost level returns to :parsing,
  # deeper levels step out by one.
  defp do_parse({">" <> text, user_acc}, opts, {buffer, acc, {:close, 1}}, handler),
    do: do_parse({text, user_acc}, opts, {"", acc <> buffer <> ">", :parsing}, handler)

  defp do_parse({">" <> text, user_acc}, opts, {buffer, acc, {:close, level}}, handler),
    do:
      do_parse(
        {text, user_acc},
        opts,
        {"", acc <> buffer <> ">", {:html, level - 1}},
        handler
      )

  # The byte after `<` moves the scanner into the attribute section.
  defp do_parse({text, user_acc}, opts, {buffer, acc, {:open, level}}, handler) do
    do_parse({text, user_acc}, opts, {"", acc <> buffer, {:attrs, level}}, handler)
  end

  # default cases where state is not important

  # When the options carry a :phone key, spaces are kept inside the buffer
  # instead of ending the token.
  defp do_parse(
         {" " <> text, user_acc},
         %{phone: _} = opts,
         {buffer, acc, state},
         handler
       ),
       do: do_parse({text, user_acc}, opts, {buffer <> " ", acc, state}, handler)

  # Whitespace ends the current token: run the handler on it and emit the
  # whitespace byte unchanged.
  defp do_parse(
         {<<char::bytes-size(1), text::binary>>, user_acc},
         opts,
         {buffer, acc, state},
         handler
       )
       when char in [" ", "\r", "\n"] do
    {buffer, user_acc} = run_handler(handler, buffer, opts, user_acc)

    do_parse(
      {text, user_acc},
      opts,
      {"", acc <> buffer <> char, state},
      handler
    )
  end

  # Last byte of the input: it completes the token, which is then handled.
  defp do_parse({<<ch::8>>, user_acc}, opts, {buffer, acc, state}, handler) do
    {buffer, user_acc} = run_handler(handler, buffer <> <<ch::8>>, opts, user_acc)

    do_parse(
      {"", user_acc},
      opts,
      {"", acc <> buffer, state},
      handler
    )
  end

  # Any other byte extends the current token.
  defp do_parse({<<ch::8>> <> text, user_acc}, opts, {buffer, acc, state}, handler),
    do: do_parse({text, user_acc}, opts, {buffer <> <<ch::8>>, acc, state}, handler)

  # Input exhausted while a token is still pending. This is reached when the
  # phone-space clause above consumes a trailing space: the text becomes ""
  # with a non-empty buffer, which previously matched no clause and raised
  # FunctionClauseError. The pending token is handled and flushed instead.
  # Kept last so that every input that used to terminate takes the same path.
  defp do_parse({"", user_acc}, opts, {buffer, acc, _state}, handler) do
    {buffer, user_acc} = run_handler(handler, buffer, opts, user_acc)
    {acc <> buffer, user_acc}
  end
  @doc """
  URL handler for the scanner.

  Returns the link markup for `buffer` when `is_url?/2` accepts it, otherwise
  `buffer` unchanged. With `scheme: true` only the part captured by the scheme
  pattern is replaced by the link; the rest of the buffer is kept.
  """
  def check_and_link(buffer, %{scheme: true} = opts, _user_acc) do
    if is_url?(buffer, true), do: link_scheme_url(buffer, opts), else: buffer
  end

  def check_and_link(buffer, opts, _user_acc) do
    linkable? = is_url?(buffer, opts[:scheme])
    link_url(linkable?, buffer, opts)
  end

  # Links the `url` capture of the scheme pattern inside `buffer`.
  defp link_scheme_url(buffer, opts) do
    case Regex.run(@match_scheme, buffer, capture: [:url]) do
      [^buffer] -> link_url(true, buffer, opts)
      [url] -> String.replace(buffer, url, link_url(true, url, opts))
    end
  end
  @doc """
  Email handler for the scanner: links `buffer` when `is_email?/1` accepts it,
  otherwise returns `buffer` unchanged.
  """
  def check_and_link_email(buffer, opts, _user_acc),
    do: link_email(is_email?(buffer), buffer, opts)
  @doc """
  Phone handler for the scanner: links the phone numbers found in `buffer`,
  or returns `buffer` unchanged when `match_phone/1` finds none.
  """
  def check_and_link_phone(buffer, opts, _user_acc) do
    matches = match_phone(buffer)
    link_phone(matches, buffer, opts)
  end
  @doc """
  Mention handler for the scanner: passes the mention found at the start of
  `buffer` (or `nil`) to `link_mention/4` together with the user accumulator.
  """
  def check_and_link_mention(buffer, opts, user_acc) do
    mention = match_mention(buffer)
    link_mention(mention, buffer, opts, user_acc)
  end
  @doc """
  Hashtag handler for the scanner: passes the hashtag found at the start of
  `buffer` (or `nil`) to `link_hashtag/4` together with the user accumulator.
  """
  def check_and_link_hashtag(buffer, opts, user_acc) do
    hashtag = match_hashtag(buffer)
    link_hashtag(hashtag, buffer, opts, user_acc)
  end
  @doc """
  Handler for the extra URI schemes.

  An `xmpp:` buffer is linked when the part after the prefix passes
  `is_email?/1`; any other buffer is linked when it starts with one of the
  prefixes in `@prefix_extra`. Otherwise `buffer` is returned unchanged.
  """
  def check_and_link_extra("xmpp:" <> handle = buffer, opts, _user_acc),
    do: link_extra(is_email?(handle), buffer, opts)

  def check_and_link_extra(buffer, opts, _user_acc),
    do: link_extra(String.starts_with?(buffer, @prefix_extra), buffer, opts)
  # @doc false
  # Returns true when `buffer` is not rejected by @invalid_url, matches the URL
  # pattern and has a valid TLD (or IP host). A second argument of `true`
  # selects the scheme-aware pattern; anything else selects the plain one.
  def is_url?(buffer, true), do: url_like?(buffer, @match_scheme)
  def is_url?(buffer, _), do: url_like?(buffer, @match_url)

  # Shared check; `pattern` is only evaluated when @invalid_url does not match.
  defp url_like?(buffer, pattern) do
    not Regex.match?(@invalid_url, buffer) and
      is_valid_tld?(Regex.match?(pattern, buffer), buffer)
  end
  @doc """
  Returns true when `buffer` is not rejected by the invalid-URL pattern,
  matches the email pattern and passes `is_valid_tld?/2`.
  """
  def is_email?(buffer) do
    cond do
      Regex.match?(@invalid_url, buffer) -> false
      true -> is_valid_tld?(Regex.match?(@match_email, buffer), buffer)
    end
  end
  @doc """
  Second stage of the URL/email checks.

  The first argument is the result of the preceding pattern match. When it is
  `true`, the host is extracted from `buffer` and accepted if it is an IP
  address or if its last dot-separated label is in the TLD set. When it is
  `false`, or no host can be extracted, the result is `false`.
  """
  def is_valid_tld?(true, buffer) do
    case Regex.run(@match_hostname, buffer, capture: [:host]) do
      [host] ->
        is_ip?(host) or MapSet.member?(@tlds, host |> String.split(".") |> List.last())

      _ ->
        false
    end
  end

  def is_valid_tld?(false, _), do: false
  @doc """
  Returns true when `buffer` matches the dotted-quad pattern in `@match_ip`.
  """
  def is_ip?(buffer), do: Regex.match?(@match_ip, buffer)
  @doc false
  # Returns the list of phone matches scanned from `buffer`, or nil when there
  # are none.
  def match_phone(buffer) do
    with [] <- Regex.scan(@match_phone, buffer), do: nil
  end
  @doc """
  Returns the mention matched in `buffer` by the mention pattern, or `nil`
  when the pattern does not match.
  """
  def match_mention(buffer) do
    with [mention] <- Regex.run(@match_mention, buffer) do
      mention
    else
      _ -> nil
    end
  end
  @doc """
  Returns the `tag` capture of the hashtag pattern for `buffer`, or `nil`
  when the pattern does not match.
  """
  def match_hashtag(buffer) do
    with [hashtag] <- Regex.run(@match_hashtag, buffer, capture: [:tag]) do
      hashtag
    else
      _ -> nil
    end
  end
  @doc """
  Replaces `hashtag` inside `buffer` with its link.

  With no hashtag (`nil`) the buffer is returned unchanged. Otherwise the link
  comes from the `:hashtag_handler` option when present (called with the
  hashtag, buffer, opts and user accumulator), else from the Builder. A
  handler may return either a binary or a `{binary, user_acc}` tuple; a binary
  result keeps the `user_acc` that was passed in.
  """
  def link_hashtag(nil, buffer, _, _user_acc), do: buffer

  def link_hashtag(hashtag, buffer, %{hashtag_handler: hashtag_handler} = opts, user_acc) do
    hashtag
    |> hashtag_handler.(buffer, opts, user_acc)
    |> maybe_update_buffer(hashtag, buffer, user_acc)
  end

  def link_hashtag(hashtag, buffer, opts, user_acc) do
    hashtag
    |> Builder.create_hashtag_link(buffer, opts)
    |> maybe_update_buffer(hashtag, buffer, user_acc)
  end

  @doc """
  Replaces `mention` inside `buffer` with its link.

  With no mention (`nil`) the result is `{buffer, user_acc}`. Otherwise the
  link comes from the `:mention_handler` option when present, else from the
  Builder. A binary result keeps the `user_acc` that was passed in.
  """
  def link_mention(nil, buffer, _, user_acc), do: {buffer, user_acc}

  def link_mention(mention, buffer, %{mention_handler: mention_handler} = opts, user_acc) do
    mention
    |> mention_handler.(buffer, opts, user_acc)
    |> maybe_update_buffer(mention, buffer, user_acc)
  end

  def link_mention(mention, buffer, opts, user_acc) do
    mention
    |> Builder.create_mention_link(buffer, opts)
    |> maybe_update_buffer(mention, buffer, user_acc)
  end

  # Normalises a link result and splices it back into the buffer.
  #
  # A bare binary used to be wrapped as `{out, nil}`, which discarded the
  # caller's accumulator; it is now paired with the accumulator that was
  # passed to the link function.
  defp maybe_update_buffer(out, match, buffer, user_acc) when is_binary(out) do
    maybe_update_buffer({out, user_acc}, match, buffer, user_acc)
  end

  # The match is only part of the buffer: substitute it so the surrounding
  # characters are kept.
  defp maybe_update_buffer({out, user_acc}, match, buffer, _fallback_acc)
       when match != buffer and out != buffer do
    {String.replace(buffer, match, out), user_acc}
  end

  defp maybe_update_buffer(out, _match, _buffer, _fallback_acc), do: out
  @doc """
  Builds the phone link(s) for `buffer` from the match list, or returns
  `buffer` unchanged when the match list is `nil`.
  """
  def link_phone(matches, buffer, opts) do
    if is_nil(matches) do
      buffer
    else
      Builder.create_phone_link(matches, buffer, opts)
    end
  end
  @doc false
  # Builds a link for `buffer` only when the first argument is exactly `true`;
  # any other value returns `buffer` unchanged.
  def link_url(linkable, buffer, opts) do
    case linkable do
      true -> Builder.create_link(buffer, opts)
      _ -> buffer
    end
  end
  @doc false
  # Builds an email link for `buffer` only when the first argument is exactly
  # `true`; any other value returns `buffer` unchanged.
  def link_email(linkable, buffer, opts) do
    case linkable do
      true -> Builder.create_email_link(buffer, opts)
      _ -> buffer
    end
  end
  @doc """
  Builds an extra-scheme link for `buffer` when the first argument is exactly
  `true`; any other value returns `buffer` unchanged.
  """
  def link_extra(linkable, buffer, opts) do
    case linkable do
      true -> Builder.create_extra_link(buffer, opts)
      _ -> buffer
    end
  end
  # Calls `handler` with the buffer, options and user accumulator and
  # normalises the result to `{buffer, user_acc}`: a 2-tuple is passed through
  # as returned, any other value is paired with the accumulator that was
  # given to the handler.
  defp run_handler(handler, buffer, opts, user_acc) do
    result = handler.(buffer, opts, user_acc)

    if match?({_, _}, result) do
      result
    else
      {result, user_acc}
    end
  end
  362. end