nvrim/buffer/
token_under_cursor.rs

//! Token classification under cursor (URL / file / directory / word).
//!
//! Retrieves the current line and cursor column, extracts the contiguous non-whitespace token under
//! the cursor, classifies it via URL parsing or filesystem inspection, and returns it as a tagged Lua table.
5
6use color_eyre::eyre::Context;
7use color_eyre::eyre::bail;
8use nvim_oxi::Object;
9use nvim_oxi::api::Buffer;
10use nvim_oxi::api::Window;
11use nvim_oxi::conversion::ToObject;
12use nvim_oxi::lua::ffi::State;
13use nvim_oxi::serde::Serializer;
14use serde::Serialize;
15use url::Url;
16use ytil_noxi::buffer::BufferExt;
17use ytil_noxi::buffer::CursorPosition;
18use ytil_sys::file::FileCmdOutput;
19use ytil_sys::lsof::ProcessFilter;
20
21/// Retrieve and classify the non-whitespace token under the cursor in the current window.
22///
23/// Returns [`Option::None`] if the current line or cursor position cannot be obtained,
24/// or if the cursor is on whitespace. On errors a notification is emitted to Nvim.
25/// On success returns a classified [`TokenUnderCursor`].
26pub fn get(_: ()) -> Option<TokenUnderCursor> {
27    let current_buffer = nvim_oxi::api::get_current_buf();
28    let cursor_pos = CursorPosition::get_current()?;
29
30    let token_under_cursor = if current_buffer.is_terminal() {
31        get_token_under_cursor_in_terminal_buffer(&current_buffer, &cursor_pos)
32    } else {
33        get_token_under_cursor_in_normal_buffer(&cursor_pos)
34    }
35    .as_deref()
36    .map(TokenUnderCursor::classify)?
37    .inspect_err(|err| ytil_noxi::notify::error(format!("error classifying word under cursor | error={err:?}")))
38    .ok()?;
39
40    let token_under_cursor = token_under_cursor
41        .refine_word(&current_buffer)
42        .inspect_err(|err| ytil_noxi::notify::error(format!("error refining word under cursor | error={err:?}")))
43        .ok()?;
44
45    Some(token_under_cursor)
46}
47
48fn get_token_under_cursor_in_terminal_buffer(buffer: &Buffer, cursor_pos: &CursorPosition) -> Option<String> {
49    let window_width = Window::current()
50        .get_width()
51        .wrap_err("error getting window width")
52        .and_then(|x| {
53            usize::try_from(x).wrap_err_with(|| format!("error converting window width to usize | width={x}"))
54        })
55        .inspect_err(|err| ytil_noxi::notify::error(format!("{err}")))
56        .ok()?
57        .saturating_sub(1);
58
59    let mut out = vec![];
60    let mut word_end_idx = 0;
61    for (idx, current_char) in ytil_noxi::buffer::get_current_line()?.char_indices() {
62        word_end_idx = idx;
63        if idx < cursor_pos.col {
64            if current_char.is_ascii_whitespace() {
65                out.clear();
66            } else {
67                out.push(current_char);
68            }
69        } else if idx > cursor_pos.col {
70            if current_char.is_ascii_whitespace() {
71                break;
72            }
73            out.push(current_char);
74        } else if current_char.is_ascii_whitespace() {
75            out.clear();
76            out.push(current_char);
77            break;
78        } else {
79            out.push(current_char);
80        }
81    }
82
83    // Check rows before the cursor one.
84    if word_end_idx.saturating_sub(out.len()) == 0 {
85        'outer: for idx in (0..cursor_pos.row.saturating_sub(1)).rev() {
86            let line = buffer.get_line(idx).ok()?.to_string_lossy().to_string();
87            if line.is_empty() {
88                break 'outer;
89            }
90            if let Some((_, prev)) = line.rsplit_once(' ') {
91                out.splice(0..0, prev.chars());
92                break;
93            }
94            if line.chars().count() < window_width {
95                break;
96            }
97            out.splice(0..0, line.chars());
98        }
99    }
100
101    // Check rows after the cursor one.
102    if word_end_idx >= window_width {
103        'outer: for idx in cursor_pos.row..usize::MAX {
104            let line = buffer.get_line(idx).ok()?.to_string_lossy().to_string();
105            if line.is_empty() {
106                break 'outer;
107            }
108            if let Some((next, _)) = line.split_once(' ') {
109                out.extend(next.chars());
110                break;
111            }
112            out.extend(line.chars());
113            if line.chars().count() < window_width {
114                break;
115            }
116        }
117    }
118
119    Some(out.into_iter().collect())
120}
121
122fn get_token_under_cursor_in_normal_buffer(cursor_pos: &CursorPosition) -> Option<String> {
123    let current_line = ytil_noxi::buffer::get_current_line()?;
124    get_word_at_index(&current_line, cursor_pos.col).map(ToOwned::to_owned)
125}
126
/// Classified representation of the token found under the cursor.
///
/// Used to distinguish between:
/// - URLs
/// - existing binary files
/// - existing text files
/// - existing directories
/// - plain tokens (fallback [`TokenUnderCursor::MaybeTextFile`])
///
/// Serialized with serde's adjacently tagged representation: `kind` holds the variant name and
/// `value` holds the payload. Tuple variants carry a string (`{ kind = "Url", value = "..." }`),
/// struct variants carry a nested table of their named fields
/// (`{ kind = "TextFile", value = { path = "...", lnum = ..., col = ... } }`).
#[derive(Clone, Debug, Serialize)]
#[serde(tag = "kind", content = "value")]
#[cfg_attr(test, derive(Eq, PartialEq))]
pub enum TokenUnderCursor {
    /// A string that successfully parsed as a [`Url`] via [`Url::parse`].
    Url(String),
    /// A filesystem path identified as a binary file by [`ytil_sys::file::exec_file_cmd`].
    BinaryFile(String),
    /// A filesystem path identified as a text file by [`ytil_sys::file::exec_file_cmd`].
    ///
    /// `lnum` and `col` are the optional numeric suffixes of a `path:lnum:col` token.
    TextFile {
        path: String,
        lnum: Option<i64>,
        col: Option<i64>,
    },
    /// A filesystem path identified as a directory by [`ytil_sys::file::exec_file_cmd`].
    Directory(String),
    /// A fallback plain token (word) when no more specific classification applied.
    ///
    /// `lnum` and `col` are the optional numeric suffixes of a `value:lnum:col` token.
    MaybeTextFile {
        value: String,
        lnum: Option<i64>,
        col: Option<i64>,
    },
}
160
161impl nvim_oxi::lua::Pushable for TokenUnderCursor {
162    unsafe fn push(self, lstate: *mut State) -> Result<std::ffi::c_int, nvim_oxi::lua::Error> {
163        unsafe {
164            self.to_object()
165                .map_err(nvim_oxi::lua::Error::push_error_from_err::<Self, _>)?
166                .push(lstate)
167        }
168    }
169}
170
171impl ToObject for TokenUnderCursor {
172    fn to_object(self) -> Result<Object, nvim_oxi::conversion::Error> {
173        self.serialize(Serializer::new()).map_err(Into::into)
174    }
175}
176
177/// Classify a [`String`] captured under the cursor into a [`TokenUnderCursor`].
178///
179/// 1. If it parses as a URL with [`Url::parse`], returns [`TokenUnderCursor::Url`].
180/// 2. Otherwise, invokes [`ytil_sys::file::exec_file_cmd`] to check filesystem type.
181/// 3. Falls back to [`TokenUnderCursor::MaybeTextFile`] on errors or unknown kinds.
182impl TokenUnderCursor {
183    fn classify(value: &str) -> color_eyre::Result<Self> {
184        Self::classify_url(value).or_else(|_| Self::classify_not_url(value))
185    }
186
187    fn classify_url(value: &str) -> color_eyre::Result<Self> {
188        let value = value
189            .trim_matches('"')
190            .trim_matches('`')
191            .trim_matches('\'')
192            .trim_start_matches('[')
193            .trim_end_matches(']')
194            .trim_start_matches('(')
195            .trim_end_matches(')')
196            .trim_start_matches('{')
197            .trim_end_matches('}');
198
199        let maybe_md_link = extract_markdown_link(value)
200            .or_else(|| extract_https_or_http_link(value))
201            .unwrap_or(value);
202
203        Ok(Url::parse(maybe_md_link).map(|_| Self::Url(maybe_md_link.to_string()))?)
204    }
205
206    fn classify_not_url(value: &str) -> color_eyre::Result<Self> {
207        let mut parts = value.split(':');
208
209        let Some(maybe_path) = parts.next() else {
210            return Ok(Self::MaybeTextFile {
211                value: value.to_string(),
212                lnum: None,
213                col: None,
214            });
215        };
216
217        let lnum = parts.next().map(str::parse).transpose().ok().flatten();
218        let col = parts.next().map(str::parse).transpose().ok().flatten();
219
220        Ok(match ytil_sys::file::exec_file_cmd(maybe_path)? {
221            FileCmdOutput::BinaryFile(x) => Self::BinaryFile(x),
222            FileCmdOutput::TextFile(path) => Self::TextFile { path, lnum, col },
223            FileCmdOutput::Directory(x) => Self::Directory(x),
224            FileCmdOutput::NotFound(path) | FileCmdOutput::Unknown(path) => {
225                Self::MaybeTextFile { value: path, lnum, col }
226            }
227        })
228    }
229
230    fn refine_word(&self, buffer: &Buffer) -> color_eyre::Result<Self> {
231        if let Self::MaybeTextFile { value, lnum, col } = self {
232            let pid = buffer.get_pid()?;
233
234            let mut lsof_res = ytil_sys::lsof::lsof(&ProcessFilter::Pid(&pid))?;
235
236            let Some(process_desc) = lsof_res.get_mut(0) else {
237                bail!("error no process found for pid | pid={pid:?}");
238            };
239
240            let maybe_path = {
241                process_desc.cwd.push(value);
242                let mut tmp = process_desc.cwd.to_string_lossy().to_string();
243                if let Some(lnum) = lnum {
244                    tmp.push(':');
245                    tmp.push_str(&lnum.to_string());
246                }
247                if let Some(col) = col {
248                    tmp.push(':');
249                    tmp.push_str(&col.to_string());
250                }
251                tmp
252            };
253
254            return Self::classify_not_url(&maybe_path);
255        }
256        Ok(self.clone())
257    }
258}
259
260/// Find the non-whitespace token in the supplied string `s` containing the visual index `idx`.
261///
262/// Returns [`Option::None`] if:
263/// - `idx` Is out of bounds.
264/// - `idx` Does not point to a character boundary.
265/// - The character at `idx` is whitespace
266fn get_word_at_index(s: &str, idx: usize) -> Option<&str> {
267    let byte_idx = convert_visual_to_byte_idx(s, idx)?;
268
269    // If pointing to whitespace, no word.
270    if s[byte_idx..].chars().next().is_some_and(char::is_whitespace) {
271        return None;
272    }
273
274    // Scan split words and see which span contains `byte_idx`.
275    let mut pos = 0;
276    for word in s.split_ascii_whitespace() {
277        let start = s[pos..].find(word)?.saturating_add(pos);
278        let end = start.saturating_add(word.len());
279        if (start..=end).contains(&byte_idx) {
280            return Some(word);
281        }
282        pos = end;
283    }
284    None
285}
286
/// Convert a visual (character) index into a byte index for the supplied string `s`.
///
/// Returns:
/// - [`Option::Some`] with the byte offset of the `idx`-th character, or with `s.len()` when
///   `idx` equals the number of characters (end-of-line)
/// - [`Option::None`] if `idx` is past the end
fn convert_visual_to_byte_idx(s: &str, idx: usize) -> Option<usize> {
    s.char_indices()
        .map(|(byte_offset, _)| byte_offset)
        // One extra position: the end-of-line offset right after the last character.
        .chain(std::iter::once(s.len()))
        .nth(idx)
}
310
/// Extract the target of the first Markdown-style link (`[text](target)`) found in `input`.
///
/// The target starts right after the first `](` and runs up to the next `)`, or to the end of
/// `input` when no closing parenthesis follows.
///
/// Returns [`Option::None`] if `input` contains no `](`.
fn extract_markdown_link(input: &str) -> Option<&str> {
    let (_, after_marker) = input.split_once("](")?;

    let target = match after_marker.split_once(')') {
        Some((target, _)) => target,
        None => after_marker,
    };
    Some(target)
}
320
/// Extract an `https://` or `http://` link from `input`.
///
/// The link starts at the earliest occurrence of either scheme and ends at the first space found
/// anywhere in `input` (or at the end of `input` when there is no space).
///
/// Returns [`Option::None`] if neither scheme occurs, or if the first space of `input` precedes
/// the link (e.g. `"text https://example.com"`).
fn extract_https_or_http_link(input: &str) -> Option<&str> {
    let link_start = [input.find("https://"), input.find("http://")]
        .into_iter()
        .flatten()
        .min()?;
    let link_end = input.find(' ').unwrap_or(input.len());

    // `get` yields `None` when the first space sits before the link start.
    input.get(link_start..link_end)
}
337
/// Unit tests for token extraction, URL / link extraction and token classification.
#[cfg(test)]
mod tests {
    use rstest::*;
    #[cfg(target_os = "macos")]
    use tempfile::NamedTempFile;
    #[cfg(target_os = "macos")]
    use tempfile::TempDir;

    use super::*;

    // Word extraction by visual (character) index, including end-of-line, whitespace and
    // multi-byte character cases.
    #[rstest]
    #[case("open file.txt now", 7, Some("file.txt"))]
    #[case("yes run main.rs", 8, Some("main.rs"))]
    #[case("yes run main.rs", 14, Some("main.rs"))]
    #[case("hello  world", 5, None)]
    #[case("hello  world", 6, None)]
    #[case("/usr/local/bin", 0, Some("/usr/local/bin"))]
    #[case("/usr/local/bin", 14, Some("/usr/local/bin"))]
    #[case("print(arg)", 5, Some("print(arg)"))]
    #[case("abc", 10, None)]
    #[case("αβ γ", 0, Some("αβ"))]
    #[case("αβ γ", 1, Some("αβ"))]
    #[case("αβ γ", 4, Some("γ"))]
    #[case("αβ γ", 5, None)]
    #[case("hello\nworld", 0, Some("hello"))]
    #[case("hello\nworld", 6, Some("world"))]
    #[case("hello\nworld", 5, None)]
    #[case("hello\n\nworld", 5, None)]
    #[case("hello\n\nworld", 6, None)]
    fn get_word_at_index_scenarios(#[case] s: &str, #[case] idx: usize, #[case] expected: Option<&str>) {
        pretty_assertions::assert_eq!(get_word_at_index(s, idx), expected);
    }

    // The `classify` tests below are gated to macOS (and therefore skipped in CI) because
    // [`TokenUnderCursor::classify`] calls the `file` command and that behaves differently based
    // on the platform (e.g. macOS vs Linux).

    #[test]
    #[cfg(target_os = "macos")]
    fn token_under_cursor_classify_valid_url_returns_url() {
        let input = "https://example.com".to_string();
        let result = TokenUnderCursor::classify(&input);
        assert2::let_assert!(Ok(actual) = result);
        pretty_assertions::assert_eq!(actual, TokenUnderCursor::Url(input));
    }

    #[test]
    #[cfg(target_os = "macos")]
    fn token_under_cursor_classify_invalid_url_plain_word_returns_word() {
        let input = "noturl".to_string();
        let result = TokenUnderCursor::classify(&input);
        assert2::let_assert!(Ok(actual) = result);
        pretty_assertions::assert_eq!(
            actual,
            TokenUnderCursor::MaybeTextFile {
                value: input,
                lnum: None,
                col: None
            }
        );
    }

    #[test]
    #[cfg(target_os = "macos")]
    fn token_under_cursor_classify_path_to_text_file_returns_text_file() {
        let mut temp_file = NamedTempFile::new().unwrap();
        std::io::Write::write_all(&mut temp_file, b"hello world").unwrap();
        let path = temp_file.path().to_string_lossy().to_string();
        let result = TokenUnderCursor::classify(&path);
        assert2::let_assert!(Ok(actual) = result);
        pretty_assertions::assert_eq!(
            actual,
            TokenUnderCursor::TextFile {
                path,
                lnum: None,
                col: None
            }
        );
    }

    #[test]
    #[cfg(target_os = "macos")]
    fn token_under_cursor_classify_path_lnum_to_text_file_returns_text_file_with_lnum() {
        let mut temp_file = NamedTempFile::new().unwrap();
        std::io::Write::write_all(&mut temp_file, b"hello world").unwrap();
        let path = temp_file.path().to_string_lossy().to_string();
        let result = TokenUnderCursor::classify(&format!("{path}:10"));
        assert2::let_assert!(Ok(actual) = result);
        pretty_assertions::assert_eq!(
            actual,
            TokenUnderCursor::TextFile {
                path,
                lnum: Some(10),
                col: None
            }
        );
    }

    #[test]
    #[cfg(target_os = "macos")]
    fn token_under_cursor_classify_path_lnum_col_to_text_file_returns_text_file_with_lnum_col() {
        let mut temp_file = NamedTempFile::new().unwrap();
        std::io::Write::write_all(&mut temp_file, b"hello world").unwrap();
        let path = temp_file.path().to_string_lossy().to_string();
        let result = TokenUnderCursor::classify(&format!("{path}:10:5"));
        assert2::let_assert!(Ok(actual) = result);
        pretty_assertions::assert_eq!(
            actual,
            TokenUnderCursor::TextFile {
                path,
                lnum: Some(10),
                col: Some(5)
            }
        );
    }

    #[test]
    #[cfg(target_os = "macos")]
    fn token_under_cursor_classify_path_to_directory_returns_directory() {
        let temp_dir = TempDir::new().unwrap();
        let path = temp_dir.path().to_string_lossy().to_string();
        let result = TokenUnderCursor::classify(&path);
        assert2::let_assert!(Ok(actual) = result);
        pretty_assertions::assert_eq!(actual, TokenUnderCursor::Directory(path));
    }

    #[test]
    #[cfg(target_os = "macos")]
    fn token_under_cursor_classify_path_to_binary_file_returns_binary_file() {
        let mut temp_file = NamedTempFile::new().unwrap();
        // Write some binary data
        std::io::Write::write_all(&mut temp_file, &[0, 1, 2, 255]).unwrap();
        let path = temp_file.path().to_string_lossy().to_string();
        let result = TokenUnderCursor::classify(&path);
        assert2::let_assert!(Ok(actual) = result);
        pretty_assertions::assert_eq!(actual, TokenUnderCursor::BinaryFile(path));
    }

    #[test]
    #[cfg(target_os = "macos")]
    fn token_under_cursor_classify_nonexistent_path_returns_maybe_text_file() {
        let path = "/nonexistent/path".to_string();
        let result = TokenUnderCursor::classify(&path);
        assert2::let_assert!(Ok(actual) = result);
        pretty_assertions::assert_eq!(
            actual,
            TokenUnderCursor::MaybeTextFile {
                value: path,
                lnum: None,
                col: None
            }
        );
    }

    // The temp file is left empty here, so it is expected to fall back to `MaybeTextFile`
    // rather than `TextFile`.
    #[test]
    #[cfg(target_os = "macos")]
    fn token_under_cursor_classify_path_with_invalid_lnum_returns_maybe_text_file() {
        let temp_file = NamedTempFile::new().unwrap();
        let path = temp_file.path().to_string_lossy().to_string();
        let input = format!("{path}:invalid");
        let result = TokenUnderCursor::classify(&input);
        assert2::let_assert!(Ok(actual) = result);
        pretty_assertions::assert_eq!(
            actual,
            TokenUnderCursor::MaybeTextFile {
                value: path,
                lnum: None,
                col: None
            }
        );
    }

    // Same empty temp file setup as above; only the valid `lnum` suffix is kept.
    #[test]
    #[cfg(target_os = "macos")]
    fn token_under_cursor_classify_path_with_invalid_col_returns_maybe_text_file() {
        let temp_file = NamedTempFile::new().unwrap();
        let path = temp_file.path().to_string_lossy().to_string();
        let input = format!("{path}:10:invalid");
        let result = TokenUnderCursor::classify(&input);
        assert2::let_assert!(Ok(actual) = result);
        pretty_assertions::assert_eq!(
            actual,
            TokenUnderCursor::MaybeTextFile {
                value: path,
                lnum: Some(10),
                col: None
            }
        );
    }

    #[test]
    #[cfg(target_os = "macos")]
    fn token_under_cursor_classify_path_lnum_col_extra_ignores_extra() {
        let mut temp_file = NamedTempFile::new().unwrap();
        std::io::Write::write_all(&mut temp_file, b"hello world").unwrap();
        let path = temp_file.path().to_string_lossy().to_string();
        let result = TokenUnderCursor::classify(&format!("{path}:10:5:extra"));
        assert2::let_assert!(Ok(actual) = result);
        pretty_assertions::assert_eq!(
            actual,
            TokenUnderCursor::TextFile {
                path,
                lnum: Some(10),
                col: Some(5)
            }
        );
    }

    // URL classification: quote / bracket stripping, Markdown links and trailing text.
    #[rstest]
    #[case("https://example.com", "https://example.com")]
    #[case("http://example.com", "http://example.com")]
    #[case("\"https://example.com\"", "https://example.com")]
    #[case("`https://example.com`", "https://example.com")]
    #[case("'https://example.com'", "https://example.com")]
    #[case("{https://example.com}", "https://example.com")]
    #[case("(https://example.com)", "https://example.com")]
    #[case("[text](https://example.com)", "https://example.com")]
    #[case("[[text]](https://example.com)", "https://example.com")]
    #[case("https://example.com extra", "https://example.com")]
    #[case("http://example.com with text", "http://example.com")]
    #[case("(http://example.com)", "http://example.com")]
    #[case("`http://example.com`", "http://example.com")]
    fn classify_url_returns_the_token_url_under_curos(#[case] input: &str, #[case] expected_value: &str) {
        assert2::let_assert!(Ok(actual) = TokenUnderCursor::classify_url(input));
        pretty_assertions::assert_eq!(actual, TokenUnderCursor::Url(expected_value.to_string()));
    }

    // Inputs that are not URLs must surface the underlying `url::ParseError`.
    #[rstest]
    #[case("not a url")]
    #[case("[text](noturl)")]
    fn classify_url_when_cannot_classify_url_returns_the_expected_error(#[case] input: &str) {
        assert2::let_assert!(Err(err) = TokenUnderCursor::classify_url(input));
        assert!(err.downcast_ref::<url::ParseError>().is_some());
    }

    // Markdown link target extraction, including unbalanced and empty targets.
    #[rstest]
    #[case("[hello](world)", Some("world"))]
    #[case("[hello world](https://example.com)", Some("https://example.com"))]
    #[case("[text](url with spaces)", Some("url with spaces"))]
    #[case("[a](1)[b](2)", Some("1"))]
    #[case("[hello]()", Some(""))]
    #[case("[hello](world", Some("world"))]
    #[case("hello](world)", Some("world"))]
    #[case("hello](world", Some("world"))]
    #[case("no link", None)]
    #[case("[incomplete", None)]
    #[case("](empty)", Some("empty"))]
    fn extract_markdown_link_works_as_expected(#[case] input: &str, #[case] expected: Option<&str>) {
        pretty_assertions::assert_eq!(extract_markdown_link(input), expected);
    }

    // Plain `http(s)://` link extraction; a link preceded by text and a space yields `None`.
    #[rstest]
    #[case("https://example.com", Some("https://example.com"))]
    #[case("http://site.org", Some("http://site.org"))]
    #[case("https://example.com with text", Some("https://example.com"))]
    #[case("http://site.org more", Some("http://site.org"))]
    #[case("text https://example.com", None)]
    #[case("no link here", None)]
    #[case("https://first.com https://second.com", Some("https://first.com"))]
    #[case("http://a.com https://b.com", Some("http://a.com"))]
    #[case("https://a.com http://b.com", Some("https://a.com"))]
    #[case("https://example.com/path?query=value", Some("https://example.com/path?query=value"))]
    #[case("https://example.com:8080", Some("https://example.com:8080"))]
    fn extract_https_or_http_link_scenarios(#[case] input: &str, #[case] expected: Option<&str>) {
        pretty_assertions::assert_eq!(extract_https_or_http_link(input), expected);
    }
}