Skip to main content

ytil_agents/agent/session_parser/
cursor.rs

1use std::path::PathBuf;
2
3use chrono::DateTime;
4use chrono::Utc;
5use rootcause::option_ext::OptionExt;
6use rootcause::prelude::ResultExt;
7use rootcause::report;
8use serde::Deserialize;
9
10use crate::agent::Agent;
11use crate::agent::session::SearchTextBuilder;
12use crate::agent::session::Session;
13
14/// Parse Cursor session metadata into a session.
15///
16/// # Errors
17/// Returns an error when the encoded metadata is invalid or contains an invalid timestamp.
18pub fn parse(meta_hex: &str, workspace_dir: PathBuf) -> rootcause::Result<CursorSession> {
19    let doc = parse_meta(meta_hex)?;
20
21    let created_at = DateTime::from_timestamp_millis(doc.created_at)
22        .map(|datetime| datetime.to_utc())
23        .context("Cursor createdAt is out of range".to_owned())
24        .attach(format!("session_id={}", doc.agent_id))
25        .attach(format!("created_at_ms={}", doc.created_at))?;
26
27    let name = doc.name.unwrap_or_else(|| {
28        workspace_dir
29            .file_name()
30            .and_then(|name| name.to_str())
31            .filter(|name| !name.is_empty())
32            .map_or_else(|| doc.agent_id.clone(), str::to_owned)
33    });
34
35    Ok(CursorSession {
36        id: doc.agent_id,
37        name: name.clone(),
38        search_text: name,
39        workspace: workspace_dir,
40        created_at,
41        updated_at: created_at,
42    })
43}
44
45/// Parse only the session id from Cursor metadata.
46///
47/// # Errors
48/// Returns an error when the encoded metadata is invalid or missing required fields.
49pub fn parse_session_id(meta_hex: &str) -> rootcause::Result<String> {
50    parse_meta(meta_hex).map(|meta| meta.agent_id)
51}
52
53fn parse_meta(meta_hex: &str) -> rootcause::Result<CursorMeta> {
54    let meta_json = decode_hex_string(meta_hex)
55        .context("failed to decode Cursor meta payload".to_owned())
56        .attach(format!("meta_hex={meta_hex}"))?;
57    Ok(serde_json::from_str::<CursorMeta>(&meta_json)
58        .context("failed to parse Cursor session metadata".to_owned())
59        .attach(format!("meta_json={meta_json}"))?)
60}
61
/// Session information recovered from Cursor metadata, before conversion into a [`Session`].
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct CursorSession {
    /// Session identifier; `parse` fills it with the metadata `agentId`.
    pub id: String,
    /// Display name; `parse` uses the metadata name, else the workspace directory name,
    /// else the agent id.
    pub name: String,
    /// Text used for searching; `parse` initializes it to the session name.
    pub search_text: String,
    /// Workspace directory the session is associated with.
    pub workspace: PathBuf,
    /// Creation time taken from the metadata `createdAt` value.
    pub created_at: DateTime<Utc>,
    /// Last update time; `parse` initializes it to `created_at`.
    pub updated_at: DateTime<Utc>,
}
71
72impl CursorSession {
73    pub fn into_session(self, path: PathBuf) -> Session {
74        let mut session = Session::new(Agent::Cursor, self.id, self.workspace, path, None, self.created_at);
75        session.name = self.name;
76        session.search_text = self.search_text;
77        session.updated_at = self.updated_at;
78        session
79    }
80}
81
82fn decode_hex_string(raw: &str) -> rootcause::Result<String> {
83    let hex = raw.trim();
84
85    if !hex.len().is_multiple_of(2) {
86        return Err(report!("hex string has odd length").attach(format!("len={}", hex.len())));
87    }
88
89    let mut bytes = Vec::with_capacity(hex.len() / 2);
90    for pair in hex.as_bytes().chunks_exact(2) {
91        let pair = std::str::from_utf8(pair).context("hex chunk is not utf8".to_owned())?;
92        let byte = u8::from_str_radix(pair, 16).context("invalid hex byte".to_owned())?;
93        bytes.push(byte);
94    }
95
96    Ok(String::from_utf8(bytes).context("decoded hex string is not utf8".to_owned())?)
97}
98
99pub fn build_search_text_from_strings(session_name: &str, strings_output: &str) -> String {
100    let mut search_text = SearchTextBuilder::default();
101    for line in strings_output.lines().filter_map(searchable_cursor_strings_line) {
102        search_text.push(&line);
103    }
104    search_text.build(session_name)
105}
106
107pub fn extract_cursor_workspace_from_strings(
108    strings_output: &str,
109    known_workspaces: &[PathBuf],
110    ignored_roots: &[PathBuf],
111) -> Option<PathBuf> {
112    let mut known_matches: Vec<PathBuf> = known_workspaces
113        .iter()
114        .filter(|workspace| workspace.to_str().is_some_and(|value| strings_output.contains(value)))
115        .cloned()
116        .collect();
117    known_matches.sort_by_key(|workspace| std::cmp::Reverse(workspace.components().count()));
118    if let Some(workspace) = known_matches.into_iter().next() {
119        return Some(workspace);
120    }
121
122    for line in strings_output.lines() {
123        for candidate in extract_absolute_path_candidates(line) {
124            let Some(existing_path) = longest_existing_path(&candidate) else {
125                continue;
126            };
127            let workspace_dir = if existing_path.is_dir() {
128                existing_path
129            } else if let Some(parent) = existing_path.parent() {
130                parent.to_path_buf()
131            } else {
132                continue;
133            };
134            if ignored_roots.iter().any(|root| workspace_dir.starts_with(root)) {
135                continue;
136            }
137            return Some(workspace_dir);
138        }
139    }
140
141    None
142}
143
/// Subset of the Cursor session metadata JSON that this module reads.
#[derive(Debug, Deserialize)]
struct CursorMeta {
    /// Agent identifier (`agentId` in the JSON); used as the session id.
    #[serde(rename = "agentId")]
    agent_id: String,
    /// Optional session name.
    name: Option<String>,
    /// Creation timestamp (`createdAt` in the JSON); `parse` interprets it as milliseconds
    /// since the Unix epoch.
    #[serde(rename = "createdAt")]
    created_at: i64,
}
152
153fn extract_absolute_path_candidates(line: &str) -> Vec<String> {
154    let mut candidates = Vec::new();
155    candidates.extend(extract_prefixed_candidates(line, "file:///"));
156    candidates.extend(extract_prefixed_candidates(line, "/"));
157    candidates
158}
159
160fn searchable_cursor_strings_line(line: &str) -> Option<String> {
161    let normalized = line.split_whitespace().collect::<Vec<_>>().join(" ");
162    let normalized = (!normalized.is_empty()).then_some(normalized)?;
163    if normalized.len() < 8 {
164        return None;
165    }
166    if !normalized.chars().any(char::is_alphabetic) || !normalized.chars().any(char::is_whitespace) {
167        return None;
168    }
169    if normalized.chars().all(|ch| ch.is_ascii_hexdigit()) {
170        return None;
171    }
172    if !extract_absolute_path_candidates(&normalized).is_empty() {
173        return None;
174    }
175
176    let lower = normalized.to_ascii_lowercase();
177    if lower.contains("create table")
178        || lower.contains("sqlite_")
179        || lower.contains("indexsqlite_")
180        || lower.starts_with("file:///")
181    {
182        return None;
183    }
184
185    Some(normalized)
186}
187
/// Find every occurrence of `prefix` in `line` and return, for each one, the prefix followed
/// by the longest run of path characters (see [`is_path_char`]) that comes right after it.
///
/// The prefix is kept verbatim rather than being filtered through `is_path_char`: a prefix
/// such as `file:///` contains `:`, which is not a path character, so filtering it would cut
/// every such candidate down to the literal `file`.
///
/// Scanning resumes immediately after each matched prefix, so overlapping candidates are
/// reported (for prefix `/`, `/a/b` yields both `/a/b` and `/b`). An empty `prefix` yields no
/// candidates.
fn extract_prefixed_candidates(line: &str, prefix: &str) -> Vec<String> {
    let mut candidates = Vec::new();
    // An empty prefix matches everywhere without advancing the scan position.
    if prefix.is_empty() {
        return candidates;
    }

    let mut start = 0;
    while let Some(search_area) = line.get(start..) {
        let Some(offset) = search_area.find(prefix) else {
            break;
        };
        let tail_start = start.saturating_add(offset).saturating_add(prefix.len());
        let Some(tail) = line.get(tail_start..) else {
            break;
        };
        let mut candidate = String::from(prefix);
        candidate.extend(tail.chars().take_while(|ch| is_path_char(*ch)));
        candidates.push(candidate);
        start = tail_start;
    }
    candidates
}

/// Whether `ch` may appear inside a path candidate: ASCII letters and digits plus
/// `/`, `.`, `_`, `-` and `~`.
const fn is_path_char(ch: char) -> bool {
    ch.is_ascii_alphanumeric() || matches!(ch, '/' | '.' | '_' | '-' | '~')
}
211
/// Return the deepest ancestor of `candidate` (or `candidate` itself) that exists on disk.
///
/// A leading `file://` scheme is stripped first. Trailing components are removed one at a
/// time until the path exists; `None` is returned once nothing is left to remove.
fn longest_existing_path(candidate: &str) -> Option<PathBuf> {
    let without_scheme = candidate.strip_prefix("file://").unwrap_or(candidate);
    let mut current = PathBuf::from(without_scheme);

    loop {
        if current.exists() {
            return Some(current);
        }
        if !current.pop() {
            return None;
        }
    }
}
224
// Unit tests for hex decoding, metadata parsing, workspace extraction and search-text
// filtering. Filesystem-dependent tests create their fixtures in a temporary directory.
#[cfg(test)]
mod tests {

    use tempfile::tempdir;

    use super::*;

    // The payload is the hex encoding of a small JSON object containing only `name`.
    #[test]
    fn test_decodes_cursor_meta_hex_payload() {
        assert2::assert!(let Ok(decoded) = decode_hex_string("7b226e616d65223a225361666520526562617365227d"));
        pretty_assertions::assert_eq!(decoded, "{\"name\":\"Safe Rebase\"}");
    }

    // Full round trip: hex metadata -> `CursorSession` -> `Session`.
    #[test]
    fn test_parses_cursor_session_from_meta_json() {
        let tempdir = tempdir().unwrap();
        let workspace = tempdir.path().join("workspace");
        std::fs::create_dir_all(&workspace).unwrap();

        // Hex-encoded JSON carrying `agentId`, `name` ("Safe Rebase") and `createdAt`.
        let meta_hex = "7b226167656e744964223a2266626364393632362d623065642d343739632d623838372d376132633264313531376636222c226e616d65223a225361666520526562617365222c22637265617465644174223a313737343837373733383031337d";
        assert2::assert!(let Ok(cursor_session) = parse(meta_hex, workspace.clone()));
        let session = cursor_session.into_session(workspace.join("store.db"));
        pretty_assertions::assert_eq!(session.agent, Agent::Cursor);
        pretty_assertions::assert_eq!(session.workspace, workspace);
        pretty_assertions::assert_eq!(session.name, "Safe Rebase");
    }

    // A known workspace that appears in the text is returned directly.
    #[test]
    fn test_extracts_cursor_workspace_from_known_workspaces_first() {
        let tempdir = tempdir().unwrap();
        let workspace = tempdir.path().join("work").join("dotfiles");
        std::fs::create_dir_all(&workspace).unwrap();

        let strings_output = format!("file://{}/README.md\n{}\n", workspace.display(), workspace.display());
        let extracted = extract_cursor_workspace_from_strings(&strings_output, std::slice::from_ref(&workspace), &[]);
        pretty_assertions::assert_eq!(extracted, Some(workspace));
    }

    // With no known workspaces, the deepest existing directory of a path found in the text is
    // used; `src/main.rs` does not exist, so the result is the existing `src` directory.
    #[test]
    fn test_extracts_cursor_workspace_from_generic_path_candidates() {
        let tempdir = tempdir().unwrap();
        let workspace = tempdir.path().join("work").join("repo");
        let ignored = tempdir.path().join("home").join(".cursor");
        std::fs::create_dir_all(workspace.join("src")).unwrap();
        std::fs::create_dir_all(&ignored).unwrap();

        let strings_output = format!("garbage file://{}/src/main.rs trailing", workspace.display());
        let extracted = extract_cursor_workspace_from_strings(&strings_output, &[], &[ignored]);
        pretty_assertions::assert_eq!(extracted, Some(workspace.join("src")));
    }

    // Schema statements, index names, paths and hex digests are dropped; the repeated human
    // sentence appears only once in the expected output.
    #[test]
    fn test_build_search_text_from_strings_keeps_human_lines_and_filters_noise() {
        let strings_output = concat!(
            "CREATE TABLE blobs (id TEXT PRIMARY KEY, data BLOB);\n",
            "indexsqlite_autoindex_blobs_1blobs\n",
            "/Users/foo/bar/baz\n",
            "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855\n",
            "user asked about stalled sync job\n",
            "user asked about stalled sync job\n",
            "assistant suggested retrying the worker\n"
        );

        let search_text = build_search_text_from_strings("Cursor Session", strings_output);

        pretty_assertions::assert_eq!(
            search_text,
            "Cursor Session user asked about stalled sync job assistant suggested retrying the worker"
        );
    }
}
295}