ytil_agents/agent/session_parser/
cursor.rs1use std::path::PathBuf;
2
3use chrono::DateTime;
4use chrono::Utc;
5use rootcause::option_ext::OptionExt;
6use rootcause::prelude::ResultExt;
7use rootcause::report;
8use serde::Deserialize;
9
10use crate::agent::Agent;
11use crate::agent::session::SearchTextBuilder;
12use crate::agent::session::Session;
13
14pub fn parse(meta_hex: &str, workspace_dir: PathBuf) -> rootcause::Result<CursorSession> {
19 let doc = parse_meta(meta_hex)?;
20
21 let created_at = DateTime::from_timestamp_millis(doc.created_at)
22 .map(|datetime| datetime.to_utc())
23 .context("Cursor createdAt is out of range".to_owned())
24 .attach(format!("session_id={}", doc.agent_id))
25 .attach(format!("created_at_ms={}", doc.created_at))?;
26
27 let name = doc.name.unwrap_or_else(|| {
28 workspace_dir
29 .file_name()
30 .and_then(|name| name.to_str())
31 .filter(|name| !name.is_empty())
32 .map_or_else(|| doc.agent_id.clone(), str::to_owned)
33 });
34
35 Ok(CursorSession {
36 id: doc.agent_id,
37 name: name.clone(),
38 search_text: name,
39 workspace: workspace_dir,
40 created_at,
41 updated_at: created_at,
42 })
43}
44
45pub fn parse_session_id(meta_hex: &str) -> rootcause::Result<String> {
50 parse_meta(meta_hex).map(|meta| meta.agent_id)
51}
52
53fn parse_meta(meta_hex: &str) -> rootcause::Result<CursorMeta> {
54 let meta_json = decode_hex_string(meta_hex)
55 .context("failed to decode Cursor meta payload".to_owned())
56 .attach(format!("meta_hex={meta_hex}"))?;
57 Ok(serde_json::from_str::<CursorMeta>(&meta_json)
58 .context("failed to parse Cursor session metadata".to_owned())
59 .attach(format!("meta_json={meta_json}"))?)
60}
61
62#[derive(Clone, Debug, Eq, PartialEq)]
63pub struct CursorSession {
64 pub id: String,
65 pub name: String,
66 pub search_text: String,
67 pub workspace: PathBuf,
68 pub created_at: DateTime<Utc>,
69 pub updated_at: DateTime<Utc>,
70}
71
72impl CursorSession {
73 pub fn into_session(self, path: PathBuf) -> Session {
74 let mut session = Session::new(Agent::Cursor, self.id, self.workspace, path, None, self.created_at);
75 session.name = self.name;
76 session.search_text = self.search_text;
77 session.updated_at = self.updated_at;
78 session
79 }
80}
81
82fn decode_hex_string(raw: &str) -> rootcause::Result<String> {
83 let hex = raw.trim();
84
85 if !hex.len().is_multiple_of(2) {
86 return Err(report!("hex string has odd length").attach(format!("len={}", hex.len())));
87 }
88
89 let mut bytes = Vec::with_capacity(hex.len() / 2);
90 for pair in hex.as_bytes().chunks_exact(2) {
91 let pair = std::str::from_utf8(pair).context("hex chunk is not utf8".to_owned())?;
92 let byte = u8::from_str_radix(pair, 16).context("invalid hex byte".to_owned())?;
93 bytes.push(byte);
94 }
95
96 Ok(String::from_utf8(bytes).context("decoded hex string is not utf8".to_owned())?)
97}
98
99pub fn build_search_text_from_strings(session_name: &str, strings_output: &str) -> String {
100 let mut search_text = SearchTextBuilder::default();
101 for line in strings_output.lines().filter_map(searchable_cursor_strings_line) {
102 search_text.push(&line);
103 }
104 search_text.build(session_name)
105}
106
107pub fn extract_cursor_workspace_from_strings(
108 strings_output: &str,
109 known_workspaces: &[PathBuf],
110 ignored_roots: &[PathBuf],
111) -> Option<PathBuf> {
112 let mut known_matches: Vec<PathBuf> = known_workspaces
113 .iter()
114 .filter(|workspace| workspace.to_str().is_some_and(|value| strings_output.contains(value)))
115 .cloned()
116 .collect();
117 known_matches.sort_by_key(|workspace| std::cmp::Reverse(workspace.components().count()));
118 if let Some(workspace) = known_matches.into_iter().next() {
119 return Some(workspace);
120 }
121
122 for line in strings_output.lines() {
123 for candidate in extract_absolute_path_candidates(line) {
124 let Some(existing_path) = longest_existing_path(&candidate) else {
125 continue;
126 };
127 let workspace_dir = if existing_path.is_dir() {
128 existing_path
129 } else if let Some(parent) = existing_path.parent() {
130 parent.to_path_buf()
131 } else {
132 continue;
133 };
134 if ignored_roots.iter().any(|root| workspace_dir.starts_with(root)) {
135 continue;
136 }
137 return Some(workspace_dir);
138 }
139 }
140
141 None
142}
143
144#[derive(Debug, Deserialize)]
145struct CursorMeta {
146 #[serde(rename = "agentId")]
147 agent_id: String,
148 name: Option<String>,
149 #[serde(rename = "createdAt")]
150 created_at: i64,
151}
152
153fn extract_absolute_path_candidates(line: &str) -> Vec<String> {
154 let mut candidates = Vec::new();
155 candidates.extend(extract_prefixed_candidates(line, "file:///"));
156 candidates.extend(extract_prefixed_candidates(line, "/"));
157 candidates
158}
159
160fn searchable_cursor_strings_line(line: &str) -> Option<String> {
161 let normalized = line.split_whitespace().collect::<Vec<_>>().join(" ");
162 let normalized = (!normalized.is_empty()).then_some(normalized)?;
163 if normalized.len() < 8 {
164 return None;
165 }
166 if !normalized.chars().any(char::is_alphabetic) || !normalized.chars().any(char::is_whitespace) {
167 return None;
168 }
169 if normalized.chars().all(|ch| ch.is_ascii_hexdigit()) {
170 return None;
171 }
172 if !extract_absolute_path_candidates(&normalized).is_empty() {
173 return None;
174 }
175
176 let lower = normalized.to_ascii_lowercase();
177 if lower.contains("create table")
178 || lower.contains("sqlite_")
179 || lower.contains("indexsqlite_")
180 || lower.starts_with("file:///")
181 {
182 return None;
183 }
184
185 Some(normalized)
186}
187
/// Collects every path-like run in `line` that is introduced by `prefix`.
///
/// Each candidate is `prefix` itself followed by the longest run of characters accepted by
/// [`is_path_char`]. The prefix is kept verbatim rather than filtered through `is_path_char`,
/// because a prefix such as `file:///` contains `:` and would otherwise be cut down to `file`.
///
/// Scanning resumes right after each matched prefix, so with the `/` prefix a single path also
/// yields its nested suffixes (`/a/b` produces `/a/b` and `/b`).
///
/// An empty `prefix` yields no candidates.
fn extract_prefixed_candidates(line: &str, prefix: &str) -> Vec<String> {
    // An empty prefix matches everywhere without advancing the scan; nothing to anchor on.
    if prefix.is_empty() {
        return Vec::new();
    }

    let mut candidates = Vec::new();
    let mut rest = line;
    while let Some(offset) = rest.find(prefix) {
        let Some(after_prefix) = rest.get(offset..).and_then(|matched| matched.strip_prefix(prefix)) else {
            break;
        };
        let mut candidate = String::from(prefix);
        candidate.extend(after_prefix.chars().take_while(|ch| is_path_char(*ch)));
        candidates.push(candidate);
        rest = after_prefix;
    }
    candidates
}

/// Returns `true` for characters allowed inside a path candidate: ASCII letters and digits
/// plus `/`, `.`, `_`, `-` and `~`.
const fn is_path_char(ch: char) -> bool {
    ch.is_ascii_alphanumeric() || matches!(ch, '/' | '.' | '_' | '-' | '~')
}
211
/// Returns the deepest ancestor of `candidate` (or `candidate` itself) that exists on disk.
///
/// A leading `file://` is stripped first. Returns `None` when no ancestor exists.
fn longest_existing_path(candidate: &str) -> Option<PathBuf> {
    let mut path = PathBuf::from(candidate.strip_prefix("file://").unwrap_or(candidate));

    loop {
        if path.exists() {
            return Some(path);
        }
        // `pop` reports `false` once there is no parent left to try.
        if !path.pop() {
            return None;
        }
    }
}
224
#[cfg(test)]
mod tests {

    use tempfile::tempdir;

    use super::*;

    /// A hex payload decodes to the JSON text it encodes.
    #[test]
    fn test_decodes_cursor_meta_hex_payload() {
        let payload = "7b226e616d65223a225361666520526562617365227d";
        assert2::assert!(let Ok(decoded) = decode_hex_string(payload));
        pretty_assertions::assert_eq!(decoded, "{\"name\":\"Safe Rebase\"}");
    }

    /// A full meta payload becomes a Cursor session carrying the metadata name.
    #[test]
    fn test_parses_cursor_session_from_meta_json() {
        let scratch = tempdir().unwrap();
        let workspace = scratch.path().join("workspace");
        std::fs::create_dir_all(&workspace).unwrap();

        let meta_hex = "7b226167656e744964223a2266626364393632362d623065642d343739632d623838372d376132633264313531376636222c226e616d65223a225361666520526562617365222c22637265617465644174223a313737343837373733383031337d";
        assert2::assert!(let Ok(cursor_session) = parse(meta_hex, workspace.clone()));
        let store_path = workspace.join("store.db");
        let session = cursor_session.into_session(store_path);
        pretty_assertions::assert_eq!(session.agent, Agent::Cursor);
        pretty_assertions::assert_eq!(session.workspace, workspace);
        pretty_assertions::assert_eq!(session.name, "Safe Rebase");
    }

    /// A known workspace mentioned in the output is returned directly.
    #[test]
    fn test_extracts_cursor_workspace_from_known_workspaces_first() {
        let scratch = tempdir().unwrap();
        let workspace = scratch.path().join("work").join("dotfiles");
        std::fs::create_dir_all(&workspace).unwrap();

        let strings_output = format!("file://{}/README.md\n{}\n", workspace.display(), workspace.display());
        let known = std::slice::from_ref(&workspace);
        let extracted = extract_cursor_workspace_from_strings(&strings_output, known, &[]);
        pretty_assertions::assert_eq!(extracted, Some(workspace));
    }

    /// Without known workspaces, the deepest existing directory of a path candidate is used.
    #[test]
    fn test_extracts_cursor_workspace_from_generic_path_candidates() {
        let scratch = tempdir().unwrap();
        let workspace = scratch.path().join("work").join("repo");
        let ignored = scratch.path().join("home").join(".cursor");
        std::fs::create_dir_all(workspace.join("src")).unwrap();
        std::fs::create_dir_all(&ignored).unwrap();

        let strings_output = format!("garbage file://{}/src/main.rs trailing", workspace.display());
        let extracted = extract_cursor_workspace_from_strings(&strings_output, &[], &[ignored]);
        pretty_assertions::assert_eq!(extracted, Some(workspace.join("src")));
    }

    /// Schema noise, paths, hashes and duplicates are dropped; human sentences are kept.
    #[test]
    fn test_build_search_text_from_strings_keeps_human_lines_and_filters_noise() {
        let strings_output = concat!(
            "CREATE TABLE blobs (id TEXT PRIMARY KEY, data BLOB);\n",
            "indexsqlite_autoindex_blobs_1blobs\n",
            "/Users/foo/bar/baz\n",
            "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855\n",
            "user asked about stalled sync job\n",
            "user asked about stalled sync job\n",
            "assistant suggested retrying the worker\n"
        );

        let search_text = build_search_text_from_strings("Cursor Session", strings_output);

        pretty_assertions::assert_eq!(
            search_text,
            "Cursor Session user asked about stalled sync job assistant suggested retrying the worker"
        );
    }
}