11import { tool } from "@opencode-ai/plugin" ;
22import { existsSync } from "fs" ;
3- import { join , basename } from "path" ;
3+ import { join , basename , extname } from "path" ;
44import { spawn } from "child_process" ;
55
66/**
7- * PDF Brain - Local PDF knowledge base with vector search
7+ * PDF Brain - Local knowledge base with vector search
88 *
9+ * Supports PDFs and Markdown files (local paths or URLs).
910 * Uses PGlite + pgvector for semantic search via Ollama embeddings.
1011 * Stores in ~/Documents/.pdf-library/ for iCloud sync.
1112 */
@@ -19,8 +20,7 @@ async function runCli(
1920 signal ?: AbortSignal ,
2021) : Promise < string > {
2122 return new Promise ( ( resolve ) => {
22- // Use bunx for faster execution than npx (no registry check if cached)
23- const proc = spawn ( "bunx" , [ "pdf-brain" , ...args ] , {
23+ const proc = spawn ( "pdf-brain" , args , {
2424 env : { ...process . env } ,
2525 stdio : [ "ignore" , "pipe" , "pipe" ] ,
2626 } ) ;
@@ -75,31 +75,48 @@ async function runCli(
7575 } ) ;
7676}
7777
/**
 * Reports whether `str` begins with an `http://` or `https://` scheme.
 *
 * The scheme comparison is case-insensitive (URI schemes are
 * case-insensitive), so `HTTPS://host/doc.pdf` is recognised as a URL
 * rather than being treated as a relative local path.
 *
 * @param str - Candidate path or URL, tested as-is (no trimming).
 * @returns `true` when the string starts with an http(s) scheme.
 */
function isUrl(str: string): boolean {
  return /^https?:\/\//i.test(str);
}
81+
/**
 * Reports whether `path` carries one of the supported document extensions
 * (`.pdf`, `.md`, `.markdown`), compared case-insensitively.
 *
 * Only the extension is inspected; the file is not opened or stat'ed.
 *
 * @param path - File name or path to classify.
 * @returns `true` for a supported extension, `false` otherwise.
 */
function isValidFile(path: string): boolean {
  const supportedExtensions = [".pdf", ".md", ".markdown"];
  const extension = extname(path).toLowerCase();
  return supportedExtensions.includes(extension);
}
86+
7887export const add = tool ( {
7988 description :
80- "Add a PDF to the library - extracts text, generates embeddings for semantic search" ,
89+ "Add a PDF or Markdown file to the library - extracts text, generates embeddings for semantic search. Supports local paths and URLs. " ,
8190 args : {
82- path : tool . schema . string ( ) . describe ( "Path to PDF file " ) ,
91+ path : tool . schema . string ( ) . describe ( "Path to file ( PDF/Markdown) or URL " ) ,
8392 tags : tool . schema . string ( ) . optional ( ) . describe ( "Comma-separated tags" ) ,
8493 title : tool . schema
8594 . string ( )
8695 . optional ( )
87- . describe ( "Custom title (default: filename)" ) ,
96+ . describe ( "Custom title (default: filename or frontmatter )" ) ,
8897 } ,
89- async execute ( { path : pdfPath , tags, title } , ctx ) {
90- // Resolve path
91- const resolvedPath = pdfPath . startsWith ( "~" )
92- ? pdfPath . replace ( "~" , process . env . HOME || "" )
93- : pdfPath . startsWith ( "/" )
94- ? pdfPath
95- : join ( process . cwd ( ) , pdfPath ) ;
98+ async execute ( { path : filePath , tags, title } , ctx ) {
99+ // Handle URLs directly
100+ if ( isUrl ( filePath ) ) {
101+ const args = [ "add" , filePath ] ;
102+ if ( tags ) args . push ( "--tags" , tags ) ;
103+ if ( title ) args . push ( "--title" , title ) ;
104+ return runCli ( args , EMBEDDING_TIMEOUT_MS , ctx ?. abort ) ;
105+ }
106+
107+ // Resolve local path
108+ const resolvedPath = filePath . startsWith ( "~" )
109+ ? filePath . replace ( "~" , process . env . HOME || "" )
110+ : filePath . startsWith ( "/" )
111+ ? filePath
112+ : join ( process . cwd ( ) , filePath ) ;
96113
97114 if ( ! existsSync ( resolvedPath ) ) {
98115 return `File not found: ${ resolvedPath } ` ;
99116 }
100117
101- if ( ! resolvedPath . toLowerCase ( ) . endsWith ( ".pdf" ) ) {
102- return "Not a PDF file " ;
118+ if ( ! isValidFile ( resolvedPath ) ) {
119+ return "Unsupported file type. Use PDF or Markdown files. " ;
103120 }
104121
105122 const args = [ "add" , resolvedPath ] ;
@@ -113,7 +130,7 @@ export const add = tool({
113130
114131export const search = tool ( {
115132 description :
116- "Semantic search across all PDFs using vector similarity (requires Ollama)" ,
133+ "Semantic search across all documents using vector similarity (requires Ollama)" ,
117134 args : {
118135 query : tool . schema . string ( ) . describe ( "Natural language search query" ) ,
119136 limit : tool . schema
@@ -124,31 +141,36 @@ export const search = tool({
124141 fts : tool . schema
125142 . boolean ( )
126143 . optional ( )
127- . describe ( "Use full-text search only (no embeddings)" ) ,
144+ . describe ( "Use full-text search only (skip embeddings)" ) ,
145+ expand : tool . schema
146+ . number ( )
147+ . optional ( )
148+ . describe ( "Expand context around matches (max: 4000 chars)" ) ,
128149 } ,
129- async execute ( { query, limit, tag, fts } , ctx ) {
150+ async execute ( { query, limit, tag, fts, expand } , ctx ) {
130151 const args = [ "search" , query ] ;
131152 if ( limit ) args . push ( "--limit" , String ( limit ) ) ;
132153 if ( tag ) args . push ( "--tag" , tag ) ;
133154 if ( fts ) args . push ( "--fts" ) ;
155+ if ( expand ) args . push ( "--expand" , String ( Math . min ( expand , 4000 ) ) ) ;
134156
135157 // Vector search needs Ollama for query embedding (unless fts-only)
136158 return runCli ( args , fts ? DEFAULT_TIMEOUT_MS : 60_000 , ctx ?. abort ) ;
137159 } ,
138160} ) ;
139161
/**
 * Tool: look up a single document in the library.
 *
 * Forwards the query to the CLI's `read` subcommand via `runCli` and
 * returns whatever text `runCli` resolves with. The subcommand is passed
 * as an exact argv entry (no surrounding whitespace), since `runCli`
 * spawns the process without a shell and arguments are not re-tokenised.
 */
export const read = tool({
  description: "Get document details and metadata",
  args: {
    query: tool.schema.string().describe("Document ID or title"),
  },
  async execute({ query }, ctx) {
    // ctx?.abort lets the caller cancel the child process early.
    return runCli(["read", query], DEFAULT_TIMEOUT_MS, ctx?.abort);
  },
});
149171
150172export const list = tool ( {
151- description : "List all PDFs in the library" ,
173+ description : "List all documents in the library" ,
152174 args : {
153175 tag : tool . schema . string ( ) . optional ( ) . describe ( "Filter by tag" ) ,
154176 } ,
@@ -160,19 +182,19 @@ export const list = tool({
160182} ) ;
161183
/**
 * Tool: delete a document from the library.
 *
 * Runs the CLI's `remove` subcommand with the supplied ID or title and
 * returns the text `runCli` resolves with.
 */
export const remove = tool({
  description: "Remove a document from the library",
  args: {
    query: tool.schema.string().describe("Document ID or title to remove"),
  },
  async execute(input, ctx) {
    const cliArgs = ["remove", input.query];
    const signal = ctx?.abort;
    return runCli(cliArgs, DEFAULT_TIMEOUT_MS, signal);
  },
});
171193
172194export const tag = tool ( {
173- description : "Set tags on a PDF " ,
195+ description : "Set tags on a document " ,
174196 args : {
175- query : tool . schema . string ( ) . describe ( "PDF ID or title" ) ,
197+ query : tool . schema . string ( ) . describe ( "Document ID or title" ) ,
176198 tags : tool . schema . string ( ) . describe ( "Comma-separated tags to set" ) ,
177199 } ,
178200 async execute ( { query, tags } , ctx ) {
@@ -196,10 +218,81 @@ export const check = tool({
196218 } ,
197219} ) ;
198220
/**
 * Tool: run the CLI's `repair` subcommand.
 *
 * Takes no arguments; the only input used is the optional abort signal
 * from the tool context.
 */
export const repair = tool({
  description:
    "Fix database integrity issues - removes orphaned chunks/embeddings",
  args: {},
  async execute(_args, ctx) {
    const signal = ctx?.abort;
    return runCli(["repair"], DEFAULT_TIMEOUT_MS, signal);
  },
});
229+
/**
 * Tool: export the library database via the CLI's `export` subcommand.
 *
 * `--output <path>` is appended only when a non-empty `output` is given;
 * otherwise the CLI is invoked with no output flag. Uses a fixed
 * 60-second timeout.
 */
export const exportLib = tool({
  description: "Export library database for backup or sharing",
  args: {
    output: tool.schema
      .string()
      .optional()
      .describe("Output file path (default: ./pdf-brain-export.tar.gz)"),
  },
  async execute(input, ctx) {
    const cliArgs = input.output
      ? ["export", "--output", input.output]
      : ["export"];
    return runCli(cliArgs, 60_000, ctx?.abort);
  },
});
244+
/**
 * Tool: import a library database via the CLI's `import` subcommand.
 *
 * The archive path is always passed; `--force` is appended only when
 * `force` is truthy. Uses a fixed 60-second timeout.
 */
export const importLib = tool({
  description: "Import library database from export archive",
  args: {
    file: tool.schema.string().describe("Path to export archive"),
    force: tool.schema
      .boolean()
      .optional()
      .describe("Overwrite existing library"),
  },
  async execute(input, ctx) {
    const forceFlag = input.force ? ["--force"] : [];
    const cliArgs = ["import", input.file, ...forceFlag];
    return runCli(cliArgs, 60_000, ctx?.abort);
  },
});
260+
/**
 * Tool: front-end for the CLI's `migrate` subcommand.
 *
 * Each truthy option maps to one CLI flag, emitted in this order:
 * `--check`, `--import <file>`, `--generate-script`. When no option is
 * set, `--check` is passed so the CLI is never invoked as bare `migrate`.
 * Uses a fixed 60-second timeout.
 */
export const migrate = tool({
  description: "Database migration utilities",
  args: {
    check: tool.schema
      .boolean()
      .optional()
      .describe("Check if migration is needed"),
    importFile: tool.schema
      .string()
      .optional()
      .describe("Import from SQL dump file"),
    generateScript: tool.schema
      .boolean()
      .optional()
      .describe("Generate export script for current database"),
  },
  async execute(input, ctx) {
    const flags: string[] = [];
    if (input.check) flags.push("--check");
    if (input.importFile) flags.push("--import", input.importFile);
    if (input.generateScript) flags.push("--generate-script");

    // Nothing requested: fall back to --check instead of running bare `migrate`.
    const cliArgs = ["migrate", ...(flags.length > 0 ? flags : ["--check"])];

    return runCli(cliArgs, 60_000, ctx?.abort);
  },
});
291+
199292export const batch_add = tool ( {
200- description : "Add multiple PDFs from a directory" ,
293+ description : "Add multiple PDFs/Markdown files from a directory" ,
201294 args : {
202- dir : tool . schema . string ( ) . describe ( "Directory containing PDFs " ) ,
295+ dir : tool . schema . string ( ) . describe ( "Directory containing documents " ) ,
203296 tags : tool . schema . string ( ) . optional ( ) . describe ( "Tags to apply to all" ) ,
204297 recursive : tool . schema
205298 . boolean ( )
@@ -217,43 +310,40 @@ export const batch_add = tool({
217310 return `Directory not found: ${ resolvedDir } ` ;
218311 }
219312
220- // Find PDFs
221- const { readdirSync, statSync } = await import ( "fs" ) ;
313+ // Find documents
314+ const { readdirSync } = await import ( "fs" ) ;
222315
223- function findPdfs ( dir : string , recurse : boolean ) : string [ ] {
316+ function findDocs ( dir : string , recurse : boolean ) : string [ ] {
224317 const results : string [ ] = [ ] ;
225318 for ( const entry of readdirSync ( dir , { withFileTypes : true } ) ) {
226319 const fullPath = join ( dir , entry . name ) ;
227320 if ( entry . isDirectory ( ) && recurse ) {
228- results . push ( ...findPdfs ( fullPath , true ) ) ;
229- } else if (
230- entry . isFile ( ) &&
231- entry . name . toLowerCase ( ) . endsWith ( ".pdf" )
232- ) {
321+ results . push ( ...findDocs ( fullPath , true ) ) ;
322+ } else if ( entry . isFile ( ) && isValidFile ( entry . name ) ) {
233323 results . push ( fullPath ) ;
234324 }
235325 }
236326 return results ;
237327 }
238328
239- const pdfList = findPdfs ( resolvedDir , recursive ) ;
329+ const docList = findDocs ( resolvedDir , recursive ) ;
240330
241- if ( pdfList . length === 0 ) {
242- return `No PDFs found in ${ resolvedDir } ` ;
331+ if ( docList . length === 0 ) {
332+ return `No PDF or Markdown files found in ${ resolvedDir } ` ;
243333 }
244334
245335 const results : string [ ] = [ ] ;
246336
247- for ( const pdfPath of pdfList ) {
337+ for ( const docPath of docList ) {
248338 // Check for abort between iterations
249339 if ( ctx ?. abort ?. aborted ) {
250- results . push ( "\n\nOperation cancelled - remaining PDFs not processed" ) ;
340+ results . push ( "\n\nOperation cancelled - remaining files not processed" ) ;
251341 break ;
252342 }
253343
254- const title = basename ( pdfPath , ".pdf" ) ;
344+ const title = basename ( docPath , extname ( docPath ) ) ;
255345 try {
256- const args = [ "add" , pdfPath ] ;
346+ const args = [ "add" , docPath ] ;
257347 if ( tags ) args . push ( "--tags" , tags ) ;
258348
259349 const result = await runCli ( args , EMBEDDING_TIMEOUT_MS , ctx ?. abort ) ;
@@ -267,6 +357,6 @@ export const batch_add = tool({
267357 }
268358 }
269359
270- return `# Batch Add Results (${ pdfList . length } PDFs )\n\n${ results . join ( "\n" ) } ` ;
360+ return `# Batch Add Results (${ docList . length } documents )\n\n${ results . join ( "\n" ) } ` ;
271361 } ,
272362} ) ;
0 commit comments