simstudioai
diff --git a/apps/sim/app/api/knowledge/route.ts b/apps/sim/app/api/knowledge/route.ts
Lines changed: 0 additions & 16 deletions
diff --git a/apps/sim/app/workspace/[workspaceId]/knowledge/components/create-base-modal/create-base-modal.tsx b/apps/sim/app/workspace/[workspaceId]/knowledge/components/create-base-modal/create-base-modal.tsx
Lines changed: 0 additions & 7 deletions
diff --git a/apps/sim/hooks/queries/kb/knowledge.ts b/apps/sim/hooks/queries/kb/knowledge.ts
Lines changed: 1 addition & 8 deletions
diff --git a/apps/sim/lib/chunkers/docs-chunker.ts b/apps/sim/lib/chunkers/docs-chunker.ts
Lines changed: 3 additions & 48 deletions
diff --git a/apps/sim/lib/chunkers/json-yaml-chunker.test.ts b/apps/sim/lib/chunkers/json-yaml-chunker.test.ts
Lines changed: 0 additions & 7 deletions
diff --git a/apps/sim/lib/chunkers/json-yaml-chunker.ts b/apps/sim/lib/chunkers/json-yaml-chunker.ts
Lines changed: 0 additions & 28 deletions
@@ -15,14 +15,6 @@ import { captureServerEvent } from '@/lib/posthog/server'
 
 const logger = createLogger('KnowledgeBaseAPI')
 
-/**
- * Schema for creating a knowledge base
- *
- * Chunking config units:
- * - maxSize: tokens (1 token ≈ 4 characters)
- * - minSize: characters
- * - overlap: tokens (1 token ≈ 4 characters)
- */
 const CreateKnowledgeBaseSchema = z.object({
   name: z.string().min(1, 'Name is required'),
   description: z.string().optional(),
@@ -31,25 +23,17 @@ const CreateKnowledgeBaseSchema = z.object({
   embeddingDimension: z.literal(1536).default(1536),
   chunkingConfig: z
     .object({
-      /** Maximum chunk size in tokens (1 token ≈ 4 characters) */
       maxSize: z.number().min(100).max(4000).default(1024),
-      /** Minimum chunk size in characters */
       minSize: z.number().min(1).max(2000).default(100),
-      /** Overlap between chunks in tokens (1 token ≈ 4 characters) */
       overlap: z.number().min(0).max(500).default(200),
-      /** Chunking strategy */
       strategy: z
         .enum(['auto', 'text', 'regex', 'recursive', 'sentence', 'token'])
         .default('auto')
         .optional(),
-      /** Strategy-specific options */
       strategyOptions: z
         .object({
-          /** Regex pattern for 'regex' strategy (max 500 chars) */
           pattern: z.string().max(500).optional(),
-          /** Custom separator hierarchy for 'recursive' strategy */
           separators: z.array(z.string()).optional(),
-          /** Pre-built separator recipe for 'recursive' strategy */
           recipe: z.enum(['plain', 'markdown', 'code']).optional(),
         })
         .optional(),
 
@@ -60,26 +60,20 @@ const FormSchema = z
       .max(100, 'Name must be less than 100 characters')
       .refine((value) => value.trim().length > 0, 'Name cannot be empty'),
     description: z.string().max(500, 'Description must be less than 500 characters').optional(),
-    /** Minimum chunk size in characters */
     minChunkSize: z
       .number()
       .min(1, 'Min chunk size must be at least 1 character')
       .max(2000, 'Min chunk size must be less than 2000 characters'),
-    /** Maximum chunk size in tokens (1 token ≈ 4 characters) */
     maxChunkSize: z
       .number()
       .min(100, 'Max chunk size must be at least 100 tokens')
       .max(4000, 'Max chunk size must be less than 4000 tokens'),
-    /** Overlap between chunks in tokens */
     overlapSize: z
       .number()
       .min(0, 'Overlap must be non-negative')
       .max(500, 'Overlap must be less than 500 tokens'),
-    /** Chunking strategy */
     strategy: z.enum(['auto', 'text', 'regex', 'recursive', 'sentence', 'token']).default('auto'),
-    /** Regex pattern (required when strategy is 'regex') */
     regexPattern: z.string().optional(),
-    /** Custom separators for recursive strategy (comma-separated) */
     customSeparators: z.string().optional(),
   })
   .refine(
@@ -376,7 +370,6 @@ export const CreateBaseModal = memo(function CreateBaseModal({
               <div className='space-y-3'>
                 <div className='flex flex-col gap-2'>
                   <Label htmlFor='kb-name'>Name</Label>
-                  {/* Hidden decoy fields to prevent browser autofill */}
                   <input
                     type='text'
                     name='fakeusernameremembered'
 
@@ -339,10 +339,7 @@ export interface DocumentChunkSearchParams {
   search: string
 }
 
-/**
- * Fetches all chunks matching a search query by paginating through results.
- * This is used for search functionality where we need all matching chunks.
- */
+/** Paginates through all matching chunks rather than returning a single page. */
 export async function fetchAllDocumentChunks(
   { knowledgeBaseId, documentId, search }: DocumentChunkSearchParams,
   signal?: AbortSignal
@@ -377,10 +374,6 @@ export const serializeSearchParams = (params: DocumentChunkSearchParams) =>
     search: params.search,
   })
 
-/**
- * Hook to search for chunks in a document.
- * Fetches all matching chunks and returns them for client-side pagination.
- */
 export function useDocumentChunkSearchQuery(
   params: DocumentChunkSearchParams,
   options?: {
 
@@ -21,9 +21,6 @@ interface Frontmatter {
 
 const logger = createLogger('DocsChunker')
 
-/**
- * Docs-specific chunker that processes .mdx files and tracks header context
- */
 export class DocsChunker {
   private readonly textChunker: TextChunker
   private readonly baseUrl: string
@@ -39,9 +36,6 @@ export class DocsChunker {
     this.baseUrl = options.baseUrl ?? 'https://docs.sim.ai'
   }
 
-  /**
-   * Process all .mdx files in the docs directory
-   */
   async chunkAllDocs(docsPath: string): Promise<DocChunk[]> {
     const allChunks: DocChunk[] = []
 
@@ -67,9 +61,6 @@ export class DocsChunker {
     }
   }
 
-  /**
-   * Process a single .mdx file
-   */
   async chunkMdxFile(filePath: string, basePath: string): Promise<DocChunk[]> {
     const content = await fs.readFile(filePath, 'utf-8')
     const relativePath = path.relative(basePath, filePath)
@@ -120,9 +111,6 @@ export class DocsChunker {
     return chunks
   }
 
-  /**
-   * Find all .mdx files recursively
-   */
   private async findMdxFiles(dirPath: string): Promise<string[]> {
     const files: string[] = []
 
@@ -142,9 +130,6 @@ export class DocsChunker {
     return files
   }
 
-  /**
-   * Extract headers and their positions from markdown content
-   */
   private extractHeaders(content: string): HeaderInfo[] {
     const headers: HeaderInfo[] = []
     const headerRegex = /^(#{1,6})\s+(.+)$/gm
@@ -166,9 +151,6 @@ export class DocsChunker {
     return headers
   }
 
-  /**
-   * Generate URL-safe anchor from header text
-   */
   private generateAnchor(headerText: string): string {
     return headerText
       .toLowerCase()
@@ -178,10 +160,7 @@ export class DocsChunker {
       .replace(/^-|-$/g, '')
   }
 
-  /**
-   * Generate document URL from relative path
-   * Handles index.mdx files specially - they are served at the parent directory path
-   */
+  /** index.mdx files are served at the parent directory path */
   private generateDocumentUrl(relativePath: string): string {
     let urlPath = relativePath.replace(/\.mdx$/, '').replace(/\\/g, '/')
 
@@ -194,9 +173,6 @@ export class DocsChunker {
     return `${this.baseUrl}/${urlPath}`
   }
 
-  /**
-   * Find the most relevant header for a given position
-   */
   private findRelevantHeader(headers: HeaderInfo[], position: number): HeaderInfo | null {
     if (headers.length === 0) return null
 
@@ -213,11 +189,7 @@ export class DocsChunker {
     return relevantHeader
   }
 
-  /**
-   * Split content into chunks using the existing TextChunker with table awareness.
-   * Returns both the chunks and the cleaned content so header extraction
-   * operates on the same text that was chunked (aligned positions).
-   */
+  /** Returns both chunks and cleaned content so header extraction uses aligned positions. */
   private async splitContent(
     content: string
   ): Promise<{ chunks: string[]; cleanedContent: string }> {
@@ -238,9 +210,6 @@ export class DocsChunker {
     return { chunks: finalChunks, cleanedContent }
   }
 
-  /**
-   * Clean content by removing MDX-specific elements and excessive whitespace
-   */
   private cleanContent(content: string): string {
     return content
       .replace(/\r\n/g, '\n')
@@ -255,9 +224,6 @@ export class DocsChunker {
       .trim()
   }
 
-  /**
-   * Parse frontmatter from MDX content
-   */
   private parseFrontmatter(content: string): { data: Frontmatter; content: string } {
     const frontmatterRegex = /^---\r?\n([\s\S]*?)\r?\n---\r?\n([\s\S]*)$/
     const match = content.match(frontmatterRegex)
@@ -285,9 +251,7 @@ export class DocsChunker {
     return { data, content: markdownContent }
   }
 
-  /**
-   * Detect table boundaries in markdown content to avoid splitting them
-   */
+  /** Detects table boundaries to avoid splitting tables across chunks. */
   private detectTableBoundaries(content: string): { start: number; end: number }[] {
     const tables: { start: number; end: number }[] = []
     const lines = content.split('\n')
@@ -331,16 +295,10 @@ export class DocsChunker {
     return tables
   }
 
-  /**
-   * Get character position from line number
-   */
   private getCharacterPosition(lines: string[], lineIndex: number): number {
     return lines.slice(0, lineIndex).reduce((acc, line) => acc + line.length + 1, 0)
   }
 
-  /**
-   * Merge chunks that would split tables
-   */
   private mergeTableChunks(
     chunks: string[],
     tableBoundaries: { start: number; end: number }[],
@@ -393,9 +351,6 @@ export class DocsChunker {
     return mergedChunks.filter((chunk) => chunk.length > 50)
   }
 
-  /**
-   * Enforce token size limit on chunks, using the configured chunkSize
-   */
   private enforceSizeLimit(chunks: string[]): string[] {
     const finalChunks: string[] = []
 
 
@@ -31,12 +31,10 @@ describe('JsonYamlChunker', () => {
     })
 
     it('should return false for plain text parsed as YAML scalar', () => {
-      // js-yaml parses plain text as a scalar value, not an object/array
       expect(JsonYamlChunker.isStructuredData('Hello, this is plain text.')).toBe(false)
     })
 
     it('should return false for invalid JSON/YAML with unbalanced braces', () => {
-      // Only truly malformed content that fails YAML parsing returns false
       expect(JsonYamlChunker.isStructuredData('{invalid: json: content: {{')).toBe(false)
     })
 
@@ -60,7 +58,6 @@ describe('JsonYamlChunker', () => {
       const json = '{}'
       const chunks = await chunker.chunk(json)
 
-      // Empty object is valid JSON, should return at least metadata
       expect(chunks.length).toBeGreaterThanOrEqual(0)
     })
 
@@ -203,7 +200,6 @@ server:
       const json = '[]'
       const chunks = await chunker.chunk(json)
 
-      // Empty array should not produce chunks with meaningful content
       expect(chunks.length).toBeGreaterThanOrEqual(0)
     })
 
@@ -271,7 +267,6 @@ server:
 
     it.concurrent('should fall back to text chunking for invalid JSON', async () => {
       const chunker = new JsonYamlChunker({ chunkSize: 100, minCharactersPerChunk: 10 })
-      // Create content that fails YAML parsing and is long enough to produce chunks
       const invalidJson = `{this is not valid json: content: {{${' more content here '.repeat(10)}`
       const chunks = await chunker.chunk(invalidJson)
 
@@ -376,9 +371,7 @@ server:
       const json = JSON.stringify({ a: 1, b: 2, c: 3 })
       const chunks = await chunker.chunk(json)
 
-      // Should produce chunks that are valid
       expect(chunks.length).toBeGreaterThan(0)
-      // The entire small object fits in one chunk
       expect(chunks[0].text.length).toBeGreaterThan(0)
     })
   })
 
@@ -12,10 +12,6 @@ type JsonArray = JsonValue[]
 
 const MAX_DEPTH = 5
 
-/**
- * Structure-aware chunker for JSON and YAML content
- * Recursively decomposes objects and arrays while preserving structure
- */
 export class JsonYamlChunker {
   private chunkSize: number
   private minCharactersPerChunk: number
@@ -25,9 +21,6 @@ export class JsonYamlChunker {
     this.minCharactersPerChunk = options.minCharactersPerChunk ?? 100
   }
 
-  /**
-   * Check if content is structured JSON/YAML data (object or array, not a primitive)
-   */
   static isStructuredData(content: string): boolean {
     try {
       const parsed = JSON.parse(content)
@@ -42,9 +35,6 @@ export class JsonYamlChunker {
     }
   }
 
-  /**
-   * Chunk JSON/YAML content intelligently based on structure
-   */
   async chunk(content: string): Promise<Chunk[]> {
     try {
       let data: JsonValue
@@ -65,9 +55,6 @@ export class JsonYamlChunker {
     }
   }
 
-  /**
-   * Chunk structured data based on its structure
-   */
   private chunkStructuredData(data: JsonValue, path: string[], depth: number): Chunk[] {
     if (Array.isArray(data)) {
       return this.chunkArray(data, path, depth)
@@ -99,9 +86,6 @@ export class JsonYamlChunker {
     ]
   }
 
-  /**
-   * Chunk an array by batching items until the token budget is reached
-   */
   private chunkArray(arr: JsonArray, path: string[], depth: number): Chunk[] {
     const chunks: Chunk[] = []
     let currentBatch: JsonValue[] = []
@@ -158,9 +142,6 @@ export class JsonYamlChunker {
     return chunks
   }
 
-  /**
-   * Chunk an object by grouping key-value pairs until the token budget is reached
-   */
   private chunkObject(obj: JsonObject, path: string[], depth: number): Chunk[] {
     const chunks: Chunk[] = []
     const entries = Object.entries(obj)
@@ -239,9 +220,6 @@ export class JsonYamlChunker {
     return chunks
   }
 
-  /**
-   * Build a chunk from a batch of array items
-   */
   private buildBatchChunk(
     contextHeader: string,
     batch: JsonValue[],
@@ -256,9 +234,6 @@ export class JsonYamlChunker {
     }
   }
 
-  /**
-   * Fall back to text chunking if JSON parsing fails
-   */
   private chunkAsText(content: string): Chunk[] {
     const chunks: Chunk[] = []
     const lines = content.split('\n')
@@ -296,9 +271,6 @@ export class JsonYamlChunker {
     return chunks
   }
 
-  /**
-   * Static method for chunking JSON/YAML data with default options
-   */
   static async chunkJsonYaml(content: string, options: ChunkerOptions = {}): Promise<Chunk[]> {
     const chunker = new JsonYamlChunker(options)
     return chunker.chunk(content)