Search
POST
Search Document Chunks
POST
Search Document Chunks
Search for relevant document chunks using semantic similarity. This endpoint allows you to find the most semantically similar chunks of text from your documents based on a search query.
Parameters:
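| Parameter | Type | Required | Default | Description |
|-----------|---------|----------|---------|------------------------------------|
| query | string | Yes | - | The search query text |
| filters | object | No | null | Metadata filters to apply |
| k | integer | No | 4 | Number of chunks to return |
| min_score | float | No | 0.0 | Minimum similarity score threshold |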
Returns: List of ChunkResult objects with:
- content: The chunk content
- score: Similarity score
- document_id: Source document ID
- chunk_number: Position in document
- metadata: Document metadata
- content_type: Document content type
- filename: Original filename
- download_url: URL to download source file (if applicable)
from databridge import DataBridge

db = DataBridge(uri="your-databridge-uri")

# Search for relevant chunks
chunks = db.retrieve_chunks(
    query="machine learning applications",
    filters={"category": "tech"},  # metadata filters to apply
    k=3,  # number of chunks to return
    min_score=0.7,  # minimum similarity score threshold
)

# Print each match with its similarity score and source document.
for chunk in chunks:
    print(f"\nMatch (score: {chunk.score:.2f}):")
    print(chunk.content)
    print(f"From document: {chunk.document_id}")
    # download_url is only set when applicable, so skip it when empty.
    if chunk.download_url:
        print(f"Download URL: {chunk.download_url}")
Response:
[
{
"content": "Machine learning is transforming...",
"score": 0.89,
"document_id": "doc_abc123",
"chunk_number": 0,
"metadata": {
"title": "ML Overview",
"category": "tech"
},
"content_type": "text/plain",
"filename": null,
"download_url": null
}
]
GET
List Documents
GET
List Documents
List accessible documents with pagination and optional filtering. Returns a list of all documents you have access to, with support for pagination and metadata filtering.
Parameters:
- skip (optional): Number of documents to skip (default: 0)
- limit (optional): Maximum documents to return (default: 100)
- filters (optional): JSON-encoded metadata filters
Returns: List of Document objects containing all document metadata and storage information.
from databridge import DataBridge

db = DataBridge(uri="your-databridge-uri")

# List documents with filters
docs = db.list_documents(
    skip=0,  # number of documents to skip
    limit=10,  # maximum documents to return
    filters={"category": "tech"},
)

# Print the ID, title and creation time of each document on this page.
for doc in docs:
    print(f"Document ID: {doc.external_id}")
    print(f"Title: {doc.metadata.get('title')}")
    print(f"Created: {doc.system_metadata['created_at']}")

# Get next page: same limit and filters, with skip advanced by one page.
next_page = db.list_documents(
    skip=10,
    limit=10,
    filters={"category": "tech"},
)
Response:
[
{
"external_id": "doc_abc123",
"content_type": "text/plain",
"filename": "example.txt",
"metadata": {
"title": "Document Title",
"category": "tech"
},
"storage_info": {
"bucket": "your-bucket-name",
"key": "doc_abc123/example.txt"
},
"system_metadata": {
"created_at": "2024-03-20T10:30:00Z",
"updated_at": "2024-03-20T10:30:00Z",
"version": 1
},
"access_control": {
"readers": ["user_123"],
"writers": ["user_123"],
"admins": ["user_123"]
},
"chunk_ids": ["chunk_1", "chunk_2"]
}
]
GET
Document
GET
Document
Get metadata for a specific document by its ID. Returns the complete document object including all metadata and storage information.
Parameters:
- document_id: The external ID of the document
Returns: Complete Document object with all metadata fields
from databridge import DataBridge

db = DataBridge(uri="your-databridge-uri")

# Get document by ID
doc = db.get_document("doc_abc123")

print(f"Title: {doc.metadata.get('title')}")
print(f"Created: {doc.system_metadata['created_at']}")
# Only print the storage location when storage_info is populated.
if doc.storage_info:
    print(f"Storage: {doc.storage_info['bucket']}/{doc.storage_info['key']}")
Response:
{
"external_id": "doc_abc123",
"content_type": "text/plain",
"filename": "example.txt",
"metadata": {
"title": "Document Title",
"category": "tech"
},
"storage_info": {
"bucket": "your-bucket-name",
"key": "doc_abc123/example.txt"
},
"system_metadata": {
"created_at": "2024-03-20T10:30:00Z",
"updated_at": "2024-03-20T10:30:00Z",
"version": 1
},
"access_control": {
"readers": ["user_123"],
"writers": ["user_123"],
"admins": ["user_123"]
},
"chunk_ids": ["chunk_1", "chunk_2"]
}
Last updated