Lots of groundwork is in place and the app runs, but embedding generation is not working yet.

This commit is contained in:
2026-01-13 21:48:42 -05:00
parent 5ce3a30588
commit 98ea050bd8
11 changed files with 431 additions and 0 deletions

View File

@@ -0,0 +1,27 @@
namespace VectorSearchApp.Configuration;
/// <summary>
/// Root settings object that groups the application's three settings sections:
/// Qdrant connection, embedding model, and general app options.
/// </summary>
public class AppConfiguration
{
    /// <summary>Qdrant connection settings; starts as a default-constructed instance.</summary>
    public QdrantConfiguration Qdrant { get; set; } = new QdrantConfiguration();

    /// <summary>Embedding model settings; starts as a default-constructed instance.</summary>
    public EmbeddingConfiguration Embedding { get; set; } = new EmbeddingConfiguration();

    /// <summary>General application settings; starts as a default-constructed instance.</summary>
    public AppSettings App { get; set; } = new AppSettings();
}
/// <summary>
/// Connection settings for the Qdrant vector database.
/// </summary>
public class QdrantConfiguration
{
    // Fallback values used when a setting is not supplied.
    private const string DefaultHost = "localhost";
    private const int DefaultGrpcPort = 6334;
    private const int DefaultHttpPort = 6333;
    private const string DefaultCollectionName = "addresses";

    /// <summary>Host name of the Qdrant server. Defaults to "localhost".</summary>
    public string Host { get; set; } = DefaultHost;

    /// <summary>gRPC port of the Qdrant server. Defaults to 6334.</summary>
    public int GrpcPort { get; set; } = DefaultGrpcPort;

    /// <summary>HTTP port of the Qdrant server. Defaults to 6333.</summary>
    public int HttpPort { get; set; } = DefaultHttpPort;

    /// <summary>Name of the collection that holds the address vectors. Defaults to "addresses".</summary>
    public string CollectionName { get; set; } = DefaultCollectionName;
}
/// <summary>
/// Settings describing the embedding model.
/// </summary>
public class EmbeddingConfiguration
{
    // Fallback values used when a setting is not supplied.
    private const string DefaultModelName = "sentence-transformers/all-MiniLM-L6-v2";
    private const int DefaultDimension = 384;

    /// <summary>Identifier of the embedding model. Defaults to "sentence-transformers/all-MiniLM-L6-v2".</summary>
    public string ModelName { get; set; } = DefaultModelName;

    /// <summary>Number of components in an embedding vector. Defaults to 384.</summary>
    public int Dimension { get; set; } = DefaultDimension;
}
/// <summary>
/// General application settings.
/// </summary>
public class AppSettings
{
    private const int DefaultBatchSize = 10;

    /// <summary>
    /// Batch size setting. Defaults to 10.
    /// NOTE(review): no consumer of this value is visible alongside this class - confirm where it is used.
    /// </summary>
    public int BatchSize { get; set; } = DefaultBatchSize;
}

View File

@@ -0,0 +1,8 @@
namespace VectorSearchApp.Models;
/// <summary>
/// An address record as entered by the user.
/// </summary>
public class Address
{
    /// <summary>Unique identifier; a fresh GUID is generated for every new instance.</summary>
    public Guid Id { get; set; } = Guid.NewGuid();

    /// <summary>The address text. Defaults to the empty string.</summary>
    public string FullAddress { get; set; } = "";

    /// <summary>Creation timestamp; initialised to the current UTC time when the instance is constructed.</summary>
    public DateTime CreatedAt { get; set; } = DateTime.UtcNow;
}

View File

@@ -0,0 +1,8 @@
namespace VectorSearchApp.Models;
/// <summary>
/// An address together with its embedding vector.
/// </summary>
public class AddressEmbedding
{
    /// <summary>Identifier of the address. Not auto-generated, so it is <see cref="Guid.Empty"/> until set.</summary>
    public Guid Id { get; set; }

    /// <summary>The address text. Defaults to the empty string.</summary>
    public string FullAddress { get; set; } = "";

    /// <summary>The embedding vector. Defaults to an empty array.</summary>
    public float[] Vector { get; set; } = Array.Empty<float>();
}

173
VectorSearchApp/Program.cs Normal file
View File

@@ -0,0 +1,173 @@
using Microsoft.Extensions.Configuration;
using VectorSearchApp.Configuration;
using VectorSearchApp.Models;
using VectorSearchApp.Services;
Console.WriteLine("=== Vector Search Address Application ===");
Console.WriteLine("Using sentence-transformers/all-MiniLM-L6-v2 model and Qdrant vector database");
Console.WriteLine();

// Load configuration from appsettings.json located next to the executable.
// reloadOnChange is off: the sections are bound once into appConfig below, so watching
// the file for changes would never affect the running application.
var configuration = new ConfigurationBuilder()
    .SetBasePath(AppContext.BaseDirectory)
    .AddJsonFile("appsettings.json", optional: false, reloadOnChange: false)
    .Build();

var appConfig = new AppConfiguration();
configuration.GetSection("Qdrant").Bind(appConfig.Qdrant);
configuration.GetSection("Embedding").Bind(appConfig.Embedding);
configuration.GetSection("App").Bind(appConfig.App);

// Initialize services
Console.WriteLine("Initializing services...");
var embeddingService = new EmbeddingService(appConfig.Embedding);

// qdrantService stays null unless the collection was initialised successfully, so the
// "not available" branches in the menu handlers match the warning printed below.
IQdrantService? qdrantService = null;
try
{
    var candidate = new QdrantService(appConfig.Qdrant, appConfig.Embedding.Dimension);
    Console.WriteLine("Initializing Qdrant collection...");
    await candidate.InitializeCollectionAsync();
    qdrantService = candidate;
    Console.WriteLine($"Collection '{appConfig.Qdrant.CollectionName}' is ready.");
}
catch (Exception ex)
{
    Console.WriteLine($"Warning: Could not connect to Qdrant at {appConfig.Qdrant.Host}:{appConfig.Qdrant.GrpcPort}");
    Console.WriteLine($"Error: {ex.Message}");
    Console.WriteLine();
    Console.WriteLine("Please ensure Qdrant is running. You can start it with:");
    Console.WriteLine(" cd VectorSearchApp && docker-compose up -d");
    Console.WriteLine();
    Console.WriteLine("The application will continue, but address storage/search will not be available.");
    Console.WriteLine();
}

Console.WriteLine("Type 'exit' to quit at any time.");
Console.WriteLine();

// Main menu loop: runs until the user chooses to exit or standard input ends.
while (true)
{
    Console.WriteLine("Options:");
    Console.WriteLine(" 1. Add a new address");
    Console.WriteLine(" 2. Search for similar addresses");
    Console.WriteLine(" 3. Exit");
    Console.Write("Select an option: ");

    var line = Console.ReadLine();

    // ReadLine returns null once standard input is closed (Ctrl+D / Ctrl+Z, piped input
    // exhausted). Treat that as an exit request; otherwise the loop would spin forever.
    if (line is null)
    {
        Console.WriteLine();
        Console.WriteLine("Goodbye!");
        return;
    }

    var option = line.Trim();

    // Ordinal comparison: the command keyword must not depend on the current culture.
    if (option.Equals("exit", StringComparison.OrdinalIgnoreCase) || option == "3")
    {
        Console.WriteLine("Goodbye!");
        return;
    }

    switch (option)
    {
        case "1":
            await AddAddressAsync(embeddingService, qdrantService, appConfig);
            break;
        case "2":
            await SearchAddressesAsync(embeddingService, qdrantService, appConfig);
            break;
        default:
            Console.WriteLine("Invalid option. Please try again.");
            break;
    }

    Console.WriteLine();
}
// Prompts for an address, generates its embedding, stores it in Qdrant when a Qdrant
// service is available, and prints a short preview of the embedding.
//   embeddingService - produces the embedding vector for the entered text.
//   qdrantService    - vector store; null means storage is unavailable and is skipped.
//   config           - application configuration (currently not read by this function).
// Embedding and storage failures are reported separately so the message names the step
// that actually failed; neither failure propagates to the caller.
async Task AddAddressAsync(IEmbeddingService embeddingService, IQdrantService? qdrantService, AppConfiguration config)
{
    Console.Write("Enter the address: ");
    var addressText = Console.ReadLine()?.Trim();
    if (string.IsNullOrWhiteSpace(addressText))
    {
        Console.WriteLine("Address cannot be empty.");
        return;
    }

    Console.WriteLine("Generating embedding...");
    float[] embedding;
    try
    {
        embedding = await embeddingService.GenerateEmbeddingAsync(addressText);
    }
    catch (Exception ex)
    {
        Console.WriteLine($"Error generating embedding: {ex.Message}");
        return;
    }

    Console.WriteLine($"Embedding generated (dimension: {embedding.Length})");

    if (qdrantService is null)
    {
        Console.WriteLine("Address embedding generated but not stored (Qdrant not available).");
    }
    else
    {
        try
        {
            var address = new Address
            {
                Id = Guid.NewGuid(),
                FullAddress = addressText,
                CreatedAt = DateTime.UtcNow
            };
            Console.WriteLine("Storing in Qdrant...");
            await qdrantService.StoreAddressAsync(address, embedding);
            Console.WriteLine($"Address stored successfully! (ID: {address.Id})");
        }
        catch (Exception ex)
        {
            // A storage failure is not an embedding failure; say so explicitly.
            Console.WriteLine($"Error storing address in Qdrant: {ex.Message}");
        }
    }

    // Display first few values of embedding as confirmation
    var previewCount = Math.Min(10, embedding.Length);
    var preview = string.Join(", ", embedding.Take(previewCount).Select(value => $"{value:F4}"));
    var ellipsis = embedding.Length > previewCount ? ", ..." : string.Empty;
    Console.WriteLine($"Embedding preview: [{preview}{ellipsis}]");
}
// Prompts for a search query, embeds it, asks Qdrant for the five most similar stored
// addresses and prints them as a numbered list.
//   embeddingService - produces the embedding vector for the query text.
//   qdrantService    - vector store; when null the search is refused with a message.
//   config           - application configuration (currently not read by this function).
// Any exception raised while embedding or searching is reported on the console and not rethrown.
async Task SearchAddressesAsync(IEmbeddingService embeddingService, IQdrantService? qdrantService, AppConfiguration config)
{
    if (qdrantService is null)
    {
        Console.WriteLine("Search is not available because Qdrant is not connected.");
        return;
    }

    Console.Write("Enter search query: ");
    var query = Console.ReadLine()?.Trim();
    if (string.IsNullOrWhiteSpace(query))
    {
        Console.WriteLine("Query cannot be empty.");
        return;
    }

    Console.WriteLine("Generating query embedding...");
    try
    {
        var queryVector = await embeddingService.GenerateEmbeddingAsync(query);
        Console.WriteLine("Searching for similar addresses...");

        var matches = await qdrantService.SearchSimilarAddressesAsync(queryVector, limit: 5);
        if (matches.Count == 0)
        {
            Console.WriteLine("No similar addresses found.");
            return;
        }

        Console.WriteLine($"\nFound {matches.Count} similar address(es):");
        var rank = 0;
        foreach (var match in matches)
        {
            rank++;
            Console.WriteLine($" {rank}. {match.FullAddress}");
        }
    }
    catch (Exception failure)
    {
        Console.WriteLine($"Error during search: {failure.Message}");
    }
}

View File

@@ -0,0 +1,52 @@
using System.Net.Http.Json;
using VectorSearchApp.Configuration;
using VectorSearchApp.Models;
namespace VectorSearchApp.Services;
/// <summary>
/// Turns a piece of text into an embedding vector.
/// </summary>
public interface IEmbeddingService
{
    /// <summary>
    /// Generates the embedding vector for <paramref name="text"/>.
    /// </summary>
    /// <param name="text">The text to embed.</param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    /// <returns>A task whose result is the embedding vector.</returns>
    Task<float[]> GenerateEmbeddingAsync(
        string text,
        CancellationToken cancellationToken = default);
}
/// <summary>
/// <see cref="IEmbeddingService"/> implementation that requests embeddings from the
/// Hugging Face hosted inference API over HTTPS.
/// </summary>
/// <remarks>
/// The API token is read from the <c>HF_TOKEN</c> environment variable. When the variable
/// is not set, requests are sent without an Authorization header.
/// NOTE(review): the hosted API is expected to reject unauthenticated requests - confirm.
/// </remarks>
public class EmbeddingService : IEmbeddingService, IDisposable
{
    // Environment variable that supplies the Hugging Face access token.
    private const string ApiTokenVariable = "HF_TOKEN";

    // Upper bound on how much of an error response body is copied into exception messages.
    private const int MaxErrorDetailLength = 500;

    private readonly HttpClient _httpClient;
    private readonly string _modelName;
    private readonly int _dimension;

    /// <summary>
    /// Creates the service and its HTTP client.
    /// </summary>
    /// <param name="config">Model name and expected vector dimension.</param>
    /// <exception cref="ArgumentNullException"><paramref name="config"/> is null.</exception>
    public EmbeddingService(EmbeddingConfiguration config)
    {
        ArgumentNullException.ThrowIfNull(config);

        _modelName = config.ModelName;
        _dimension = config.Dimension;

        // NOTE(review): base URL follows the current Hugging Face "hf-inference" provider
        // documentation, which replaces api-inference.huggingface.co - confirm against the docs.
        _httpClient = new HttpClient
        {
            BaseAddress = new Uri("https://router.huggingface.co/hf-inference/models/")
        };
        _httpClient.DefaultRequestHeaders.Add("User-Agent", "VectorSearchApp");

        var token = Environment.GetEnvironmentVariable(ApiTokenVariable);
        if (!string.IsNullOrWhiteSpace(token))
        {
            _httpClient.DefaultRequestHeaders.Authorization =
                new System.Net.Http.Headers.AuthenticationHeaderValue("Bearer", token.Trim());
        }
    }

    /// <summary>
    /// Requests the embedding of <paramref name="text"/> from the configured model.
    /// </summary>
    /// <param name="text">The text to embed.</param>
    /// <param name="cancellationToken">Token used to cancel the HTTP request.</param>
    /// <returns>The embedding vector.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
    /// <exception cref="InvalidOperationException">
    /// The API returned a non-success status, the body could not be interpreted as an
    /// embedding, or the vector length differs from the configured dimension.
    /// </exception>
    public async Task<float[]> GenerateEmbeddingAsync(string text, CancellationToken cancellationToken = default)
    {
        ArgumentNullException.ThrowIfNull(text);

        var request = new
        {
            inputs = text
        };

        // Address the feature-extraction pipeline explicitly; the bare model route may be
        // served by a different default pipeline that expects another request shape.
        var requestUri = $"{_modelName}/pipeline/feature-extraction";

        using var response = await _httpClient.PostAsJsonAsync(requestUri, request, cancellationToken);
        if (!response.IsSuccessStatusCode)
        {
            // Include (a bounded part of) the body: it usually carries the actual reason.
            var detail = await response.Content.ReadAsStringAsync(cancellationToken);
            if (detail.Length > MaxErrorDetailLength)
            {
                detail = detail[..MaxErrorDetailLength];
            }

            throw new InvalidOperationException(
                $"Failed to generate embedding: {response.StatusCode} {detail}".TrimEnd());
        }

        System.Text.Json.JsonElement payload;
        try
        {
            payload = await response.Content.ReadFromJsonAsync<System.Text.Json.JsonElement>(
                cancellationToken: cancellationToken);
        }
        catch (System.Text.Json.JsonException ex)
        {
            throw new InvalidOperationException("Failed to generate embedding: response was not valid JSON", ex);
        }

        var vector = ExtractVector(payload);
        if (vector.Length == 0)
        {
            throw new InvalidOperationException("Failed to generate embedding");
        }

        // Fail here rather than later in the vector store, where a size mismatch is harder to trace.
        if (_dimension > 0 && vector.Length != _dimension)
        {
            throw new InvalidOperationException(
                $"Failed to generate embedding: expected {_dimension} values but received {vector.Length}");
        }

        return vector;
    }

    /// <summary>Releases the underlying HTTP client.</summary>
    public void Dispose()
    {
        _httpClient.Dispose();
        GC.SuppressFinalize(this);
    }

    // Reads an embedding from either a flat JSON array of numbers or an array of such
    // arrays (first row is used). Returns an empty array for any other shape.
    private static float[] ExtractVector(System.Text.Json.JsonElement payload)
    {
        if (payload.ValueKind != System.Text.Json.JsonValueKind.Array || payload.GetArrayLength() == 0)
        {
            return Array.Empty<float>();
        }

        var first = payload[0];
        var row = first.ValueKind == System.Text.Json.JsonValueKind.Array ? first : payload;

        var vector = new float[row.GetArrayLength()];
        var index = 0;
        foreach (var item in row.EnumerateArray())
        {
            if (item.ValueKind != System.Text.Json.JsonValueKind.Number || !item.TryGetSingle(out var value))
            {
                return Array.Empty<float>();
            }

            vector[index++] = value;
        }

        return vector;
    }
}

View File

@@ -0,0 +1,69 @@
using Qdrant.Client;
using Qdrant.Client.Grpc;
using VectorSearchApp.Configuration;
using VectorSearchApp.Models;
namespace VectorSearchApp.Services;
/// <summary>
/// Operations the application performs against the Qdrant vector store.
/// </summary>
public interface IQdrantService
{
    /// <summary>Prepares the collection used to store address vectors.</summary>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    Task InitializeCollectionAsync(
        CancellationToken cancellationToken = default);

    /// <summary>Stores an address together with its embedding vector.</summary>
    /// <param name="address">The address to store.</param>
    /// <param name="embedding">The embedding vector of the address.</param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    Task StoreAddressAsync(
        Address address,
        float[] embedding,
        CancellationToken cancellationToken = default);

    /// <summary>Finds stored addresses similar to the given query vector.</summary>
    /// <param name="queryEmbedding">The embedding vector of the query.</param>
    /// <param name="limit">Maximum number of results to return; defaults to 5.</param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    /// <returns>A task whose result is the list of matching addresses.</returns>
    Task<List<AddressEmbedding>> SearchSimilarAddressesAsync(
        float[] queryEmbedding,
        int limit = 5,
        CancellationToken cancellationToken = default);
}
/// <summary>
/// <see cref="IQdrantService"/> implementation backed by the Qdrant gRPC client.
/// </summary>
public class QdrantService : IQdrantService, IDisposable
{
    // Payload keys written by StoreAddressAsync and read back by SearchSimilarAddressesAsync.
    private const string AddressKey = "address";
    private const string CreatedAtKey = "created_at";

    private readonly QdrantClient _client;
    private readonly string _collectionName;
    private readonly int _vectorDimension;

    /// <summary>
    /// Creates a client for the configured host and gRPC port.
    /// </summary>
    /// <param name="config">Host, gRPC port and collection name.</param>
    /// <param name="vectorDimension">Size of the vectors stored in the collection; must be positive.</param>
    /// <exception cref="ArgumentNullException"><paramref name="config"/> is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="vectorDimension"/> is zero or negative.</exception>
    public QdrantService(QdrantConfiguration config, int vectorDimension)
    {
        ArgumentNullException.ThrowIfNull(config);
        // A negative value would wrap to a huge number in the (ulong) cast used below.
        ArgumentOutOfRangeException.ThrowIfNegativeOrZero(vectorDimension);

        _client = new QdrantClient(config.Host, config.GrpcPort);
        _collectionName = config.CollectionName;
        _vectorDimension = vectorDimension;
    }

    /// <summary>
    /// Creates the collection with cosine distance when it is not already listed by the server.
    /// </summary>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    public async Task InitializeCollectionAsync(CancellationToken cancellationToken = default)
    {
        var collections = await _client.ListCollectionsAsync(cancellationToken: cancellationToken);
        if (collections.Contains(_collectionName))
        {
            return;
        }

        await _client.CreateCollectionAsync(_collectionName, new VectorParams
        {
            Size = (ulong)_vectorDimension,
            Distance = Distance.Cosine
        }, cancellationToken: cancellationToken);
    }

    /// <summary>
    /// Upserts one point whose id is the address GUID and whose payload carries the address
    /// text and its creation time in round-trip ("O") format.
    /// </summary>
    /// <param name="address">The address to store.</param>
    /// <param name="embedding">The embedding vector; its length must equal the configured dimension.</param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    /// <exception cref="ArgumentNullException"><paramref name="address"/> or <paramref name="embedding"/> is null.</exception>
    /// <exception cref="ArgumentException">The embedding length differs from the configured dimension.</exception>
    public async Task StoreAddressAsync(Address address, float[] embedding, CancellationToken cancellationToken = default)
    {
        ArgumentNullException.ThrowIfNull(address);
        ArgumentNullException.ThrowIfNull(embedding);
        if (embedding.Length != _vectorDimension)
        {
            throw new ArgumentException(
                $"Embedding has {embedding.Length} values but the collection expects {_vectorDimension}.",
                nameof(embedding));
        }

        var point = new PointStruct
        {
            Id = new PointId { Uuid = address.Id.ToString() },
            Vectors = embedding,
            Payload =
            {
                [AddressKey] = address.FullAddress,
                [CreatedAtKey] = address.CreatedAt.ToString("O")
            }
        };

        await _client.UpsertAsync(_collectionName, new[] { point }, cancellationToken: cancellationToken);
    }

    /// <summary>
    /// Searches the collection for the points closest to <paramref name="queryEmbedding"/>.
    /// </summary>
    /// <param name="queryEmbedding">The embedding vector of the query.</param>
    /// <param name="limit">Maximum number of results; zero yields an empty list without a server call.</param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    /// <returns>
    /// The matching addresses. <see cref="AddressEmbedding.Vector"/> is left empty. Points
    /// without a UUID id or without an "address" payload entry are skipped.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="queryEmbedding"/> is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="limit"/> is negative.</exception>
    public async Task<List<AddressEmbedding>> SearchSimilarAddressesAsync(float[] queryEmbedding, int limit = 5, CancellationToken cancellationToken = default)
    {
        ArgumentNullException.ThrowIfNull(queryEmbedding);
        // A negative value would wrap to a huge number in the (ulong) cast below.
        ArgumentOutOfRangeException.ThrowIfNegative(limit);
        if (limit == 0)
        {
            return new List<AddressEmbedding>();
        }

        var results = await _client.SearchAsync(_collectionName, queryEmbedding, limit: (ulong)limit, cancellationToken: cancellationToken);

        var matches = new List<AddressEmbedding>(results.Count);
        foreach (var hit in results)
        {
            // One malformed or foreign point must not abort the whole search, so points
            // that cannot be mapped back to an address are skipped instead of throwing.
            if (!Guid.TryParse(hit.Id?.Uuid, out var id))
            {
                continue;
            }

            if (!hit.Payload.TryGetValue(AddressKey, out var addressValue))
            {
                continue;
            }

            matches.Add(new AddressEmbedding
            {
                Id = id,
                FullAddress = addressValue.StringValue,
                Vector = Array.Empty<float>()
            });
        }

        return matches;
    }

    /// <summary>Releases the underlying Qdrant client.</summary>
    public void Dispose()
    {
        _client.Dispose();
        GC.SuppressFinalize(this);
    }
}

View File

@@ -0,0 +1,23 @@
<Project Sdk="Microsoft.NET.Sdk">
<!-- Console application project (OutputType Exe) targeting .NET 8 with implicit usings and nullable reference types enabled. -->
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net8.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>
<ItemGroup>
<!-- Configuration packages: JSON file provider and section-to-object binding. -->
<PackageReference Include="Microsoft.Extensions.Configuration.Binder" Version="10.0.2" />
<PackageReference Include="Microsoft.Extensions.Configuration.Json" Version="10.0.2" />
<!-- NOTE(review): the C# sources shown with this project obtain embeddings over HTTP and do not reference OnnxRuntime; confirm whether this package is reserved for planned local inference. -->
<PackageReference Include="Microsoft.ML.OnnxRuntime" Version="1.23.2" />
<!-- gRPC client used to talk to the Qdrant vector database. -->
<PackageReference Include="Qdrant.Client" Version="1.16.1" />
</ItemGroup>
<ItemGroup>
<!-- appsettings.json is copied to the output directory because the program loads it from AppContext.BaseDirectory as a required file. -->
<None Update="appsettings.json">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
</ItemGroup>
</Project>

View File

@@ -0,0 +1,15 @@
{
"Qdrant": {
"Host": "localhost",
"GrpcPort": 6334,
"HttpPort": 6333,
"CollectionName": "addresses"
},
"Embedding": {
"ModelName": "sentence-transformers/all-MiniLM-L6-v2",
"Dimension": 384
},
"App": {
"BatchSize": 10
}
}

View File

@@ -0,0 +1,17 @@
# Local Qdrant instance for VectorSearchApp.
# The application connects over gRPC (port 6334); the HTTP port (6333) is published as well.
version: '3.8'
services:
  qdrant:
    # NOTE(review): "latest" is unpinned; consider pinning a release that matches the Qdrant.Client package version.
    image: qdrant/qdrant:latest
    container_name: vector-search-qdrant
    ports:
      - "6333:6333"
      - "6334:6334"
    volumes:
      # Named volume so stored collections survive container re-creation.
      - qdrant_storage:/qdrant/storage
    environment:
      # Qdrant maps QDRANT__<SECTION>__<KEY> variables onto its configuration file;
      # the gRPC port key under "service" is "grpc_port".
      - QDRANT__SERVICE__GRPC_PORT=6334
      - QDRANT__SERVICE__HTTP_PORT=6333
volumes:
  qdrant_storage:

15
appsettings.json Normal file
View File

@@ -0,0 +1,15 @@
{
"Qdrant": {
"Host": "localhost",
"GrpcPort": 6334,
"HttpPort": 6333,
"CollectionName": "addresses"
},
"Embedding": {
"ModelName": "sentence-transformers/all-MiniLM-L6-v2",
"Dimension": 384
},
"App": {
"BatchSize": 10
}
}

24
vector-search-csharp.sln Normal file
View File

@@ -0,0 +1,24 @@
Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio Version 17
VisualStudioVersion = 17.5.2.0
MinimumVisualStudioVersion = 10.0.40219.1
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "VectorSearchApp", "VectorSearchApp\VectorSearchApp.csproj", "{9180D2C9-AC14-132A-3557-75D22DD97295}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Release|Any CPU = Release|Any CPU
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{9180D2C9-AC14-132A-3557-75D22DD97295}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{9180D2C9-AC14-132A-3557-75D22DD97295}.Debug|Any CPU.Build.0 = Debug|Any CPU
{9180D2C9-AC14-132A-3557-75D22DD97295}.Release|Any CPU.ActiveCfg = Release|Any CPU
{9180D2C9-AC14-132A-3557-75D22DD97295}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {717EF272-7C5B-4D0F-8258-9F56AA1D2507}
EndGlobalSection
EndGlobal