From 98ea050bd8f2865badadeff9c0de6ceac657082f Mon Sep 17 00:00:00 2001
From: vidane
Date: Tue, 13 Jan 2026 21:48:42 -0500
Subject: [PATCH] Lots of groundwork and the app runs but not currently working to generate embeddings.

---
 .../Configuration/AppConfiguration.cs         |  31 +++
 VectorSearchApp/Models/Address.cs             |   9 +
 VectorSearchApp/Models/AddressEmbedding.cs    |   9 +
 VectorSearchApp/Program.cs                    | 179 ++++++++++++++++++
 VectorSearchApp/Services/EmbeddingService.cs  |  71 +++++++
 VectorSearchApp/Services/QdrantService.cs     |  94 +++++++++
 VectorSearchApp/VectorSearchApp.csproj        |  24 +++
 VectorSearchApp/appsettings.json              |  15 ++
 VectorSearchApp/docker-compose.yml            |  17 ++
 appsettings.json                              |  15 ++
 vector-search-csharp.sln                      |  24 +++
 11 files changed, 488 insertions(+)
 create mode 100644 VectorSearchApp/Configuration/AppConfiguration.cs
 create mode 100644 VectorSearchApp/Models/Address.cs
 create mode 100644 VectorSearchApp/Models/AddressEmbedding.cs
 create mode 100644 VectorSearchApp/Program.cs
 create mode 100644 VectorSearchApp/Services/EmbeddingService.cs
 create mode 100644 VectorSearchApp/Services/QdrantService.cs
 create mode 100644 VectorSearchApp/VectorSearchApp.csproj
 create mode 100644 VectorSearchApp/appsettings.json
 create mode 100644 VectorSearchApp/docker-compose.yml
 create mode 100644 appsettings.json
 create mode 100644 vector-search-csharp.sln

diff --git a/VectorSearchApp/Configuration/AppConfiguration.cs b/VectorSearchApp/Configuration/AppConfiguration.cs
new file mode 100644
index 0000000..c0631c1
--- /dev/null
+++ b/VectorSearchApp/Configuration/AppConfiguration.cs
@@ -0,0 +1,31 @@
+namespace VectorSearchApp.Configuration;
+
+/// <summary>Root settings object grouping the Qdrant, embedding and application settings.</summary>
+public class AppConfiguration
+{
+    public QdrantConfiguration Qdrant { get; set; } = new();
+    public EmbeddingConfiguration Embedding { get; set; } = new();
+    public AppSettings App { get; set; } = new();
+}
+
+/// <summary>Connection settings for the Qdrant vector database.</summary>
+public class QdrantConfiguration
+{
+    public string Host { get; set; } = "localhost";
+    public int GrpcPort { get; set; } = 6334;
+    public int HttpPort { get; set; } = 6333;
+    public string CollectionName { get; set; } = "addresses";
+}
+
+/// <summary>Embedding model name and the vector dimension configured for it.</summary>
+public class EmbeddingConfiguration
+{
+    public string ModelName { get; set; } = "sentence-transformers/all-MiniLM-L6-v2";
+    public int Dimension { get; set; } = 384;
+}
+
+/// <summary>General application settings.</summary>
+public class AppSettings
+{
+    public int BatchSize { get; set; } = 10;
+}
\ No newline at end of file
diff --git a/VectorSearchApp/Models/Address.cs b/VectorSearchApp/Models/Address.cs
new file mode 100644
index 0000000..16bc3b6
--- /dev/null
+++ b/VectorSearchApp/Models/Address.cs
@@ -0,0 +1,9 @@
+namespace VectorSearchApp.Models;
+
+/// <summary>An address record; the id and creation time default to a new GUID and the current UTC time.</summary>
+public class Address
+{
+    public Guid Id { get; set; } = Guid.NewGuid();
+    public string FullAddress { get; set; } = string.Empty;
+    public DateTime CreatedAt { get; set; } = DateTime.UtcNow;
+}
\ No newline at end of file
diff --git a/VectorSearchApp/Models/AddressEmbedding.cs b/VectorSearchApp/Models/AddressEmbedding.cs
new file mode 100644
index 0000000..dc7a059
--- /dev/null
+++ b/VectorSearchApp/Models/AddressEmbedding.cs
@@ -0,0 +1,9 @@
+namespace VectorSearchApp.Models;
+
+/// <summary>An address id and text together with an embedding vector (empty by default).</summary>
+public class AddressEmbedding
+{
+    public Guid Id { get; set; }
+    public string FullAddress { get; set; } = string.Empty;
+    public float[] Vector { get; set; } = Array.Empty<float>();
+}
\ No newline at end of file
diff --git a/VectorSearchApp/Program.cs b/VectorSearchApp/Program.cs
new file mode 100644
index 0000000..393aa2b
--- /dev/null
+++ b/VectorSearchApp/Program.cs
@@ -0,0 +1,179 @@
+using Microsoft.Extensions.Configuration;
+using VectorSearchApp.Configuration;
+using VectorSearchApp.Models;
+using VectorSearchApp.Services;
+
+Console.WriteLine("=== Vector Search Address Application ===");
+Console.WriteLine("Using sentence-transformers/all-MiniLM-L6-v2 model and Qdrant vector database");
+Console.WriteLine();
+
+// Load configuration
+var configuration = new ConfigurationBuilder()
+    .SetBasePath(AppContext.BaseDirectory)
+    .AddJsonFile("appsettings.json", optional: false, reloadOnChange: true)
+    .Build();
+
+var appConfig = new AppConfiguration();
+configuration.GetSection("Qdrant").Bind(appConfig.Qdrant);
+configuration.GetSection("Embedding").Bind(appConfig.Embedding);
+configuration.GetSection("App").Bind(appConfig.App);
+
+// Initialize services
+Console.WriteLine("Initializing services...");
+var embeddingService = new EmbeddingService(appConfig.Embedding);
+IQdrantService? qdrantService = null;
+
+try
+{
+    qdrantService = new QdrantService(appConfig.Qdrant, appConfig.Embedding.Dimension);
+    Console.WriteLine("Initializing Qdrant collection...");
+    await qdrantService.InitializeCollectionAsync();
+    Console.WriteLine($"Collection '{appConfig.Qdrant.CollectionName}' is ready.");
+}
+catch (Exception ex)
+{
+    Console.WriteLine($"Warning: Could not connect to Qdrant at {appConfig.Qdrant.Host}:{appConfig.Qdrant.GrpcPort}");
+    Console.WriteLine($"Error: {ex.Message}");
+    Console.WriteLine();
+    Console.WriteLine("Please ensure Qdrant is running. You can start it with:");
+    Console.WriteLine(" cd VectorSearchApp && docker-compose up -d");
+    Console.WriteLine();
+    Console.WriteLine("The application will continue, but address storage/search will not be available.");
+    Console.WriteLine();
+}
+
+Console.WriteLine("Type 'exit' to quit at any time.");
+Console.WriteLine();
+
+while (true)
+{
+    Console.WriteLine("Options:");
+    Console.WriteLine(" 1. Add a new address");
+    Console.WriteLine(" 2. Search for similar addresses");
+    Console.WriteLine(" 3. Exit");
+    Console.Write("Select an option: ");
+
+    var option = Console.ReadLine()?.Trim();
+
+    // ReadLine returns null once standard input is closed; quit then instead of
+    // printing "Invalid option" in an endless loop.
+    if (option is null
+        || option == "3"
+        || string.Equals(option, "exit", StringComparison.OrdinalIgnoreCase))
+    {
+        Console.WriteLine("Goodbye!");
+        return;
+    }
+
+    switch (option)
+    {
+        case "1":
+            await AddAddressAsync(embeddingService, qdrantService, appConfig);
+            break;
+        case "2":
+            await SearchAddressesAsync(embeddingService, qdrantService, appConfig);
+            break;
+        default:
+            Console.WriteLine("Invalid option. Please try again.");
+            break;
+    }
+
+    Console.WriteLine();
+}
+
+// Prompts for an address, generates its embedding and, when Qdrant is available, stores it.
+async Task AddAddressAsync(IEmbeddingService embeddingService, IQdrantService? qdrantService, AppConfiguration config)
+{
+    Console.Write("Enter the address: ");
+    var addressText = Console.ReadLine()?.Trim();
+
+    if (string.IsNullOrWhiteSpace(addressText))
+    {
+        Console.WriteLine("Address cannot be empty.");
+        return;
+    }
+
+    Console.WriteLine("Generating embedding...");
+    try
+    {
+        var embedding = await embeddingService.GenerateEmbeddingAsync(addressText);
+        Console.WriteLine($"Embedding generated (dimension: {embedding.Length})");
+
+        if (qdrantService != null)
+        {
+            var address = new Address
+            {
+                Id = Guid.NewGuid(),
+                FullAddress = addressText,
+                CreatedAt = DateTime.UtcNow
+            };
+
+            Console.WriteLine("Storing in Qdrant...");
+            await qdrantService.StoreAddressAsync(address, embedding);
+            Console.WriteLine($"Address stored successfully! (ID: {address.Id})");
+        }
+        else
+        {
+            Console.WriteLine("Address embedding generated but not stored (Qdrant not available).");
+        }
+
+        // Display first few values of embedding as confirmation
+        Console.Write("Embedding preview: [");
+        var previewCount = Math.Min(10, embedding.Length);
+        for (int i = 0; i < previewCount; i++)
+        {
+            Console.Write($"{embedding[i]:F4}");
+            if (i < previewCount - 1) Console.Write(", ");
+        }
+        if (embedding.Length > previewCount) Console.Write(", ...");
+        Console.WriteLine("]");
+    }
+    catch (Exception ex)
+    {
+        Console.WriteLine($"Error generating embedding: {ex.Message}");
+    }
+}
+
+// Prompts for a query, generates its embedding and prints the closest stored addresses.
+async Task SearchAddressesAsync(IEmbeddingService embeddingService, IQdrantService? qdrantService, AppConfiguration config)
+{
+    if (qdrantService == null)
+    {
+        Console.WriteLine("Search is not available because Qdrant is not connected.");
+        return;
+    }
+
+    Console.Write("Enter search query: ");
+    var query = Console.ReadLine()?.Trim();
+
+    if (string.IsNullOrWhiteSpace(query))
+    {
+        Console.WriteLine("Query cannot be empty.");
+        return;
+    }
+
+    Console.WriteLine("Generating query embedding...");
+    try
+    {
+        var queryEmbedding = await embeddingService.GenerateEmbeddingAsync(query);
+        Console.WriteLine("Searching for similar addresses...");
+
+        var results = await qdrantService.SearchSimilarAddressesAsync(queryEmbedding, limit: 5);
+
+        if (results.Count == 0)
+        {
+            Console.WriteLine("No similar addresses found.");
+            return;
+        }
+
+        Console.WriteLine($"\nFound {results.Count} similar address(es):");
+        for (int i = 0; i < results.Count; i++)
+        {
+            Console.WriteLine($" {i + 1}. {results[i].FullAddress}");
+        }
+    }
+    catch (Exception ex)
+    {
+        Console.WriteLine($"Error during search: {ex.Message}");
+    }
+}
\ No newline at end of file
diff --git a/VectorSearchApp/Services/EmbeddingService.cs b/VectorSearchApp/Services/EmbeddingService.cs
new file mode 100644
index 0000000..63afecc
--- /dev/null
+++ b/VectorSearchApp/Services/EmbeddingService.cs
@@ -0,0 +1,71 @@
+using System.Net.Http.Json;
+using VectorSearchApp.Configuration;
+using VectorSearchApp.Models;
+
+namespace VectorSearchApp.Services;
+
+/// <summary>Turns text into an embedding vector.</summary>
+public interface IEmbeddingService
+{
+    /// <summary>Returns the embedding for <paramref name="text"/>.</summary>
+    Task<float[]> GenerateEmbeddingAsync(string text, CancellationToken cancellationToken = default);
+}
+
+/// <summary>
+/// <see cref="IEmbeddingService"/> that posts text to the Hugging Face inference endpoint
+/// for the configured model and returns the first vector of the response.
+/// </summary>
+public class EmbeddingService : IEmbeddingService
+{
+    private readonly HttpClient _httpClient;
+    private readonly string _modelName;
+    private readonly int _dimension;
+
+    public EmbeddingService(EmbeddingConfiguration config)
+    {
+        ArgumentNullException.ThrowIfNull(config);
+
+        _modelName = config.ModelName;
+        _dimension = config.Dimension;
+        _httpClient = new HttpClient
+        {
+            BaseAddress = new Uri("https://api-inference.huggingface.co/models/")
+        };
+        _httpClient.DefaultRequestHeaders.Add("User-Agent", "VectorSearchApp");
+    }
+
+    /// <summary>Requests an embedding for <paramref name="text"/> from the inference endpoint.</summary>
+    /// <exception cref="InvalidOperationException">
+    /// The request failed, the response held no vector, or the vector length differs from the configured dimension.
+    /// </exception>
+    public async Task<float[]> GenerateEmbeddingAsync(string text, CancellationToken cancellationToken = default)
+    {
+        var request = new { inputs = text };
+
+        using var response = await _httpClient.PostAsJsonAsync(_modelName, request, cancellationToken);
+
+        if (!response.IsSuccessStatusCode)
+        {
+            // Surface the response body so the reason for the failure is visible.
+            var detail = await response.Content.ReadAsStringAsync(cancellationToken);
+            throw new InvalidOperationException($"Failed to generate embedding: {response.StatusCode} {detail}".TrimEnd());
+        }
+
+        var result = await response.Content.ReadFromJsonAsync<float[][]>(cancellationToken: cancellationToken);
+
+        var embedding = result is { Length: > 0 } ? result[0] : null;
+        if (embedding is not { Length: > 0 })
+        {
+            throw new InvalidOperationException("Failed to generate embedding");
+        }
+
+        // A vector of the wrong size cannot be stored in a collection created for the configured dimension.
+        if (embedding.Length != _dimension)
+        {
+            throw new InvalidOperationException(
+                $"Embedding has {embedding.Length} values but the configured dimension is {_dimension}.");
+        }
+
+        return embedding;
+    }
+}
\ No newline at end of file
diff --git a/VectorSearchApp/Services/QdrantService.cs b/VectorSearchApp/Services/QdrantService.cs
new file mode 100644
index 0000000..ac94fa0
--- /dev/null
+++ b/VectorSearchApp/Services/QdrantService.cs
@@ -0,0 +1,94 @@
+using Qdrant.Client;
+using Qdrant.Client.Grpc;
+using VectorSearchApp.Configuration;
+using VectorSearchApp.Models;
+
+namespace VectorSearchApp.Services;
+
+/// <summary>Stores address embeddings in Qdrant and searches them by vector similarity.</summary>
+public interface IQdrantService
+{
+    /// <summary>Creates the configured collection if it does not exist yet.</summary>
+    Task InitializeCollectionAsync(CancellationToken cancellationToken = default);
+
+    /// <summary>Upserts <paramref name="address"/> together with its <paramref name="embedding"/>.</summary>
+    Task StoreAddressAsync(Address address, float[] embedding, CancellationToken cancellationToken = default);
+
+    /// <summary>Returns up to <paramref name="limit"/> stored addresses closest to <paramref name="queryEmbedding"/>.</summary>
+    Task<List<AddressEmbedding>> SearchSimilarAddressesAsync(float[] queryEmbedding, int limit = 5, CancellationToken cancellationToken = default);
+}
+
+/// <summary><see cref="IQdrantService"/> implemented on top of <see cref="QdrantClient"/>.</summary>
+public class QdrantService : IQdrantService
+{
+    private const string AddressPayloadKey = "address";
+    private const string CreatedAtPayloadKey = "created_at";
+
+    private readonly QdrantClient _client;
+    private readonly string _collectionName;
+    private readonly int _vectorDimension;
+
+    public QdrantService(QdrantConfiguration config, int vectorDimension)
+    {
+        ArgumentNullException.ThrowIfNull(config);
+        ArgumentOutOfRangeException.ThrowIfNegativeOrZero(vectorDimension);
+
+        _client = new QdrantClient(config.Host, config.GrpcPort);
+        _collectionName = config.CollectionName;
+        _vectorDimension = vectorDimension;
+    }
+
+    /// <inheritdoc />
+    public async Task InitializeCollectionAsync(CancellationToken cancellationToken = default)
+    {
+        var collections = await _client.ListCollectionsAsync(cancellationToken: cancellationToken);
+
+        if (!collections.Contains(_collectionName))
+        {
+            await _client.CreateCollectionAsync(_collectionName, new VectorParams
+            {
+                Size = (ulong)_vectorDimension,
+                Distance = Distance.Cosine
+            }, cancellationToken: cancellationToken);
+        }
+    }
+
+    /// <inheritdoc />
+    public async Task StoreAddressAsync(Address address, float[] embedding, CancellationToken cancellationToken = default)
+    {
+        ArgumentNullException.ThrowIfNull(address);
+        ArgumentNullException.ThrowIfNull(embedding);
+
+        var point = new PointStruct
+        {
+            Id = new PointId { Uuid = address.Id.ToString() },
+            Vectors = embedding,
+            Payload =
+            {
+                [AddressPayloadKey] = address.FullAddress,
+                [CreatedAtPayloadKey] = address.CreatedAt.ToString("O")
+            }
+        };
+
+        await _client.UpsertAsync(_collectionName, new[] { point }, cancellationToken: cancellationToken);
+    }
+
+    /// <inheritdoc />
+    /// <exception cref="ArgumentOutOfRangeException"><paramref name="limit"/> is negative.</exception>
+    public async Task<List<AddressEmbedding>> SearchSimilarAddressesAsync(float[] queryEmbedding, int limit = 5, CancellationToken cancellationToken = default)
+    {
+        ArgumentNullException.ThrowIfNull(queryEmbedding);
+        // A negative limit would wrap to a huge value in the ulong cast below.
+        ArgumentOutOfRangeException.ThrowIfNegative(limit);
+
+        var results = await _client.SearchAsync(_collectionName, queryEmbedding, limit: (ulong)limit, cancellationToken: cancellationToken);
+
+        return results.Select(r => new AddressEmbedding
+        {
+            // Points not written by StoreAddressAsync may lack a UUID id or the address payload entry.
+            Id = Guid.TryParse(r.Id?.Uuid, out var id) ? id : Guid.Empty,
+            FullAddress = r.Payload.TryGetValue(AddressPayloadKey, out var value) ? value.StringValue : string.Empty,
+            Vector = Array.Empty<float>()
+        }).ToList();
+    }
+}
\ No newline at end of file
diff --git a/VectorSearchApp/VectorSearchApp.csproj b/VectorSearchApp/VectorSearchApp.csproj
new file mode 100644
index 0000000..50d1a8c
--- /dev/null
+++ b/VectorSearchApp/VectorSearchApp.csproj
@@ -0,0 +1,24 @@
+<Project Sdk="Microsoft.NET.Sdk">
+
+  <PropertyGroup>
+    <OutputType>Exe</OutputType>
+    <TargetFramework>net8.0</TargetFramework>
+    <ImplicitUsings>enable</ImplicitUsings>
+    <Nullable>enable</Nullable>
+  </PropertyGroup>
+
+  <!-- NOTE(review): package versions below are placeholders; confirm them against the original project file. -->
+  <ItemGroup>
+    <PackageReference Include="Microsoft.Extensions.Configuration" Version="8.0.0" />
+    <PackageReference Include="Microsoft.Extensions.Configuration.Binder" Version="8.0.0" />
+    <PackageReference Include="Microsoft.Extensions.Configuration.Json" Version="8.0.0" />
+    <PackageReference Include="Qdrant.Client" Version="1.12.0" />
+  </ItemGroup>
+
+  <ItemGroup>
+    <None Update="appsettings.json">
+      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+    </None>
+  </ItemGroup>
+
+</Project>
diff --git a/VectorSearchApp/appsettings.json b/VectorSearchApp/appsettings.json
new file mode 100644
index 0000000..9f810c5
--- /dev/null
+++ b/VectorSearchApp/appsettings.json
@@ -0,0 +1,15 @@
+{
+  "Qdrant": {
+    "Host": "localhost",
+    "GrpcPort": 6334,
+    "HttpPort": 6333,
+    "CollectionName": "addresses"
+  },
+  "Embedding": {
+    "ModelName": "sentence-transformers/all-MiniLM-L6-v2",
+    "Dimension": 384
+  },
+  "App": {
+    "BatchSize": 10
+  }
+}
\ No newline at end of file
diff --git a/VectorSearchApp/docker-compose.yml b/VectorSearchApp/docker-compose.yml
new file mode 100644
index 0000000..25c1acc
--- /dev/null
+++ b/VectorSearchApp/docker-compose.yml
@@ -0,0 +1,17 @@
+version: '3.8'
+
+services:
+  qdrant:
+    image: qdrant/qdrant:latest
+    container_name: vector-search-qdrant
+    ports:
+      - "6333:6333"
+      - "6334:6334"
+    volumes:
+      - qdrant_storage:/qdrant/storage
+    environment:
+      - QDRANT__SERVICE__GRPC_PORT=6334
+      - QDRANT__SERVICE__HTTP_PORT=6333
+
+volumes:
+  qdrant_storage:
\ No newline at end of file
diff --git a/appsettings.json b/appsettings.json
new file mode 100644
index 0000000..9f810c5
--- /dev/null
+++ b/appsettings.json
@@ -0,0 +1,15 @@
+{
+  "Qdrant": {
+    "Host": "localhost",
+    "GrpcPort": 6334,
+    "HttpPort": 6333,
+    "CollectionName": "addresses"
+  },
+  "Embedding": {
+    "ModelName": "sentence-transformers/all-MiniLM-L6-v2",
+    "Dimension": 384
+  },
+  "App": {
+    "BatchSize": 10
+  }
+}
\ No newline at end of file
diff --git a/vector-search-csharp.sln b/vector-search-csharp.sln
new file mode 100644
index 0000000..e35790e
--- /dev/null
+++ b/vector-search-csharp.sln
@@ -0,0 +1,24 @@
+Microsoft Visual Studio Solution File, Format Version 12.00
+# Visual Studio Version 17
+VisualStudioVersion = 17.5.2.0
+MinimumVisualStudioVersion = 10.0.40219.1
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "VectorSearchApp", "VectorSearchApp\VectorSearchApp.csproj", "{9180D2C9-AC14-132A-3557-75D22DD97295}"
+EndProject
+Global
+	GlobalSection(SolutionConfigurationPlatforms) = preSolution
+		Debug|Any CPU = Debug|Any CPU
+		Release|Any CPU = Release|Any CPU
+	EndGlobalSection
+	GlobalSection(ProjectConfigurationPlatforms) = postSolution
+		{9180D2C9-AC14-132A-3557-75D22DD97295}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+		{9180D2C9-AC14-132A-3557-75D22DD97295}.Debug|Any CPU.Build.0 = Debug|Any CPU
+		{9180D2C9-AC14-132A-3557-75D22DD97295}.Release|Any CPU.ActiveCfg = Release|Any CPU
+		{9180D2C9-AC14-132A-3557-75D22DD97295}.Release|Any CPU.Build.0 = Release|Any CPU
+	EndGlobalSection
+	GlobalSection(SolutionProperties) = preSolution
+		HideSolutionNode = FALSE
+	EndGlobalSection
+	GlobalSection(ExtensibilityGlobals) = postSolution
+		SolutionGuid = {717EF272-7C5B-4D0F-8258-9F56AA1D2507}
+	EndGlobalSection
+EndGlobal