Wiki source code of FactHarbor POC1 Architecture Analysis
Version 6.1 by Robert Schaub on 2026/01/02 10:06
Show last authors
| author | version | line-number | content |
|---|---|---|---|
| 1 | = FactHarbor POC1 Architecture Analysis = | ||
| 2 | |||
| 3 | **Version:** 2.6.17 | ||
| 4 | **Analysis Date:** January 2026 | ||
| 5 | **Document Purpose:** Technical diagrams, gap analysis, and optimization recommendations | ||
| 6 | |||
| 7 | ---- | ||
| 8 | |||
| 9 | == 1. AKEL Flow Diagram (with LLM and WebSearch Interactions) == | ||
| 10 | |||
| 11 | |||
| 12 | {{mermaid}} | ||
| 13 | flowchart TB | ||
| 14 | subgraph Input["📥 Input Layer"] | ||
| 15 | URL[URL Input] | ||
| 16 | TEXT[Text Input] | ||
| 17 | end | ||
| 18 | |||
| 19 | subgraph Retrieval["🔍 Content Retrieval"] | ||
| 20 | FETCH[extractTextFromUrl] | ||
| 21 | PDF[PDF Parser<br/>pdf-parse v1] | ||
| 22 | HTML[HTML Parser<br/>cheerio] | ||
| 23 | end | ||
| 24 | |||
| 25 | subgraph AKEL["🧠 AKEL Pipeline"] | ||
| 26 | direction TB | ||
| 27 | |||
| 28 | subgraph Step1["Step 1: Understand"] | ||
| 29 | UNDERSTAND[understandClaim<br/>━━━━━━━━━━━━━<br/>• Detect input type<br/>• Extract claims<br/>• Identify dependencies<br/>• Assign risk tiers] | ||
| 30 | LLM1[("🤖 LLM Call #1<br/>Claude/GPT/Gemini")] | ||
| 31 | end | ||
| 32 | |||
| 33 | subgraph Step2["Step 2: Research (Iterative)"] | ||
| 34 | DECIDE[decideNextResearch<br/>━━━━━━━━━━━━━<br/>• Generate queries<br/>• Focus areas] | ||
| 35 | |||
| 36 | SEARCH[("🌐 Web Search<br/>Google CSE / SerpAPI")] | ||
| 37 | |||
| 38 | FETCHSRC[fetchSourceContent<br/>━━━━━━━━━━━━━<br/>• Parallel fetching<br/>• Timeout handling] | ||
| 39 | |||
| 40 | EXTRACT[extractFacts<br/>━━━━━━━━━━━━━<br/>• Parse sources<br/>• Extract facts] | ||
| 41 | LLM2[("🤖 LLM Call #2-N<br/>Per source")] | ||
| 42 | end | ||
| 43 | |||
| 44 | subgraph Step3["Step 3: Verdict Generation"] | ||
| 45 | VERDICT[generateVerdicts<br/>━━━━━━━━━━━━━<br/>• Claim verdicts<br/>• Article verdict<br/>• Dependency propagation] | ||
| 46 | LLM3[("🤖 LLM Call #N+1<br/>Final synthesis")] | ||
| 47 | end | ||
| 48 | |||
| 49 | subgraph Step4["Step 4: Report"] | ||
| 50 | REPORT[buildTwoPanelSummary<br/>━━━━━━━━━━━━━<br/>• Format results<br/>• Generate markdown] | ||
| 51 | end | ||
| 52 | end | ||
| 53 | |||
| 54 | subgraph Output["📤 Output"] | ||
| 55 | RESULT[AnalysisResult JSON] | ||
| 56 | MARKDOWN[Report Markdown] | ||
| 57 | end | ||
| 58 | |||
| 59 | %% Flow connections | ||
| 60 | URL --> FETCH | ||
| 61 | TEXT --> UNDERSTAND | ||
| 62 | FETCH --> PDF | ||
| 63 | FETCH --> HTML | ||
| 64 | PDF --> UNDERSTAND | ||
| 65 | HTML --> UNDERSTAND | ||
| 66 | |||
| 67 | UNDERSTAND --> LLM1 | ||
| 68 | LLM1 --> DECIDE | ||
| 69 | |||
| 70 | DECIDE --> SEARCH | ||
| 71 | SEARCH --> FETCHSRC | ||
| 72 | FETCHSRC --> EXTRACT | ||
| 73 | EXTRACT --> LLM2 | ||
| 74 | LLM2 --> DECIDE | ||
| 75 | |||
| 76 | DECIDE -->|"Research Complete"| VERDICT | ||
| 77 | VERDICT --> LLM3 | ||
| 78 | LLM3 --> REPORT | ||
| 79 | |||
| 80 | REPORT --> RESULT | ||
| 81 | REPORT --> MARKDOWN | ||
| 82 | |||
| 83 | %% Styling | ||
| 84 | classDef llm fill:#e1f5fe,stroke:#01579b,stroke-width:2px | ||
| 85 | classDef search fill:#fff3e0,stroke:#e65100,stroke-width:2px | ||
| 86 | classDef step fill:#f3e5f5,stroke:#4a148c,stroke-width:2px | ||
| 87 | |||
| 88 | class LLM1,LLM2,LLM3 llm | ||
| 89 | class SEARCH search | ||
| 90 | class UNDERSTAND,DECIDE,FETCHSRC,EXTRACT,VERDICT,REPORT step | ||
| 91 | {{/mermaid}} | ||
| 92 | |||
| 93 | ---- | ||
| 94 | |||
| 95 | == 2. ERD Data Model (Current POC1 Implementation) == | ||
| 96 | |||
| 97 | {{mermaid}} | ||
| 98 | erDiagram | ||
| 99 | JOB ||--o{ JOB_EVENT : "has" | ||
| 100 | JOB ||--|| ANALYSIS_RESULT : "produces" | ||
| 101 | ANALYSIS_RESULT ||--o{ CLAIM_VERDICT : "contains" | ||
| 102 | ANALYSIS_RESULT ||--o{ FETCHED_SOURCE : "references" | ||
| 103 | ANALYSIS_RESULT ||--o{ EXTRACTED_FACT : "contains" | ||
| 104 | CLAIM_VERDICT }o--o{ EXTRACTED_FACT : "supported by" | ||
| 105 | FETCHED_SOURCE ||--o{ EXTRACTED_FACT : "provides" | ||
| 106 | CLAIM_VERDICT }o--o{ CLAIM_VERDICT : "depends on" | ||
| 107 | |||
| 108 | JOB { | ||
| 109 | string JobId PK "GUID" | ||
| 110 | string Status "QUEUED|RUNNING|COMPLETE|FAILED" | ||
| 111 | int Progress "0-100" | ||
| 112 | datetime CreatedUtc | ||
| 113 | datetime UpdatedUtc | ||
| 114 | string InputType "text|url" | ||
| 115 | string InputValue "URL or text content" | ||
| 116 | string InputPreview "First 100 chars" | ||
| 117 | json ResultJson "Full analysis result" | ||
| 118 | string ReportMarkdown "Formatted report" | ||
| 119 | } | ||
| 120 | |||
| 121 | JOB_EVENT { | ||
| 122 | long Id PK | ||
| 123 | string JobId FK | ||
| 124 | datetime TsUtc | ||
| 125 | string Level "info|warn|error" | ||
| 126 | string Message | ||
| 127 | } | ||
| 128 | |||
| 129 | ANALYSIS_RESULT { | ||
| 130 | string schemaVersion "2.6.17" | ||
| 131 | string inputType "question|claim|article" | ||
| 132 | boolean isQuestion | ||
| 133 | string articleThesis | ||
| 134 | int articleTruthPercentage "0-100" | ||
| 135 | string articleVerdict "7-point scale" | ||
| 136 | json claimPattern "total/supported/uncertain/refuted" | ||
| 137 | boolean isPseudoscience | ||
| 138 | int llmCalls "Total LLM invocations" | ||
| 139 | json searchQueries "All search queries" | ||
| 140 | } | ||
| 141 | |||
| 142 | CLAIM_VERDICT { | ||
| 143 | string claimId PK "SC1, SC2, etc." | ||
| 144 | string claimText | ||
| 145 | boolean isCentral | ||
| 146 | string claimRole "attribution|source|timing|core" | ||
| 147 | string_array dependsOn "Prerequisite claim IDs" | ||
| 148 | boolean dependencyFailed | ||
| 149 | string llmVerdict "WELL-SUPPORTED|PARTIALLY-SUPPORTED|UNCERTAIN|REFUTED" | ||
| 150 | string verdict "7-point: True to False" | ||
| 151 | int confidence "0-100" | ||
| 152 | int truthPercentage "0-100" | ||
| 153 | string riskTier "A|B|C" | ||
| 154 | string reasoning | ||
| 155 | string_array supportingFactIds | ||
| 156 | string highlightColor "green to dark-red" | ||
| 157 | } | ||
| 158 | |||
| 159 | FETCHED_SOURCE { | ||
| 160 | string id PK "S1, S2, etc." | ||
| 161 | string url | ||
| 162 | string title | ||
| 163 | int trackRecordScore "0-100 or null" | ||
| 164 | string fullText "Extracted content" | ||
| 165 | datetime fetchedAt | ||
| 166 | string category "legal|news|academic" | ||
| 167 | boolean fetchSuccess | ||
| 168 | string searchQuery "Which query found this" | ||
| 169 | } | ||
| 170 | |||
| 171 | EXTRACTED_FACT { | ||
| 172 | string id PK "S1-F1, S1-F2, etc." | ||
| 173 | string fact "The factual statement" | ||
| 174 | string category "legal_provision|evidence|expert_quote|statistic|event|criticism" | ||
| 175 | string specificity "high|medium" | ||
| 176 | string sourceId FK | ||
| 177 | string sourceUrl | ||
| 178 | string sourceTitle | ||
| 179 | string sourceExcerpt | ||
| 180 | string relatedProceedingId | ||
| 181 | boolean isContestedClaim | ||
| 182 | string claimSource | ||
| 183 | } | ||
| 184 | {{/mermaid}} | ||
| 185 | |||
| 186 | ---- | ||
| 187 | |||
| 188 | == 3. Overall Architecture with Interactions == | ||
| 189 | |||
| 190 | {{mermaid}} | ||
| 191 | flowchart TB | ||
| 192 | subgraph Client["🖥️ Client Layer"] | ||
| 193 | BROWSER[Web Browser] | ||
| 194 | ANALYZE_PAGE["/analyze page<br/>React + TailwindCSS"] | ||
| 195 | JOBS_PAGE["/jobs page<br/>Job history & status"] | ||
| 196 | end | ||
| 197 | |||
| 198 | subgraph NextJS["⚡ Next.js Web App (apps/web)"] | ||
| 199 | direction TB | ||
| 200 | |||
| 201 | subgraph API_Routes["API Routes"] | ||
| 202 | ANALYZE_API["/api/fh/analyze<br/>━━━━━━━━━━━━━<br/>POST: Create job"] | ||
| 203 | JOBS_API["/api/fh/jobs<br/>━━━━━━━━━━━━━<br/>GET: List jobs<br/>POST: Create job"] | ||
| 204 | JOB_API["/api/fh/jobs/[id]<br/>━━━━━━━━━━━━━<br/>GET: Job status"] | ||
| 205 | EVENTS_API["/api/fh/jobs/[id]/events<br/>━━━━━━━━━━━━━<br/>GET: Job events (SSE)"] | ||
| 206 | RUN_JOB["/api/internal/run-job<br/>━━━━━━━━━━━━━<br/>POST: Execute analysis"] | ||
| 207 | end | ||
| 208 | |||
| 209 | subgraph Lib["Core Libraries"] | ||
| 210 | ANALYZER["analyzer.ts<br/>━━━━━━━━━━━━━<br/>AKEL Pipeline<br/>2918 lines"] | ||
| 211 | RETRIEVAL["retrieval.ts<br/>━━━━━━━━━━━━━<br/>URL content extraction"] | ||
| 212 | WEBSEARCH["web-search.ts<br/>━━━━━━━━━━━━━<br/>Search abstraction"] | ||
| 213 | MBFC["mbfc-loader.ts<br/>━━━━━━━━━━━━━<br/>Source reliability"] | ||
| 214 | end | ||
| 215 | end | ||
| 216 | |||
| 217 | subgraph DotNet["🔧 .NET API (apps/api)"] | ||
| 218 | DOTNET_API["FactHarbor.Api<br/>ASP.NET Core"] | ||
| 219 | |||
| 220 | subgraph Controllers["Controllers"] | ||
| 221 | ANALYZE_CTRL["AnalyzeController"] | ||
| 222 | JOBS_CTRL["JobsController"] | ||
| 223 | INTERNAL_CTRL["InternalJobsController"] | ||
| 224 | end | ||
| 225 | |||
| 226 | subgraph Services["Services"] | ||
| 227 | JOB_SVC["JobService<br/>━━━━━━━━━━━━━<br/>Job CRUD operations"] | ||
| 228 | RUNNER_CLIENT["RunnerClient<br/>━━━━━━━━━━━━━<br/>Calls Next.js runner"] | ||
| 229 | end | ||
| 230 | |||
| 231 | DB[(SQLite Database<br/>━━━━━━━━━━━━━<br/>JobEntity<br/>JobEventEntity)] | ||
| 232 | end | ||
| 233 | |||
| 234 | subgraph External["🌐 External Services"] | ||
| 235 | LLM_PROVIDERS["LLM Providers<br/>━━━━━━━━━━━━━<br/>• Anthropic Claude<br/>• OpenAI GPT<br/>• Google Gemini<br/>• Mistral"] | ||
| 236 | SEARCH_PROVIDERS["Search Providers<br/>━━━━━━━━━━━━━<br/>• Google CSE<br/>• SerpAPI<br/>• Brave<br/>• Tavily"] | ||
| 237 | WEB["Web Content<br/>━━━━━━━━━━━━━<br/>• News sites<br/>• PDFs<br/>• Academic sources"] | ||
| 238 | end | ||
| 239 | |||
| 240 | %% Client interactions | ||
| 241 | BROWSER --> ANALYZE_PAGE | ||
| 242 | BROWSER --> JOBS_PAGE | ||
| 243 | ANALYZE_PAGE --> ANALYZE_API | ||
| 244 | JOBS_PAGE --> JOBS_API | ||
| 245 | |||
| 246 | %% Next.js internal | ||
| 247 | ANALYZE_API --> JOBS_API | ||
| 248 | JOBS_API -->|"Proxy"| DOTNET_API | ||
| 249 | JOB_API -->|"Proxy"| DOTNET_API | ||
| 250 | EVENTS_API -->|"Proxy"| DOTNET_API | ||
| 251 | |||
| 252 | %% .NET flow | ||
| 253 | DOTNET_API --> ANALYZE_CTRL | ||
| 254 | DOTNET_API --> JOBS_CTRL | ||
| 255 | DOTNET_API --> INTERNAL_CTRL | ||
| 256 | ANALYZE_CTRL --> JOB_SVC | ||
| 257 | JOBS_CTRL --> JOB_SVC | ||
| 258 | JOB_SVC --> DB | ||
| 259 | JOB_SVC --> RUNNER_CLIENT | ||
| 260 | RUNNER_CLIENT -->|"HTTP POST"| RUN_JOB | ||
| 261 | |||
| 262 | %% Analysis execution | ||
| 263 | RUN_JOB --> ANALYZER | ||
| 264 | ANALYZER --> RETRIEVAL | ||
| 265 | ANALYZER --> WEBSEARCH | ||
| 266 | ANALYZER --> MBFC | ||
| 267 | |||
| 268 | %% External calls | ||
| 269 | ANALYZER -->|"AI SDK"| LLM_PROVIDERS | ||
| 270 | WEBSEARCH --> SEARCH_PROVIDERS | ||
| 271 | RETRIEVAL --> WEB | ||
| 272 | |||
| 273 | %% Styling | ||
| 274 | classDef external fill:#fff3e0,stroke:#e65100,stroke-width:2px | ||
| 275 | classDef core fill:#e8f5e9,stroke:#2e7d32,stroke-width:2px | ||
| 276 | classDef api fill:#e3f2fd,stroke:#1565c0,stroke-width:2px | ||
| 277 | |||
| 278 | class LLM_PROVIDERS,SEARCH_PROVIDERS,WEB external | ||
| 279 | class ANALYZER,RETRIEVAL,WEBSEARCH,MBFC core | ||
| 280 | class ANALYZE_API,JOBS_API,JOB_API,EVENTS_API,RUN_JOB api | ||
| 281 | {{/mermaid}} | ||
| 282 | |||
| 283 | ---- | ||
| 284 | |||
| 285 | == 4. Specification vs Implementation Gap Analysis == | ||
| 286 | |||
| 287 | === 4.1 Data Model Gaps === | ||
| 288 | |||
| 289 | | Specification Entity | POC1 Status | Gap Description | | ||
| 290 | |-|-|-| | ||
| 291 | | **Claim** | ⚠️ Partial | No persistent storage; claims exist only in JSON result. Missing: `status`, `confidence_score`, `risk_score`, `completeness_score`, `version`, `views`, `edit_count` | | ||
| 292 | | **Evidence** | ⚠️ Partial | Implemented as `ExtractedFact` but lacks: `supports` enum, proper `relevance_score` | | ||
| 293 | | **Source** | ⚠️ Partial | `FetchedSource` exists but missing: `type` enum, `accuracy_history`, `correction_frequency`, weekly update scheduler | | ||
| 294 | | **Scenario** | ❌ Missing | Not implemented. Claims are evaluated directly without scenario contexts | | ||
| 295 | | **Verdict** | ⚠️ Partial | `ClaimVerdict` exists but missing: `likelihood_range`, `uncertainty_factors` array, proper `explanation_summary` | | ||
| 296 | | **User** | ❌ Missing | No user authentication or role system | | ||
| 297 | | **Edit** | ❌ Missing | No audit trail for changes | | ||
| 298 | |||
| 299 | === 4.2 AKEL Component Gaps === | ||
| 300 | |||
| 301 | | Spec Component | POC1 Status | Gap Description | | ||
| 302 | |-|-|-| | ||
| 303 | | **AKEL Orchestrator** | ✅ Implemented | `runAnalysis()` function serves this role | | ||
| 304 | | **Claim Extractor** | ✅ Implemented | `understandClaim()` with claim role/dependency tracking | | ||
| 305 | | **Claim Classifier** | ⚠️ Partial | Risk tier (A/B/C) assigned, but no domain classification | | ||
| 306 | | **Scenario Generator** | ❌ Missing | Claims evaluated without scenario extraction | | ||
| 307 | | **Evidence Summarizer** | ✅ Implemented | `extractFacts()` function | | ||
| 308 | | **Contradiction Detector** | ⚠️ Partial | `isContestedClaim` flag exists but no active contradiction search | | ||
| 309 | | **Quality Gate Validator** | ❌ Missing | No source quality gates, no mandatory checks | | ||
| 310 | | **Audit Sampling Scheduler** | ❌ Missing | No audit system | | ||
| 311 | | **Embedding Handler** | ❌ Missing | Not needed for POC | | ||
| 312 | | **Federation Sync** | ❌ Missing | Not needed for POC | | ||
| 313 | |||
| 314 | === 4.3 Architecture Gaps === | ||
| 315 | |||
| 316 | | Spec Requirement | POC1 Status | Gap Description | | ||
| 317 | |-|-|-| | ||
| 318 | | **Three-Layer Architecture** | ✅ Implemented | Interface (Next.js) → Processing (AKEL) → Data (SQLite) | | ||
| 319 | | **LLM Abstraction Layer** | ✅ Implemented | AI SDK supports multiple providers with failover | | ||
| 320 | | **PostgreSQL Primary DB** | ⚠️ Different | Using SQLite for simplicity (acceptable for POC) | | ||
| 321 | | **Redis Caching** | ❌ Missing | No caching layer | | ||
| 322 | | **S3 Archival** | ❌ Missing | No long-term storage | | ||
| 323 | | **Background Jobs** | ❌ Missing | No scheduler for source updates or cache warming | | ||
| 324 | | **Quality Monitoring** | ⚠️ Partial | LLM call counting exists, but no anomaly detection | | ||
| 325 | |||
| 326 | === 4.4 Publication & Review Gaps === | ||
| 327 | |||
| 328 | | Spec Feature | POC1 Status | Gap Description | | ||
| 329 | |-|-|-| | ||
| 330 | | **Risk Tier Publication Rules** | ❌ Missing | All results published immediately regardless of tier | | ||
| 331 | | **Human Review Queue** | ❌ Missing | No review workflow | | ||
| 332 | | **AI-Generated Labeling** | ⚠️ Partial | Results show "AI analysis" but no formal labeling system | | ||
| 333 | | **Audit Rate Sampling** | ❌ Missing | No sampling audits | | ||
| 334 | |||
| 335 | ---- | ||
| 336 | |||
| 337 | == 5. Optimization Recommendations == | ||
| 338 | |||
| 339 | === 5.1 Cost Optimizations === | ||
| 340 | |||
| 341 | {{mermaid}} | ||
| 342 | pie title Current LLM Cost Distribution (Estimated per Analysis) | ||
| 343 | "Step 1: Understand" : 15 | ||
| 344 | "Step 2: Research (per source)" : 60 | ||
| 345 | "Step 3: Verdicts" : 25 | ||
| 346 | {{/mermaid}} | ||
| 347 | |||
| 348 | | Optimization | Estimated Savings | Implementation Effort | | ||
| 349 | |-|-|-| | ||
| 350 | | **Cache claim understanding** | 30-50% on repeated claims | Medium | | ||
| 351 | | **Use Haiku for fact extraction** | 40% on Step 2 costs | Low (config change) | | ||
| 352 | | **Batch fact extraction** | 20% fewer API calls | Medium | | ||
| 353 | | **Skip search for known claims** | 50%+ for cached claims | High (needs claim DB) | | ||
| 354 | | **Reduce max iterations** | Step 2 cost falls in proportion to the iterations removed | Low (config change) | | ||
| 355 | |||
| 356 | === 5.2 Timing Optimizations === | ||
| 357 | |||
| 358 | {{mermaid}} | ||
| 359 | gantt | ||
| 360 | title Current Analysis Timeline (Typical) | ||
| 361 | dateFormat ss | ||
| 362 | axisFormat %M:%S | ||
| 363 | |||
| 364 | section Current Flow | ||
| 365 | URL Fetch :a1, 00, 2s | ||
| 366 | Step 1 Understand :a2, after a1, 15s | ||
| 367 | Search Iteration 1 :a3, after a2, 8s | ||
| 368 | Fetch Sources 1 :a4, after a3, 10s | ||
| 369 | Extract Facts 1 :a5, after a4, 12s | ||
| 370 | Search Iteration 2 :a6, after a5, 8s | ||
| 371 | Fetch Sources 2 :a7, after a6, 10s | ||
| 372 | Extract Facts 2 :a8, after a7, 12s | ||
| 373 | Generate Verdicts :a9, after a8, 15s | ||
| 374 | |||
| 375 | section Optimized Flow | ||
| 376 | URL Fetch :b1, 00, 2s | ||
| 377 | Step 1 Understand :b2, after b1, 10s | ||
| 378 | Search + Fetch (parallel) :b3, after b2, 12s | ||
| 379 | Extract Facts (batched) :b4, after b3, 8s | ||
| 380 | Generate Verdicts :b5, after b4, 10s | ||
| 381 | {{/mermaid}} | ||
| 382 | |||
| 383 | | Optimization | Time Savings | Notes | | ||
| 384 | |-|-|-| | ||
| 385 | | **Parallel source fetching** | Already implemented | Currently fetches 3 sources in parallel | | ||
| 386 | | **Streaming LLM responses** | 20-30% perceived | User sees progress faster | | ||
| 387 | | **Search query batching** | 10-15% | Send multiple queries to search API | | ||
| 388 | | **Reduce prompt size** | 5-10% per call | Optimize system prompts | | ||
| 389 | | **Use faster models for extraction** | 30-40% on Step 2 | Claude Haiku vs Sonnet | | ||
| 390 | |||
| 391 | === 5.3 Priority Recommendations === | ||
| 392 | |||
| 393 | 1. **HIGH PRIORITY - Implement Claim Caching** | ||
| 394 | - Cache claim verdicts by content hash | ||
| 395 | - Reduces costs for repeated/similar claims | ||
| 396 | - Enables the separated verdict architecture (see Section 6) | ||
| 397 | |||
| 398 | 2. **MEDIUM PRIORITY - Use Tiered Models** | ||
| 399 | - Step 1 (Understand): Sonnet (needs reasoning) | ||
| 400 | - Step 2 (Extract): Haiku (simple extraction) | ||
| 401 | - Step 3 (Verdicts): Sonnet (needs synthesis) | ||
| 402 | |||
| 403 | 3. **LOW PRIORITY - Add Redis Cache** | ||
| 404 | - Cache source content (24h TTL) | ||
| 405 | - Cache search results (1h TTL) | ||
| 406 | - Reduces external API calls | ||
| 407 | |||
| 408 | ---- | ||
| 409 | |||
| 410 | == 6. Separated Verdict Architecture Proposal == | ||
| 411 | |||
| 412 | === 6.1 Current Architecture === | ||
| 413 | |||
| 414 | {{mermaid}} | ||
| 415 | flowchart LR | ||
| 416 | subgraph Current["Current: Monolithic Analysis"] | ||
| 417 | INPUT[Article Input] --> ANALYZE[Full Analysis Pipeline] | ||
| 418 | ANALYZE --> CLAIMS[Claim Verdicts] | ||
| 419 | ANALYZE --> ARTICLE[Article Verdict] | ||
| 420 | CLAIMS -.->|"Aggregated"| ARTICLE | ||
| 421 | end | ||
| 422 | {{/mermaid}} | ||
| 423 | |||
| 424 | **Issues:** | ||
| 425 | - Every analysis re-processes all claims | ||
| 426 | - No caching of individual claim verdicts | ||
| 427 | - Article verdict tightly coupled to claim extraction | ||
| 428 | |||
| 429 | === 6.2 Proposed Separated Architecture === | ||
| 430 | |||
| 431 | {{mermaid}} | ||
| 432 | flowchart TB | ||
| 433 | subgraph Input["Input Processing"] | ||
| 434 | ARTICLE[Article/Text Input] | ||
| 435 | EXTRACT[Claim Extraction] | ||
| 436 | end | ||
| 437 | |||
| 438 | subgraph ClaimLayer["Claim Verdict Layer (Cacheable)"] | ||
| 439 | CACHE[(Claim Cache<br/>━━━━━━━━━━━━━<br/>Key: claim_hash<br/>TTL: 7 days)] | ||
| 440 | |||
| 441 | CLAIM1["Claim 1 Analysis"] | ||
| 442 | CLAIM2["Claim 2 Analysis"] | ||
| 443 | CLAIM3["Claim N Analysis"] | ||
| 444 | |||
| 445 | VERDICT1[Claim 1 Verdict] | ||
| 446 | VERDICT2[Claim 2 Verdict] | ||
| 447 | VERDICT3[Claim N Verdict] | ||
| 448 | end | ||
| 449 | |||
| 450 | subgraph ArticleLayer["Article Verdict Layer (Dynamic)"] | ||
| 451 | AGGREGATE[Aggregate Claim Verdicts] | ||
| 452 | CONTEXT[Apply Article Context<br/>━━━━━━━━━━━━━<br/>• Claim relationships<br/>• Logical structure<br/>• Author intent] | ||
| 453 | ARTICLE_VERDICT[Article Verdict] | ||
| 454 | end | ||
| 455 | |||
| 456 | %% Flow | ||
| 457 | ARTICLE --> EXTRACT | ||
| 458 | EXTRACT --> CLAIM1 | ||
| 459 | EXTRACT --> CLAIM2 | ||
| 460 | EXTRACT --> CLAIM3 | ||
| 461 | |||
| 462 | CLAIM1 -->|"Cache Miss"| VERDICT1 | ||
| 463 | CLAIM2 -->|"Cache Hit"| VERDICT2 | ||
| 464 | CLAIM3 -->|"Cache Miss"| VERDICT3 | ||
| 465 | |||
| 466 | CLAIM1 <-.-> CACHE | ||
| 467 | CLAIM2 <-.-> CACHE | ||
| 468 | CLAIM3 <-.-> CACHE | ||
| 469 | |||
| 470 | VERDICT1 --> AGGREGATE | ||
| 471 | VERDICT2 --> AGGREGATE | ||
| 472 | VERDICT3 --> AGGREGATE | ||
| 473 | |||
| 474 | AGGREGATE --> CONTEXT | ||
| 475 | CONTEXT --> ARTICLE_VERDICT | ||
| 476 | |||
| 477 | classDef cache fill:#fff9c4,stroke:#f57f17,stroke-width:2px | ||
| 478 | classDef dynamic fill:#e8f5e9,stroke:#2e7d32,stroke-width:2px | ||
| 479 | class CACHE cache | ||
| 480 | class CONTEXT,ARTICLE_VERDICT dynamic | ||
| 481 | {{/mermaid}} | ||
| 482 | |||
| 483 | === 6.3 Benefits Analysis === | ||
| 484 | |||
| 485 | | Benefit | Impact | Rationale | | ||
| 486 | |-|-|-| | ||
| 487 | | **Cost Reduction** | 40-70% for repeated claims | Many articles share common claims (e.g., "COVID vaccines are safe") | | ||
| 488 | | **Faster Analysis** | 50%+ for cached claims | Skip research + LLM calls for known claims | | ||
| 489 | | **Consistency** | High | Same claim always gets same verdict (until cache expires) | | ||
| 490 | | **Freshness Control** | Configurable TTL | Balance consistency vs. new evidence | | ||
| 491 | | **Scalability** | Linear improvement | More users = higher cache hit rate | | ||
| 492 | |||
| 493 | === 6.4 Implementation Considerations === | ||
| 494 | |||
| 495 | **Claim Hashing Strategy:** | ||
| 496 | {{code language="typescript"}}function getClaimHash(claim: string): string { | ||
| 497 | // Normalize: lowercase, remove punctuation, stem words | ||
| 498 | const normalized = normalize(claim); | ||
| 499 | // Hash for cache key | ||
| 500 | return crypto.createHash('sha256').update(normalized).digest('hex').slice(0, 16); | ||
| 501 | }{{/code}} | ||
| 502 | |||
| 503 | **Cache Invalidation Triggers:** | ||
| 504 | - TTL expiration (default 7 days) | ||
| 505 | - Major news event related to claim topic | ||
| 506 | - Significant change in a source's track record | ||
| 507 | - Manual invalidation by moderator | ||
| 508 | |||
| 509 | **Article Verdict Considerations:** | ||
| 510 | - Article verdict should ALWAYS be dynamic (never cached) | ||
| 511 | - Same claims in different article contexts may yield different article verdicts | ||
| 512 | - Example: an article combining "Vaccines are safe" with "Vaccines cause autism" may be misleading overall even though the first claim is true | ||
| 513 | |||
| 514 | === 6.5 Recommendation === | ||
| 515 | |||
| 516 | **YES, separating claim verdicts from the article verdict is beneficial**, with the following caveats: | ||
| 517 | |||
| 518 | 1. **Claim verdicts should be cached** with semantic similarity matching (not just exact match) | ||
| 519 | 2. **Article verdicts should always be dynamic** to account for: | ||
| 520 | - Claim relationships and logical structure | ||
| 521 | - Author's argumentative strategy | ||
| 522 | - Context and framing | ||
| 523 | - Selective use of true claims to support false conclusions | ||
| 524 | |||
| 525 | 3. **Implementation phases:** | ||
| 526 | - Phase 1: Exact-match claim caching (simple hash) | ||
| 527 | - Phase 2: Semantic similarity caching (embedding-based) | ||
| 528 | - Phase 3: Federated claim sharing across instances | ||
| 529 | |||
| 530 | ---- | ||
| 531 | |||
| 532 | == 7. Summary == | ||
| 533 | |||
| 534 | === Current State === | ||
| 535 | |||
| 536 | - POC1 implements core AKEL pipeline successfully | ||
| 537 | - Claim dependency tracking is implemented | ||
| 538 | - Multiple LLM providers supported | ||
| 539 | - No persistent claim storage or caching | ||
| 540 | |||
| 541 | === Key Gaps from Specification === | ||
| 542 | |||
| 543 | - No scenario extraction | ||
| 544 | - No user/role system | ||
| 545 | - No audit trail | ||
| 546 | - No source track record updates | ||
| 547 | - No review queue | ||
| 548 | |||
| 549 | === Recommended Next Steps === | ||
| 550 | |||
| 551 | 1. Implement claim caching layer | ||
| 552 | 2. Separate claim vs article verdict generation | ||
| 553 | 3. Add Redis for source/search caching | ||
| 554 | 4. Implement tiered model selection | ||
| 555 | 5. Add basic audit logging |