Last modified by Robert Schaub on 2026/02/08 08:12

From version 8.1
edited by Robert Schaub
on 2026/01/02 10:13
Change comment: There is no comment for this version
To version 5.1
edited by Robert Schaub
on 2026/01/02 10:04
Change comment: Rollback to version 3.1

Summary

Details

Page properties
Content
... ... @@ -1,5 +1,6 @@
1 -= FactHarbor POC1 Architecture Analysis 1.Jan.26 =
1 += FactHarbor POC1 Architecture Analysis =
2 2  
3 +
3 3  **Version:** 2.6.17
4 4  **Analysis Date:** January 2026
5 5  **Document Purpose:** Technical diagrams, gap analysis, and optimization recommendations
... ... @@ -92,123 +92,12 @@
92 92  
93 93  ----
94 94  
96 +
95 95  == 2. ERD Data Model (Current POC1 Implementation) ==
96 96  
97 -**Data Objects ERD**
98 98  
99 99  {{mermaid}}
100 100  erDiagram
101 - ARTICLE ||--o{ CLAIM : "contains"
102 - ARTICLE ||--|| ARTICLE_VERDICT : "has"
103 - CLAIM ||--|| CLAIM_VERDICT : "has"
104 - CLAIM ||--o{ CLAIM : "depends on"
105 - CLAIM_VERDICT }o--o{ EVIDENCE : "supported by"
106 - SOURCE ||--o{ EVIDENCE : "provides"
107 - ARTICLE ||--o{ SOURCE : "references"
108 -
109 - ARTICLE {
110 - string id PK "Unique identifier (job ID)"
111 - string inputType "text | url"
112 - string inputValue "Original URL or text"
113 - string articleThesis "Main argument/thesis"
114 - string detectedInputType "question | claim | article"
115 - boolean isQuestion "True if input is a question"
116 - datetime createdAt "Analysis timestamp"
117 - datetime updatedAt "Last update"
118 - json distinctProceedings "Legal proceedings if any"
119 - boolean hasMultipleProceedings "Multi-proceeding flag"
120 - string proceedingContext "Context for proceedings"
121 - json logicalFallacies "Detected fallacies array"
122 - boolean isPseudoscience "Pseudoscience detection"
123 - string_array pseudoscienceCategories "Categories if detected"
124 - int llmCalls "Total LLM API calls"
125 - json searchQueries "All search queries performed"
126 - string schemaVersion "e.g. 2.6.17"
127 - }
128 -
129 - CLAIM {
130 - string id PK "SC1, SC2, C1, etc."
131 - string articleId FK "Parent article"
132 - string text "The claim statement"
133 - string type "legal | procedural | factual | evaluative"
134 - string claimRole "attribution | source | timing | core"
135 - string_array dependsOn "IDs of prerequisite claims"
136 - string_array keyEntities "Named entities in claim"
137 - boolean isCentral "Is this a central claim?"
138 - string relatedProceedingId "Linked proceeding if any"
139 - int startOffset "Position in original text"
140 - int endOffset "End position in original text"
141 - string approximatePosition "Descriptive position"
142 - }
143 -
144 - CLAIM_VERDICT {
145 - string id PK "Same as claim ID"
146 - string claimId FK "Reference to claim"
147 - string llmVerdict "WELL-SUPPORTED | PARTIALLY-SUPPORTED | UNCERTAIN | REFUTED"
148 - string verdict "True | Mostly True | Leaning True | Unverified | Leaning False | Mostly False | False"
149 - int confidence "0-100 LLM confidence"
150 - int truthPercentage "0-100 calibrated truth score"
151 - string riskTier "A (high) | B (medium) | C (low)"
152 - string reasoning "Explanation of verdict"
153 - string_array supportingFactIds "Evidence IDs supporting this"
154 - boolean dependencyFailed "True if prerequisite failed"
155 - string_array failedDependencies "Which deps failed"
156 - string highlightColor "green | light-green | yellow | orange | dark-orange | red | dark-red"
157 - boolean isPseudoscience "Pseudoscience flag"
158 - string escalationReason "Why verdict was escalated"
159 - }
160 -
161 - ARTICLE_VERDICT {
162 - string id PK "Same as article ID"
163 - string articleId FK "Reference to article"
164 - string llmArticleVerdict "Original LLM verdict"
165 - int llmArticleConfidence "Original LLM confidence"
166 - string articleVerdict "True | Mostly True | Leaning True | Unverified | Leaning False | Mostly False | False"
167 - int articleTruthPercentage "0-100 calibrated score"
168 - string articleVerdictReason "Why verdict differs from claims avg"
169 - int claimsAverageTruthPercentage "Average of claim verdicts"
170 - string claimsAverageVerdict "7-point average verdict"
171 - int claimsTotal "Total claims analyzed"
172 - int claimsSupported "Claims with truth >= 72%"
173 - int claimsUncertain "Claims with truth 43-71%"
174 - int claimsRefuted "Claims with truth < 43%"
175 - int centralClaimsTotal "Number of central claims"
176 - int centralClaimsSupported "Central claims supported"
177 - }
178 -
179 - EVIDENCE {
180 - string id PK "S1-F1, S1-F2 format"
181 - string sourceId FK "Reference to source"
182 - string claimId FK "Optional: specific claim this supports"
183 - string fact "The factual statement extracted"
184 - string category "legal_provision | evidence | expert_quote | statistic | event | criticism"
185 - string specificity "high | medium"
186 - string sourceExcerpt "Original text excerpt"
187 - string relatedProceedingId "Linked proceeding if any"
188 - boolean isContestedClaim "Is this a contested assertion"
189 - string claimSource "Who made contested claim"
190 - }
191 -
192 - SOURCE {
193 - string id PK "S1, S2, etc."
194 - string articleId FK "Parent article"
195 - string url "Full URL"
196 - string title "Page/document title"
197 - string domain "Extracted domain"
198 - int trackRecordScore "0-100 reliability score or null"
199 - string fullText "Extracted content"
200 - datetime fetchedAt "When content was fetched"
201 - string category "news | academic | government | legal"
202 - boolean fetchSuccess "True if fetch succeeded"
203 - string searchQuery "Which query found this"
204 - string mimeType "text/html | application/pdf"
205 - }
206 -{{/mermaid}}
207 -
208 -**Data Usage ERD**
209 -
210 -{{mermaid}}
211 -erDiagram
212 212   JOB ||--o{ JOB_EVENT : "has"
213 213   JOB ||--|| ANALYSIS_RESULT : "produces"
214 214   ANALYSIS_RESULT ||--o{ CLAIM_VERDICT : "contains"
... ... @@ -298,8 +298,10 @@
298 298  
299 299  ----
300 300  
191 +
301 301  == 3. Overall Architecture with Interactions ==
302 302  
194 +
303 303  {{mermaid}}
304 304  flowchart TB
305 305   subgraph Client["🖥️ Client Layer"]
... ... @@ -395,10 +395,14 @@
395 395  
396 396  ----
397 397  
290 +
398 398  == 4. Specification vs Implementation Gap Analysis ==
399 399  
293 +
294 +
400 400  === 4.1 Data Model Gaps ===
401 401  
297 +
402 402  | Specification Entity | POC1 Status | Gap Description |
403 403  |-|-|-|
404 404  | **Claim** | ⚠️ Partial | No persistent storage; claims exist only in JSON result. Missing: `status`, `confidence_score`, `risk_score`, `completeness_score`, `version`, `views`, `edit_count` |
... ... @@ -426,6 +426,7 @@
426 426  
427 427  === 4.3 Architecture Gaps ===
428 428  
325 +
429 429  | Spec Requirement | POC1 Status | Gap Description |
430 430  |-|-|-|
431 431  | **Three-Layer Architecture** | ✅ Implemented | Interface (Next.js) → Processing (AKEL) → Data (SQLite) |
... ... @@ -438,6 +438,7 @@
438 438  
439 439  === 4.4 Publication & Review Gaps ===
440 440  
338 +
441 441  | Spec Feature | POC1 Status | Gap Description |
442 442  |-|-|-|
443 443  | **Risk Tier Publication Rules** | ❌ Missing | All results published immediately regardless of tier |
... ... @@ -447,10 +447,14 @@
447 447  
448 448  ----
449 449  
348 +
450 450  == 5. Optimization Recommendations ==
451 451  
351 +
352 +
452 452  === 5.1 Cost Optimizations ===
453 453  
355 +
454 454  {{mermaid}}
455 455  pie title Current LLM Cost Distribution (Estimated per Analysis)
456 456   "Step 1: Understand" : 15
... ... @@ -468,6 +468,7 @@
468 468  
469 469  === 5.2 Timing Optimizations ===
470 470  
373 +
471 471  {{mermaid}}
472 472  gantt
473 473   title Current Analysis Timeline (Typical)
... ... @@ -503,6 +503,7 @@
503 503  
504 504  === 5.3 Priority Recommendations ===
505 505  
409 +
506 506  1. **HIGH PRIORITY - Implement Claim Caching**
507 507   - Cache claim verdicts by content hash
508 508   - Reduces costs for repeated/similar claims
... ... @@ -520,10 +520,14 @@
520 520  
521 521  ----
522 522  
427 +
523 523  == 6. Separated Verdict Architecture Proposal ==
524 524  
430 +
431 +
525 525  === 6.1 Current Architecture ===
526 526  
434 +
527 527  {{mermaid}}
528 528  flowchart LR
529 529   subgraph Current["Current: Monolithic Analysis"]
... ... @@ -539,8 +539,10 @@
539 539  - No caching of individual claim verdicts
540 540  - Article verdict tightly coupled to claim extraction
541 541  
450 +
542 542  === 6.2 Proposed Separated Architecture ===
543 543  
453 +
544 544  {{mermaid}}
545 545  flowchart TB
546 546   subgraph Input["Input Processing"]
... ... @@ -593,8 +593,10 @@
593 593   class CONTEXT,ARTICLE_VERDICT dynamic
594 594  {{/mermaid}}
595 595  
506 +
596 596  === 6.3 Benefits Analysis ===
597 597  
509 +
598 598  | Benefit | Impact | Rationale |
599 599  |-|-|-|
600 600  | **Cost Reduction** | 40-70% for repeated claims | Many articles share common claims (e.g., "COVID vaccines are safe") |
... ... @@ -642,8 +642,11 @@
642 642  
643 643  ----
644 644  
557 +
645 645  == 7. Summary ==
646 646  
560 +
561 +
647 647  === Current State ===
648 648  
649 649  - POC1 implements core AKEL pipeline successfully
... ... @@ -651,6 +651,7 @@
651 651  - Multiple LLM providers supported
652 652  - No persistent claim storage or caching
653 653  
569 +
654 654  === Key Gaps from Specification ===
655 655  
656 656  - No scenario extraction
... ... @@ -659,6 +659,7 @@
659 659  - No source track record updates
660 660  - No review queue
661 661  
578 +
662 662  === Recommended Next Steps ===
663 663  
664 664  1. Implement claim caching layer