Changes for page FactHarbor POC1 Architecture Analysis 1.Jan.26
Last modified by Robert Schaub on 2026/02/08 08:12
From version 6.1
edited by Robert Schaub
on 2026/01/02 10:06
on 2026/01/02 10:06
Change comment:
There is no comment for this version
Summary
-
Page properties (1 modified, 0 added, 0 removed)
Details
- Page properties
-
- Content
-
... ... @@ -94,8 +94,121 @@ 94 94 95 95 == 2. ERD Data Model (Current POC1 Implementation) == 96 96 97 +**Data Objects ERD** 98 + 97 97 {{mermaid}} 98 98 erDiagram 101 + ARTICLE ||--o{ CLAIM : "contains" 102 + ARTICLE ||--|| ARTICLE_VERDICT : "has" 103 + CLAIM ||--|| CLAIM_VERDICT : "has" 104 + CLAIM ||--o{ CLAIM : "depends on" 105 + CLAIM_VERDICT }o--o{ EVIDENCE : "supported by" 106 + SOURCE ||--o{ EVIDENCE : "provides" 107 + ARTICLE ||--o{ SOURCE : "references" 108 + 109 + ARTICLE { 110 + string id PK "Unique identifier (job ID)" 111 + string inputType "text | url" 112 + string inputValue "Original URL or text" 113 + string articleThesis "Main argument/thesis" 114 + string detectedInputType "question | claim | article" 115 + boolean isQuestion "True if input is a question" 116 + datetime createdAt "Analysis timestamp" 117 + datetime updatedAt "Last update" 118 + json distinctProceedings "Legal proceedings if any" 119 + boolean hasMultipleProceedings "Multi-proceeding flag" 120 + string proceedingContext "Context for proceedings" 121 + json logicalFallacies "Detected fallacies array" 122 + boolean isPseudoscience "Pseudoscience detection" 123 + string_array pseudoscienceCategories "Categories if detected" 124 + int llmCalls "Total LLM API calls" 125 + json searchQueries "All search queries performed" 126 + string schemaVersion "e.g. 2.6.17" 127 + } 128 + 129 + CLAIM { 130 + string id PK "SC1, SC2, C1, etc." 131 + string articleId FK "Parent article" 132 + string text "The claim statement" 133 + string type "legal | procedural | factual | evaluative" 134 + string claimRole "attribution | source | timing | core" 135 + string_array dependsOn "IDs of prerequisite claims" 136 + string_array keyEntities "Named entities in claim" 137 + boolean isCentral "Is this a central claim?" 138 + string relatedProceedingId "Linked proceeding if any" 139 + int startOffset "Position in original text" 140 + int endOffset "End position in original text" 141 + string approximatePosition "Descriptive position" 142 + } 143 + 144 + CLAIM_VERDICT { 145 + string id PK "Same as claim ID" 146 + string claimId FK "Reference to claim" 147 + string llmVerdict "WELL-SUPPORTED | PARTIALLY-SUPPORTED | UNCERTAIN | REFUTED" 148 + string verdict "True | Mostly True | Leaning True | Unverified | Leaning False | Mostly False | False" 149 + int confidence "0-100 LLM confidence" 150 + int truthPercentage "0-100 calibrated truth score" 151 + string riskTier "A (high) | B (medium) | C (low)" 152 + string reasoning "Explanation of verdict" 153 + string_array supportingFactIds "Evidence IDs supporting this" 154 + boolean dependencyFailed "True if prerequisite failed" 155 + string_array failedDependencies "Which deps failed" 156 + string highlightColor "green | light-green | yellow | orange | dark-orange | red | dark-red" 157 + boolean isPseudoscience "Pseudoscience flag" 158 + string escalationReason "Why verdict was escalated" 159 + } 160 + 161 + ARTICLE_VERDICT { 162 + string id PK "Same as article ID" 163 + string articleId FK "Reference to article" 164 + string llmArticleVerdict "Original LLM verdict" 165 + int llmArticleConfidence "Original LLM confidence" 166 + string articleVerdict "True | Mostly True | Leaning True | Unverified | Leaning False | Mostly False | False" 167 + int articleTruthPercentage "0-100 calibrated score" 168 + string articleVerdictReason "Why verdict differs from claims avg" 169 + int claimsAverageTruthPercentage "Average of claim verdicts" 170 + string claimsAverageVerdict "7-point average verdict" 171 + int claimsTotal "Total claims analyzed" 172 + int claimsSupported "Claims with truth >= 72%" 173 + int claimsUncertain "Claims with truth 43-71%" 174 + int claimsRefuted "Claims with truth < 43%" 175 + int centralClaimsTotal "Number of central claims" 176 + int centralClaimsSupported "Central claims supported" 177 + } 178 + 179 + EVIDENCE { 180 + string id PK "S1-F1, S1-F2 format" 181 + string sourceId FK "Reference to source" 182 + string claimId FK "Optional: specific claim this supports" 183 + string fact "The factual statement extracted" 184 + string category "legal_provision | evidence | expert_quote | statistic | event | criticism" 185 + string specificity "high | medium" 186 + string sourceExcerpt "Original text excerpt" 187 + string relatedProceedingId "Linked proceeding if any" 188 + boolean isContestedClaim "Is this a contested assertion" 189 + string claimSource "Who made contested claim" 190 + } 191 + 192 + SOURCE { 193 + string id PK "S1, S2, etc." 194 + string articleId FK "Parent article" 195 + string url "Full URL" 196 + string title "Page/document title" 197 + string domain "Extracted domain" 198 + int trackRecordScore "0-100 reliability score or null" 199 + string fullText "Extracted content" 200 + datetime fetchedAt "When content was fetched" 201 + string category "news | academic | government | legal" 202 + boolean fetchSuccess "True if fetch succeeded" 203 + string searchQuery "Which query found this" 204 + string mimeType "text/html | application/pdf" 205 + } 206 +{{/mermaid}} 207 + 208 +**Data Usage ERD** 209 + 210 +{{mermaid}} 211 +erDiagram 99 99 JOB ||--o{ JOB_EVENT : "has" 100 100 JOB ||--|| ANALYSIS_RESULT : "produces" 101 101 ANALYSIS_RESULT ||--o{ CLAIM_VERDICT : "contains"