Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
N
Njust.Pdf.Analysis.V4.5
Project
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
陶然
Njust.Pdf.Analysis.V4.5
Commits
c98e4caf
Commit
c98e4caf
authored
Jun 09, 2021
by
陶然
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
新增图片解析job
parent
423b2476
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
120 additions
and
25 deletions
+120
-25
Analyse.cs
Src/Entities/Analyse.cs
+1
-7
AnalyseJob.cs
Src/Jobs/AnalyseJob.cs
+96
-13
RestfulAnalyse.cs
Src/Tranforms/RestfulAnalyse.cs
+23
-5
No files found.
Src/Entities/Analyse.cs
View file @
c98e4caf
...
@@ -42,7 +42,7 @@ namespace Njust.Pdf.Analysis.Entities
...
@@ -42,7 +42,7 @@ namespace Njust.Pdf.Analysis.Entities
public
string
Keywords
{
get
;
set
;
}
public
string
Keywords
{
get
;
set
;
}
[
ApiMember
(
Description
=
"发表日期"
)]
[
ApiMember
(
Description
=
"发表日期"
)]
public
DateTime
?
PublishTime
{
get
;
set
;
}
public
string
PublishTime
{
get
;
set
;
}
[
ApiMember
(
Description
=
"机构"
)]
[
ApiMember
(
Description
=
"机构"
)]
[
StringLength
(
500
)]
[
StringLength
(
500
)]
...
@@ -120,9 +120,6 @@ namespace Njust.Pdf.Analysis.Entities
...
@@ -120,9 +120,6 @@ namespace Njust.Pdf.Analysis.Entities
public
string
DomainName
{
get
;
set
;
}
public
string
DomainName
{
get
;
set
;
}
}
}
/// <summary>
/// 导入系统:0 全文解析:25 确认后:50 复核后:100
/// </summary>
[
Alias
(
Configs
.
TableAnalyseName
)]
[
Alias
(
Configs
.
TableAnalyseName
)]
[
Api
(
Description
=
"文献解析表"
)]
[
Api
(
Description
=
"文献解析表"
)]
public
class
Analyse
:
AnalyseBase
,
IEntityInAssemblyDb
public
class
Analyse
:
AnalyseBase
,
IEntityInAssemblyDb
...
@@ -132,9 +129,6 @@ namespace Njust.Pdf.Analysis.Entities
...
@@ -132,9 +129,6 @@ namespace Njust.Pdf.Analysis.Entities
}
}
/// <summary>
/// 导入系统:0 全文解析:25 确认后:50 复核后:100
/// </summary>
[
Alias
(
Configs
.
TableAnalyseName
)]
[
Alias
(
Configs
.
TableAnalyseName
)]
[
Api
(
Description
=
"文献解析表"
)]
[
Api
(
Description
=
"文献解析表"
)]
public
class
AnalyseAll
:
AnalyseBase
,
IEntityInAssemblyDb
public
class
AnalyseAll
:
AnalyseBase
,
IEntityInAssemblyDb
...
...
Src/Jobs/AnalyseJob.cs
View file @
c98e4caf
...
@@ -50,9 +50,9 @@ namespace Njust.Pdf.Analysis.Jobs
...
@@ -50,9 +50,9 @@ namespace Njust.Pdf.Analysis.Jobs
try
try
{
{
var
query
=
conn
.
From
<
AnalyseAll
>();
var
query
=
conn
.
From
<
AnalyseAll
>();
query
.
Take
(
10
);
query
.
Take
(
5
);
query
.
Skip
(
0
);
query
.
Skip
(
0
);
query
.
Where
(
o
=>
o
.
Status
<
4
);
//如果异常了,会重试
query
.
Where
(
o
=>
o
.
IsAnlysis
==
false
);
//如果异常了,会重试
query
.
OrderBy
(
o
=>
o
.
CreateTime
).
ThenBy
(
o
=>
o
.
Kvid
);
query
.
OrderBy
(
o
=>
o
.
CreateTime
).
ThenBy
(
o
=>
o
.
Kvid
);
var
entities
=
conn
.
Select
(
query
);
var
entities
=
conn
.
Select
(
query
);
if
(
entities
.
IsNullOrEmpty
())
if
(
entities
.
IsNullOrEmpty
())
...
@@ -77,21 +77,99 @@ namespace Njust.Pdf.Analysis.Jobs
...
@@ -77,21 +77,99 @@ namespace Njust.Pdf.Analysis.Jobs
text
=
regex
.
Replace
(
text
,
""
);
text
=
regex
.
Replace
(
text
,
""
);
item
.
Allbody
=
text
;
item
.
Allbody
=
text
;
item
.
AddOnlyProperties
(
o
=>
o
.
Allbody
);
item
.
AddOnlyProperties
(
o
=>
o
.
Allbody
);
//item.AnlysisTime = DateTime.Now;
item
.
AnlysisTime
=
DateTime
.
Now
;
//item.AddOnlyProperties(o => o.AnlysisTime);
item
.
AddOnlyProperties
(
o
=>
o
.
AnlysisTime
);
//item.AnlysisUserName = KiviiContext.CurrentMember.FullName;
item
.
AnlysisUserName
=
KiviiContext
.
CurrentMember
.
FullName
;
//item.AddOnlyProperties(o => o.AnlysisUserName);
item
.
AddOnlyProperties
(
o
=>
o
.
AnlysisUserName
);
//item.IsAnlysis = true;
item
.
IsAnlysis
=
true
;
//item.AddOnlyProperties(o => o.IsAnlysis);
item
.
AddOnlyProperties
(
o
=>
o
.
IsAnlysis
);
item
.
Status
=
25
;
//Status becomes 25 once the full text has been parsed
item
.
AddOnlyProperties
(
o
=>
o
.
Status
);
conn
.
UpdateOnly
(
item
);
conn
.
UpdateOnly
(
item
);
succeed
++;
succeed
++;
}
catch
(
Exception
ex
)
{
TaskContext
.
Message
=
$"处理消息异常:共处理
{
succeed
}
/
{
total
}
,Kvid:
{
item
.
Kvid
}
,Ex:
{
ex
.
Message
}
"
;
item
.
Status
+=
1
;
item
.
AddOnlyProperties
(
o
=>
o
.
Status
);
conn
.
UpdateOnly
(
item
);
return
false
;
}
}
}
catch
(
Exception
ex
)
{
TaskContext
.
Message
=
$"处理消息异常:共处理
{
succeed
}
/
{
total
}
,Ex:
{
ex
.
Message
}
"
;
return
false
;
}
TaskContext
.
Message
=
$"完成:共处理
{
succeed
}
/
{
total
}
"
;
return
true
;
}
}
public
class
AnalyseImageJob
:
IJob
{
/// <summary>
/// Display name of this job (the literal is Chinese for "extract the images in a PDF").
/// </summary>
public string Name => "抓取PDF中的图片";
/// <summary>
/// Description of this job; it returns the same text as <see cref="Name"/>.
/// </summary>
public string Description => "抓取PDF中的图片";
/// <summary>
/// Context object for the running job. pdfImage writes its progress and
/// error text to <c>TaskContext.Message</c>.
/// </summary>
public IJobContext TaskContext { get; set; }
/// <summary>
/// Intentionally empty; nothing is released here.
/// </summary>
public void Dispose() { }
/// <summary>
/// Entry point of the job: runs the PDF image extraction once.
/// </summary>
/// <returns>The value returned by pdfImage, passed through unchanged.</returns>
public bool Execution()
{
    var finished = pdfImage();
    return finished;
}
private
bool
pdfImage
()
{
int
succeed
=
0
,
total
=
0
;
var
conn
=
KiviiContext
.
GetOpenedDbConnection
<
AnalyseAll
>();
try
{
var
query
=
conn
.
From
<
AnalyseAll
>();
query
.
Take
(
2
);
query
.
Skip
(
0
);
query
.
Where
(
o
=>
o
.
Status
<
4
);
//如果异常了,会重试
query
.
OrderBy
(
o
=>
o
.
CreateTime
).
ThenBy
(
o
=>
o
.
Kvid
);
var
entities
=
conn
.
Select
(
query
);
if
(
entities
.
IsNullOrEmpty
())
{
TaskContext
.
Message
=
"无数据需要处理"
;
return
true
;
}
total
=
entities
.
Count
;
var
queryPdfEntityFile
=
conn
.
From
<
EntityDbFile
<
Analyse
>>();
queryPdfEntityFile
.
Where
(
o
=>
o
.
ParentKvid
==
Guid
.
Empty
&
o
.
DbFolderPath
==
"/Pdf/Analysis"
&
o
.
Extension
.
ToLower
()
==
".pdf"
&
Sql
.
In
(
o
.
OwnerKvid
,
entities
.
ConvertAll
(
p
=>
p
.
Kvid
)));
var
pdfEntityFiles
=
conn
.
Select
(
queryPdfEntityFile
);
foreach
(
var
item
in
entities
)
{
try
{
var
pdfFile
=
pdfEntityFiles
.
FirstOrDefault
(
o
=>
o
.
OwnerKvid
==
item
.
Kvid
);
if
(
pdfFile
==
null
)
throw
new
Exception
(
$"未找Pdf文件:
{
item
.
Title
}
,HashCode:
{
item
.
HashCode
}
"
);
var
physicalStorageFilePath
=
pdfFile
.
GetPhysicalPath
();
if
(!
File
.
Exists
(
physicalStorageFilePath
))
throw
new
Exception
(
$"未找Pdf文件:
{
item
.
Title
}
,HashCode:
{
item
.
HashCode
}
"
);
#
region
图片
#
region
图片
var
imgPath
=
$"/Storages/Pdf/Analysis/Images/
{
item
.
Kvid
}
"
;
var
imgPath
=
$"/Storages/Pdf/Analysis/Images/
{
item
.
Kvid
}
"
;
var
reader
=
new
PdfReader
(
physicalStorageFilePath
);
var
reader
=
new
PdfReader
(
physicalStorageFilePath
);
var
fileName
=
pdfFile
.
Name
.
Replace
(
pdfFile
.
Extension
,
""
);
for
(
var
i
=
0
;
i
<
reader
.
XrefSize
;
i
++)
for
(
var
i
=
0
;
i
<
reader
.
XrefSize
;
i
++)
{
{
var
pdfObj
=
reader
.
GetPdfObject
(
i
);
var
pdfObj
=
reader
.
GetPdfObject
(
i
);
...
@@ -105,7 +183,7 @@ namespace Njust.Pdf.Analysis.Jobs
...
@@ -105,7 +183,7 @@ namespace Njust.Pdf.Analysis.Jobs
{
{
var
ms
=
new
MemoryStream
(
bytes
);
var
ms
=
new
MemoryStream
(
bytes
);
ms
.
Position
=
0
;
ms
.
Position
=
0
;
KiviiContext
.
VirtualFiles
.
WriteFile
(
$"
{
imgPath
}
/
{
i
}
.jpg"
,
ms
);
KiviiContext
.
VirtualFiles
.
WriteFile
(
$"
{
imgPath
}
/
{
fileName
}
_
{
i
}
.jpg"
,
ms
);
//var img = System.Drawing.Image.FromStream(ms);
//var img = System.Drawing.Image.FromStream(ms);
//img.Save(Path.Combine(tbxExportImagesPath.Text, $"{i}.jpg"), System.Drawing.Imaging.ImageFormat.Jpeg);
//img.Save(Path.Combine(tbxExportImagesPath.Text, $"{i}.jpg"), System.Drawing.Imaging.ImageFormat.Jpeg);
...
@@ -114,10 +192,15 @@ namespace Njust.Pdf.Analysis.Jobs
...
@@ -114,10 +192,15 @@ namespace Njust.Pdf.Analysis.Jobs
{
}
{
}
}
}
#
endregion
#
endregion
item
.
Status
=
int
.
MaxValue
;
item
.
AddOnlyProperties
(
o
=>
o
.
Status
);
conn
.
UpdateOnly
(
item
);
succeed
++;
}
}
catch
(
Exception
ex
)
catch
(
Exception
ex
)
{
{
TaskContext
.
Message
=
$"处理消息异常:共处理
{
succeed
}
/
{
total
}
,Ex:
{
ex
.
Message
}
"
;
TaskContext
.
Message
=
$"处理消息异常:共处理
{
succeed
}
/
{
total
}
,
Kvid:
{
item
.
Kvid
}
,
Ex:
{
ex
.
Message
}
"
;
item
.
Status
+=
1
;
item
.
Status
+=
1
;
item
.
AddOnlyProperties
(
o
=>
o
.
Status
);
item
.
AddOnlyProperties
(
o
=>
o
.
Status
);
conn
.
UpdateOnly
(
item
);
conn
.
UpdateOnly
(
item
);
...
...
Src/Tranforms/RestfulAnalyse.cs
View file @
c98e4caf
...
@@ -44,6 +44,28 @@ namespace Njust.Pdf.Analysis.Tranforms
...
@@ -44,6 +44,28 @@ namespace Njust.Pdf.Analysis.Tranforms
#
endregion
#
endregion
[
RequiresAnyRole
(
SystemRoles
.
Everyone
)]
[
RequiresAnyRole
(
SystemRoles
.
Everyone
)]
/// <summary>
/// Request that loads a single <c>Analyse</c> record by its Kvid and returns it
/// together with the "Template.AnalysisPDF" view.
/// </summary>
[Route("/Analysis/PreCheck/{Kvid}")]
public class AnalysePreCheck : RestfulExecution<Analyse>
{
    /// <summary>
    /// Identifier of the record to load.
    /// NOTE(review): presumably bound from the {Kvid} route segment - confirm.
    /// </summary>
    public Guid Kvid { get; set; }

    /// <summary>
    /// Looks the record up and wraps it in an <c>HttpResult</c>.
    /// </summary>
    /// <param name="req">Current request (not used by this method).</param>
    /// <param name="res">Current response (not used by this method).</param>
    /// <returns>
    /// A NotFound error when Kvid is empty or no record matches; otherwise an
    /// <c>HttpResult</c> carrying the record and the view name.
    /// </returns>
    public override object OnExecution(IRequest req, IResponse res)
    {
        // An empty identifier is reported as NotFound, exactly like a missing record.
        if (Guid.Empty == Kvid)
        {
            return HttpError.NotFound("Need Kvid!");
        }

        var connection = KiviiContext.GetOpenedDbConnection<Analyse>();
        var record = connection.SingleById<Analyse>(Kvid);
        if (record == null)
        {
            return HttpError.NotFound("This Item is Unannounced!");
        }

        var result = new HttpResult
        {
            Response = record,
            View = "Template.AnalysisPDF"
        };
        return result;
    }
}
[
RequiresAnyRole
(
SystemRoles
.
Everyone
)]
public
class
AnalyseImport
:
RestfulExecution
<
Analyse
>
public
class
AnalyseImport
:
RestfulExecution
<
Analyse
>
{
{
public
string
DomainName
{
get
;
set
;
}
public
string
DomainName
{
get
;
set
;
}
...
@@ -141,8 +163,6 @@ namespace Njust.Pdf.Analysis.Tranforms
...
@@ -141,8 +163,6 @@ namespace Njust.Pdf.Analysis.Tranforms
exist
.
AddOnlyProperties
(
o
=>
o
.
AnlysisUserName
);
exist
.
AddOnlyProperties
(
o
=>
o
.
AnlysisUserName
);
exist
.
IsAnlysis
=
true
;
exist
.
IsAnlysis
=
true
;
exist
.
AddOnlyProperties
(
o
=>
o
.
IsAnlysis
);
exist
.
AddOnlyProperties
(
o
=>
o
.
IsAnlysis
);
exist
.
Status
=
50
;
//解析了全文 后状态为50
exist
.
AddOnlyProperties
(
o
=>
o
.
Status
);
conn
.
UpdateOnly
(
exist
);
conn
.
UpdateOnly
(
exist
);
exist
.
RemoveAllOnlyProperties
();
exist
.
RemoveAllOnlyProperties
();
rtns
.
Results
.
Add
(
exist
);
rtns
.
Results
.
Add
(
exist
);
...
@@ -248,7 +268,7 @@ namespace Njust.Pdf.Analysis.Tranforms
...
@@ -248,7 +268,7 @@ namespace Njust.Pdf.Analysis.Tranforms
if
(!
item
.
Category
.
IsNullOrEmpty
())
item
.
AddOnlyProperties
(
o
=>
o
.
Category
);
if
(!
item
.
Category
.
IsNullOrEmpty
())
item
.
AddOnlyProperties
(
o
=>
o
.
Category
);
if
(!
item
.
Source
.
IsNullOrEmpty
())
item
.
AddOnlyProperties
(
o
=>
o
.
Source
);
if
(!
item
.
Source
.
IsNullOrEmpty
())
item
.
AddOnlyProperties
(
o
=>
o
.
Source
);
if
(
item
.
PublishTime
!=
null
)
item
.
AddOnlyProperties
(
o
=>
o
.
PublishTime
);
if
(
!
item
.
PublishTime
.
IsNullOrEmpty
()
)
item
.
AddOnlyProperties
(
o
=>
o
.
PublishTime
);
if
(!
item
.
Country
.
IsNullOrEmpty
())
item
.
AddOnlyProperties
(
o
=>
o
.
Country
);
if
(!
item
.
Country
.
IsNullOrEmpty
())
item
.
AddOnlyProperties
(
o
=>
o
.
Country
);
if
(!
item
.
Fund
.
IsNullOrEmpty
())
item
.
AddOnlyProperties
(
o
=>
o
.
Fund
);
if
(!
item
.
Fund
.
IsNullOrEmpty
())
item
.
AddOnlyProperties
(
o
=>
o
.
Fund
);
if
(!
item
.
Abstract
.
IsNullOrEmpty
())
item
.
AddOnlyProperties
(
o
=>
o
.
Abstract
);
if
(!
item
.
Abstract
.
IsNullOrEmpty
())
item
.
AddOnlyProperties
(
o
=>
o
.
Abstract
);
...
@@ -259,8 +279,6 @@ namespace Njust.Pdf.Analysis.Tranforms
...
@@ -259,8 +279,6 @@ namespace Njust.Pdf.Analysis.Tranforms
item
.
IsChecked
=
true
;
item
.
IsChecked
=
true
;
item
.
AddOnlyProperties
(
o
=>
o
.
IsChecked
);
item
.
AddOnlyProperties
(
o
=>
o
.
IsChecked
);
if
(!
item
.
Language
.
IsNullOrEmpty
())
item
.
AddOnlyProperties
(
o
=>
o
.
Language
);
if
(!
item
.
Language
.
IsNullOrEmpty
())
item
.
AddOnlyProperties
(
o
=>
o
.
Language
);
item
.
Status
=
100
;
//Status becomes 100 once the record has been reviewed
item
.
AddOnlyProperties
(
o
=>
o
.
Status
);
conn
.
UpdateOnly
(
item
);
conn
.
UpdateOnly
(
item
);
item
.
RemoveAllOnlyProperties
();
item
.
RemoveAllOnlyProperties
();
rtns
.
Results
.
Add
(
item
);
rtns
.
Results
.
Add
(
item
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment