Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
N
Njust.Pdf.Analysis.V4.5
Project
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
陶然
Njust.Pdf.Analysis.V4.5
Commits
3687ab4d
Commit
3687ab4d
authored
Jun 23, 2021
by
陶然
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
升级优化
parent
9993a834
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
157 additions
and
101 deletions
+157
-101
Analyse.cs
Src/Entities/Analyse.cs
+1
-2
AnalyseJob.cs
Src/Jobs/AnalyseJob.cs
+97
-97
RestfulAnalyse.cs
Src/Tranforms/RestfulAnalyse.cs
+59
-2
No files found.
Src/Entities/Analyse.cs
View file @
3687ab4d
...
@@ -64,8 +64,7 @@ namespace Njust.Pdf.Analysis.Entities
...
@@ -64,8 +64,7 @@ namespace Njust.Pdf.Analysis.Entities
public
string
Mainbody
{
get
;
set
;
}
public
string
Mainbody
{
get
;
set
;
}
[
ApiMember
(
Description
=
"全文"
)]
[
ApiMember
(
Description
=
"全文"
)]
[
InternalSetter
]
[
Ignore
]
[
StringLength
(
int
.
MaxValue
)]
public
string
Allbody
{
get
;
set
;
}
public
string
Allbody
{
get
;
set
;
}
[
ApiMember
(
Description
=
"参考文献"
)]
[
ApiMember
(
Description
=
"参考文献"
)]
...
...
Src/Jobs/AnalyseJob.cs
View file @
3687ab4d
...
@@ -12,103 +12,103 @@ using System.Threading.Tasks;
...
@@ -12,103 +12,103 @@ using System.Threading.Tasks;
namespace
Njust.Pdf.Analysis.Jobs
namespace
Njust.Pdf.Analysis.Jobs
{
{
public
class
AnalyseJob
:
IJob
//
public class AnalyseJob : IJob
{
//
{
public
string
Name
//
public string Name
{
//
{
get
//
get
{
//
{
return
"使用PDFBox对PDF进行全文抓取"
;
//
return "使用PDFBox对PDF进行全文抓取";
}
//
}
}
//
}
public
string
Description
//
public string Description
{
//
{
get
//
get
{
//
{
return
"使用PDFBox对PDF进行全文抓取"
;
//
return "使用PDFBox对PDF进行全文抓取";
}
//
}
}
//
}
public
IJobContext
TaskContext
{
get
;
set
;
}
//
public IJobContext TaskContext { get; set; }
public
void
Dispose
()
//
public void Dispose()
{
//
{
}
//
}
public
bool
Execution
()
//
public bool Execution()
{
//
{
return
textPdfBox
();
//
return textPdfBox();
}
//
}
private
bool
textPdfBox
()
//
private bool textPdfBox()
{
//
{
int
succeed
=
0
,
total
=
0
;
//
int succeed = 0, total = 0;
var
conn
=
KiviiContext
.
GetOpenedDbConnection
<
AnalyseAll
>();
//
var conn = KiviiContext.GetOpenedDbConnection<AnalyseAll>();
try
//
try
{
//
{
var
query
=
conn
.
From
<
AnalyseAll
>();
//
var query = conn.From<AnalyseAll>();
query
.
Take
(
5
);
//
query.Take(5);
query
.
Skip
(
0
);
//
query.Skip(0);
query
.
Where
(
o
=>
o
.
IsAnlysis
==
false
);
//如果异常了,会重试
//
query.Where(o => o.IsAnlysis == false);//如果异常了,会重试
query
.
OrderBy
(
o
=>
o
.
CreateTime
).
ThenBy
(
o
=>
o
.
Kvid
);
//
query.OrderBy(o => o.CreateTime).ThenBy(o => o.Kvid);
var
entities
=
conn
.
Select
(
query
);
//
var entities = conn.Select(query);
if
(
entities
.
IsNullOrEmpty
())
//
if (entities.IsNullOrEmpty())
{
//
{
TaskContext
.
Message
=
"无数据需要处理"
;
//
TaskContext.Message = "无数据需要处理";
return
true
;
//
return true;
}
//
}
total
=
entities
.
Count
;
//
total = entities.Count;
var
queryPdfEntityFile
=
conn
.
From
<
EntityDbFile
<
Analyse
>>();
//
var queryPdfEntityFile = conn.From<EntityDbFile<Analyse>>();
queryPdfEntityFile
.
Where
(
o
=>
o
.
ParentKvid
==
Guid
.
Empty
&
o
.
DbFolderPath
==
"/Pdf/Analysis"
&
o
.
Extension
.
ToLower
()
==
".pdf"
&
Sql
.
In
(
o
.
OwnerKvid
,
entities
.
ConvertAll
(
p
=>
p
.
Kvid
)));
//
queryPdfEntityFile.Where(o => o.ParentKvid == Guid.Empty & o.DbFolderPath == "/Pdf/Analysis" & o.Extension.ToLower() == ".pdf" & Sql.In(o.OwnerKvid, entities.ConvertAll(p => p.Kvid)));
var
pdfEntityFiles
=
conn
.
Select
(
queryPdfEntityFile
);
//
var pdfEntityFiles = conn.Select(queryPdfEntityFile);
foreach
(
var
item
in
entities
)
//
foreach (var item in entities)
{
//
{
try
//
try
{
//
{
var
pdfFile
=
pdfEntityFiles
.
FirstOrDefault
(
o
=>
o
.
OwnerKvid
==
item
.
Kvid
);
//
var pdfFile = pdfEntityFiles.FirstOrDefault(o => o.OwnerKvid == item.Kvid);
if
(
pdfFile
==
null
)
throw
new
Exception
(
$"未找Pdf文件:
{
item
.
Title
}
,HashCode:
{
item
.
HashCode
}
"
);
//
if (pdfFile == null) throw new Exception($"未找Pdf文件:{item.Title},HashCode:{item.HashCode}");
var
physicalStorageFilePath
=
pdfFile
.
GetPhysicalPath
();
//
var physicalStorageFilePath = pdfFile.GetPhysicalPath();
if
(!
File
.
Exists
(
physicalStorageFilePath
))
throw
new
Exception
(
$"未找Pdf文件:
{
item
.
Title
}
,HashCode:
{
item
.
HashCode
}
"
);
//
if (!File.Exists(physicalStorageFilePath)) throw new Exception($"未找Pdf文件:{item.Title},HashCode:{item.HashCode}");
Console
.
WriteLine
(
"-------------------------Pre-------------------------"
);
//
Console.WriteLine("-------------------------Pre-------------------------");
Console
.
WriteLine
(
$"-------------------------
{
physicalStorageFilePath
}
-------------------------"
);
//
Console.WriteLine($"-------------------------{physicalStorageFilePath}-------------------------");
var
text
=
Kivii
.
PdfBox
.
Instance
.
ReadText
(
physicalStorageFilePath
);
//
var text = Kivii.PdfBox.Instance.ReadText(physicalStorageFilePath);
Console
.
WriteLine
(
"-------------------------Ok-------------------------"
);
//
Console.WriteLine("-------------------------Ok-------------------------");
item
.
Allbody
=
text
;
//
item.Allbody = text;
item
.
AddOnlyProperties
(
o
=>
o
.
Allbody
);
//
item.AddOnlyProperties(o => o.Allbody);
item
.
AnlysisTime
=
DateTime
.
Now
;
//
item.AnlysisTime = DateTime.Now;
item
.
AddOnlyProperties
(
o
=>
o
.
AnlysisTime
);
//
item.AddOnlyProperties(o => o.AnlysisTime);
item
.
AnlysisUserName
=
KiviiContext
.
CurrentMember
.
FullName
;
//
item.AnlysisUserName = KiviiContext.CurrentMember.FullName;
item
.
AddOnlyProperties
(
o
=>
o
.
AnlysisUserName
);
//
item.AddOnlyProperties(o => o.AnlysisUserName);
item
.
IsAnlysis
=
true
;
//
item.IsAnlysis = true;
item
.
AddOnlyProperties
(
o
=>
o
.
IsAnlysis
);
//
item.AddOnlyProperties(o => o.IsAnlysis);
Console
.
WriteLine
(
"-------------------------PreUpdateOnly-------------------------"
);
//
Console.WriteLine("-------------------------PreUpdateOnly-------------------------");
conn
.
UpdateOnly
(
item
);
//
conn.UpdateOnly(item);
Console
.
WriteLine
(
"-------------------------UpdateOnly-------------------------"
);
//
Console.WriteLine("-------------------------UpdateOnly-------------------------");
succeed
++;
//
succeed++;
}
//
}
catch
(
Exception
ex
)
//
catch (Exception ex)
{
//
{
Console
.
WriteLine
(
"-------------------------Ex-------------------------"
);
//
Console.WriteLine("-------------------------Ex-------------------------");
TaskContext
.
Message
=
$"处理消息异常:共处理
{
succeed
}
/
{
total
}
,Kvid:
{
item
.
Kvid
}
,Ex:
{
ex
.
Message
}
"
;
//
TaskContext.Message = $"处理消息异常:共处理{succeed}/{total},Kvid:{item.Kvid},Ex:{ex.Message}";
item
.
Status
+=
1
;
//
item.Status += 1;
item
.
AddOnlyProperties
(
o
=>
o
.
Status
);
//
item.AddOnlyProperties(o => o.Status);
conn
.
UpdateOnly
(
item
);
//
conn.UpdateOnly(item);
Console
.
WriteLine
(
"-------------------------ExUpdate-------------------------"
);
//
Console.WriteLine("-------------------------ExUpdate-------------------------");
return
false
;
//
return false;
}
//
}
}
//
}
}
//
}
catch
(
Exception
ex
)
//
catch (Exception ex)
{
//
{
TaskContext
.
Message
=
$"处理消息异常:共处理
{
succeed
}
/
{
total
}
,Ex:
{
ex
.
Message
}
"
;
//
TaskContext.Message = $"处理消息异常:共处理{succeed}/{total},Ex:{ex.Message}";
return
false
;
//
return false;
}
//
}
TaskContext
.
Message
=
$"完成:共处理
{
succeed
}
/
{
total
}
"
;
//
TaskContext.Message = $"完成:共处理{succeed}/{total}";
return
true
;
//
return true;
}
//
}
}
//
}
public
class
AnalyseImageJob
:
IJob
public
class
AnalyseImageJob
:
IJob
{
{
...
...
Src/Tranforms/RestfulAnalyse.cs
View file @
3687ab4d
...
@@ -2,6 +2,7 @@
...
@@ -2,6 +2,7 @@
using
Kivii.Linq
;
using
Kivii.Linq
;
using
Kivii.Pdf
;
using
Kivii.Pdf
;
using
Kivii.Web
;
using
Kivii.Web
;
using
Kivii.Zip
;
using
Njust.Pdf.Analysis.Entities
;
using
Njust.Pdf.Analysis.Entities
;
using
System
;
using
System
;
using
System.Collections.Generic
;
using
System.Collections.Generic
;
...
@@ -120,6 +121,7 @@ namespace Njust.Pdf.Analysis.Tranforms
...
@@ -120,6 +121,7 @@ namespace Njust.Pdf.Analysis.Tranforms
var
exist
=
exists
.
FirstOrDefault
(
o
=>
o
.
HashCode
==
item
.
HashCode
);
var
exist
=
exists
.
FirstOrDefault
(
o
=>
o
.
HashCode
==
item
.
HashCode
);
if
(
exist
!=
null
)
if
(
exist
!=
null
)
{
{
stream
.
Position
=
0
;
KiviiContext
.
VirtualFiles
.
WriteFile
(
exist
.
ImportPath
,
stream
);
KiviiContext
.
VirtualFiles
.
WriteFile
(
exist
.
ImportPath
,
stream
);
rtns
.
Results
.
Add
(
exist
);
rtns
.
Results
.
Add
(
exist
);
continue
;
continue
;
...
@@ -219,8 +221,8 @@ namespace Njust.Pdf.Analysis.Tranforms
...
@@ -219,8 +221,8 @@ namespace Njust.Pdf.Analysis.Tranforms
}
}
}
}
}
}
exist
.
Allbody
=
item
.
Allbody
.
Replace
(
"\n"
,
""
);
//
exist.Allbody = item.Allbody.Replace("\n", "");
exist
.
AddOnlyProperties
(
o
=>
o
.
Allbody
);
//
exist.AddOnlyProperties(o => o.Allbody);
exist
.
AnlysisTime
=
DateTime
.
Now
;
exist
.
AnlysisTime
=
DateTime
.
Now
;
exist
.
AddOnlyProperties
(
o
=>
o
.
AnlysisTime
);
exist
.
AddOnlyProperties
(
o
=>
o
.
AnlysisTime
);
exist
.
AnlysisUserName
=
KiviiContext
.
CurrentMember
.
FullName
;
exist
.
AnlysisUserName
=
KiviiContext
.
CurrentMember
.
FullName
;
...
@@ -425,6 +427,61 @@ namespace Njust.Pdf.Analysis.Tranforms
...
@@ -425,6 +427,61 @@ namespace Njust.Pdf.Analysis.Tranforms
}
}
}
}
/// <summary>
/// Reads the full text of the PDF attached to each requested <see cref="Analyse"/> record.
/// The text is returned either as a query response or, when <see cref="IsFiles"/> is true,
/// as a downloadable zip archive holding one UTF-8 text file per record.
/// </summary>
public class AnalyseAllbody : RestfulExecution<Analyse>
{
    /// <summary>Identifiers of the <see cref="Analyse"/> records to process. Must not be null or empty.</summary>
    public List<Guid> Kvids { get; set; }

    /// <summary>When true the extracted texts are returned as a zip download instead of a query response.</summary>
    public bool IsFiles { get; set; }

    /// <summary>
    /// Loads the requested records, locates the PDF file stored for each of them, extracts the
    /// text with PdfBox and returns the result.
    /// </summary>
    /// <param name="req">The current request (not used by this method).</param>
    /// <param name="res">The current response (not used by this method).</param>
    /// <returns>
    /// An <c>HttpResult</c> wrapping a zip stream when <see cref="IsFiles"/> is true; otherwise a
    /// <c>RestfulQueryResponse&lt;Analyse&gt;</c> whose results carry the extracted text in <c>Allbody</c>.
    /// </returns>
    /// <exception cref="Exception">
    /// Thrown when a record has no matching PDF file entry or the file is missing on disk.
    /// The exception type is intentionally left as in the original code.
    /// </exception>
    public override object OnExecution(IRequest req, IResponse res)
    {
        Kvids.ThrowIfNullOrEmpty("Kvids is needed!");

        var conn = KiviiContext.GetOpenedDbConnection<Analyse>();
        var analysis = conn.SelectByIds<Analyse>(Kvids);
        analysis.ThrowIfNullOrEmpty("未找到目标!");

        // Fetch the root-level PDF file entries of all requested records in one query.
        // The predicate is handed to the query builder, so it is kept exactly as written.
        var queryPdfEntityFile = conn.From<EntityDbFile<Analyse>>();
        queryPdfEntityFile.Where(o => o.ParentKvid == Guid.Empty & o.DbFolderPath == "/Pdf/Analysis" & o.Extension.ToLower() == ".pdf" & Sql.In(o.OwnerKvid, Kvids));
        var pdfEntityFiles = conn.Select(queryPdfEntityFile);

        var results = new List<Analyse>();
        var rtns = new RestfulQueryResponse<Analyse>();
        rtns.Results = results;

        foreach (var item in analysis)
        {
            var pdfFile = pdfEntityFiles.FirstOrDefault(o => o.OwnerKvid == item.Kvid);
            if (pdfFile == null)
            {
                throw new Exception($"未找Pdf文件:{item.Title},HashCode:{item.HashCode}");
            }

            var physicalStorageFilePath = pdfFile.GetPhysicalPath();
            if (!File.Exists(physicalStorageFilePath))
            {
                throw new Exception($"未找Pdf文件:{item.Title},HashCode:{item.HashCode}");
            }

            // The text is only placed on the returned object; nothing is written back to the database here.
            item.Allbody = Kivii.PdfBox.Instance.ReadText(physicalStorageFilePath);
            results.Add(item);
        }

        if (IsFiles)
        {
            return BuildZipResult(results);
        }

        rtns.Total = results.Count;
        return rtns;
    }

    /// <summary>Packs the extracted text of every record into an in-memory zip and wraps it as a download.</summary>
    private static object BuildZipResult(List<Analyse> results)
    {
        var memoryStream = new MemoryStream();
        try
        {
            using (var zipFile = new ZipFile(System.Text.Encoding.UTF8))
            {
                foreach (var item in results)
                {
                    // Encoding.GetBytes throws on null, so a record without text yields an empty file.
                    byte[] buffer = Encoding.UTF8.GetBytes(item.Allbody ?? string.Empty);
                    zipFile.AddEntry(BuildEntryName(item), buffer);
                }
                zipFile.Save(memoryStream);
            }

            // Rewind so the response is streamed from the beginning of the archive.
            memoryStream.Position = 0;
            return new HttpResult(memoryStream, $"导出全文工{results.Count}份文件.zip", true);
        }
        catch
        {
            // The stream is handed to the result on success; release it only when building fails.
            memoryStream.Dispose();
            throw;
        }
    }

    /// <summary>
    /// Builds the zip entry name for a record: the lower-cased file name without its trailing
    /// ".pdf" extension, followed by "_全文.txt". Falls back to the record's Kvid when the
    /// file name is missing.
    /// </summary>
    private static string BuildEntryName(Analyse item)
    {
        var fileName = item.FileName;
        if (string.IsNullOrEmpty(fileName))
        {
            fileName = item.Kvid.ToString();
        }

        // Lower-casing is kept so entry names stay the same as before for ordinary file names.
        var baseName = fileName.ToLower();

        // Strip only the trailing extension; a ".pdf" in the middle of the name is part of the name.
        if (baseName.EndsWith(".pdf", StringComparison.Ordinal))
        {
            baseName = baseName.Substring(0, baseName.Length - ".pdf".Length);
        }

        return baseName + "_全文.txt";
    }
}
#
region
CRUDQ
#
region
CRUDQ
[
RequiresAnyRole
(
SystemRoles
.
Everyone
)]
[
RequiresAnyRole
(
SystemRoles
.
Everyone
)]
public
class
AuthorCreate
:
RestfulCreate
<
Author
>
public
class
AuthorCreate
:
RestfulCreate
<
Author
>
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment