Commit c98e4caf by 陶然

新增图片解析job

parent 423b2476
...@@ -42,7 +42,7 @@ namespace Njust.Pdf.Analysis.Entities ...@@ -42,7 +42,7 @@ namespace Njust.Pdf.Analysis.Entities
public string Keywords { get; set; } public string Keywords { get; set; }
[ApiMember(Description = "发表日期")] [ApiMember(Description = "发表日期")]
public DateTime? PublishTime { get; set; } public string PublishTime { get; set; }
[ApiMember(Description = "机构")] [ApiMember(Description = "机构")]
[StringLength(500)] [StringLength(500)]
...@@ -120,9 +120,6 @@ namespace Njust.Pdf.Analysis.Entities ...@@ -120,9 +120,6 @@ namespace Njust.Pdf.Analysis.Entities
public string DomainName { get; set; } public string DomainName { get; set; }
} }
/// <summary>
/// 导入系统:0 全文解析:25 确认后:50 复核后:100
/// </summary>
[Alias(Configs.TableAnalyseName)] [Alias(Configs.TableAnalyseName)]
[Api(Description ="文献解析表")] [Api(Description ="文献解析表")]
public class Analyse :AnalyseBase,IEntityInAssemblyDb public class Analyse :AnalyseBase,IEntityInAssemblyDb
...@@ -132,9 +129,6 @@ namespace Njust.Pdf.Analysis.Entities ...@@ -132,9 +129,6 @@ namespace Njust.Pdf.Analysis.Entities
} }
/// <summary>
/// 导入系统:0 全文解析:25 确认后:50 复核后:100
/// </summary>
[Alias(Configs.TableAnalyseName)] [Alias(Configs.TableAnalyseName)]
[Api(Description = "文献解析表")] [Api(Description = "文献解析表")]
public class AnalyseAll : AnalyseBase, IEntityInAssemblyDb public class AnalyseAll : AnalyseBase, IEntityInAssemblyDb
......
...@@ -50,9 +50,9 @@ namespace Njust.Pdf.Analysis.Jobs ...@@ -50,9 +50,9 @@ namespace Njust.Pdf.Analysis.Jobs
try try
{ {
var query = conn.From<AnalyseAll>(); var query = conn.From<AnalyseAll>();
query.Take(10); query.Take(5);
query.Skip(0); query.Skip(0);
query.Where(o => o.Status < 4);//如果异常了,会重试 query.Where(o => o.IsAnlysis == false);//如果异常了,会重试
query.OrderBy(o => o.CreateTime).ThenBy(o => o.Kvid); query.OrderBy(o => o.CreateTime).ThenBy(o => o.Kvid);
var entities = conn.Select(query); var entities = conn.Select(query);
if (entities.IsNullOrEmpty()) if (entities.IsNullOrEmpty())
...@@ -77,21 +77,99 @@ namespace Njust.Pdf.Analysis.Jobs ...@@ -77,21 +77,99 @@ namespace Njust.Pdf.Analysis.Jobs
text = regex.Replace(text, ""); text = regex.Replace(text, "");
item.Allbody = text; item.Allbody = text;
item.AddOnlyProperties(o => o.Allbody); item.AddOnlyProperties(o => o.Allbody);
//item.AnlysisTime = DateTime.Now; item.AnlysisTime = DateTime.Now;
//item.AddOnlyProperties(o => o.AnlysisTime); item.AddOnlyProperties(o => o.AnlysisTime);
//item.AnlysisUserName = KiviiContext.CurrentMember.FullName; item.AnlysisUserName = KiviiContext.CurrentMember.FullName;
//item.AddOnlyProperties(o => o.AnlysisUserName); item.AddOnlyProperties(o => o.AnlysisUserName);
//item.IsAnlysis = true; item.IsAnlysis = true;
//item.AddOnlyProperties(o => o.IsAnlysis); item.AddOnlyProperties(o => o.IsAnlysis);
item.Status = 25;//解析了全文 后状态为50
item.AddOnlyProperties(o => o.Status);
conn.UpdateOnly(item); conn.UpdateOnly(item);
succeed++; succeed++;
}
catch (Exception ex)
{
TaskContext.Message = $"处理消息异常:共处理{succeed}/{total},Kvid:{item.Kvid},Ex:{ex.Message}";
item.Status += 1;
item.AddOnlyProperties(o => o.Status);
conn.UpdateOnly(item);
return false;
}
}
}
catch (Exception ex)
{
TaskContext.Message = $"处理消息异常:共处理{succeed}/{total},Ex:{ex.Message}";
return false;
}
TaskContext.Message = $"完成:共处理{succeed}/{total}";
return true;
}
}
public class AnalyseImageJob : IJob
{
/// <summary>
/// Display name of this job (the literal reads "extract the images from the PDF").
/// </summary>
public string Name => "抓取PDF中的图片";
/// <summary>
/// Human-readable description of this job; it is the same literal as the job name
/// ("extract the images from the PDF").
/// </summary>
public string Description => "抓取PDF中的图片";
public IJobContext TaskContext { get; set; }
/// <summary>
/// Intentionally does nothing: no cleanup is performed here.
/// </summary>
public void Dispose() { }
/// <summary>
/// Job entry point: runs the PDF image extraction routine.
/// </summary>
/// <returns>Whatever <c>pdfImage()</c> returns.</returns>
public bool Execution() => pdfImage();
private bool pdfImage()
{
int succeed = 0, total = 0;
var conn = KiviiContext.GetOpenedDbConnection<AnalyseAll>();
try
{
var query = conn.From<AnalyseAll>();
query.Take(2);
query.Skip(0);
query.Where(o => o.Status < 4);//如果异常了,会重试
query.OrderBy(o => o.CreateTime).ThenBy(o => o.Kvid);
var entities = conn.Select(query);
if (entities.IsNullOrEmpty())
{
TaskContext.Message = "无数据需要处理";
return true;
}
total = entities.Count;
var queryPdfEntityFile = conn.From<EntityDbFile<Analyse>>();
queryPdfEntityFile.Where(o => o.ParentKvid == Guid.Empty & o.DbFolderPath == "/Pdf/Analysis" & o.Extension.ToLower() == ".pdf" & Sql.In(o.OwnerKvid, entities.ConvertAll(p => p.Kvid)));
var pdfEntityFiles = conn.Select(queryPdfEntityFile);
foreach (var item in entities)
{
try
{
var pdfFile = pdfEntityFiles.FirstOrDefault(o => o.OwnerKvid == item.Kvid);
if (pdfFile == null) throw new Exception($"未找Pdf文件:{item.Title},HashCode:{item.HashCode}");
var physicalStorageFilePath = pdfFile.GetPhysicalPath();
if (!File.Exists(physicalStorageFilePath)) throw new Exception($"未找Pdf文件:{item.Title},HashCode:{item.HashCode}");
#region 图片 #region 图片
var imgPath = $"/Storages/Pdf/Analysis/Images/{item.Kvid}"; var imgPath = $"/Storages/Pdf/Analysis/Images/{item.Kvid}";
var reader = new PdfReader(physicalStorageFilePath); var reader = new PdfReader(physicalStorageFilePath);
var fileName = pdfFile.Name.Replace(pdfFile.Extension, "");
for (var i = 0; i < reader.XrefSize; i++) for (var i = 0; i < reader.XrefSize; i++)
{ {
var pdfObj = reader.GetPdfObject(i); var pdfObj = reader.GetPdfObject(i);
...@@ -105,7 +183,7 @@ namespace Njust.Pdf.Analysis.Jobs ...@@ -105,7 +183,7 @@ namespace Njust.Pdf.Analysis.Jobs
{ {
var ms = new MemoryStream(bytes); var ms = new MemoryStream(bytes);
ms.Position = 0; ms.Position = 0;
KiviiContext.VirtualFiles.WriteFile($"{imgPath}/{i}.jpg", ms); KiviiContext.VirtualFiles.WriteFile($"{imgPath}/{fileName}_{i}.jpg", ms);
//var img = System.Drawing.Image.FromStream(ms); //var img = System.Drawing.Image.FromStream(ms);
//img.Save(Path.Combine(tbxExportImagesPath.Text, $"{i}.jpg"), System.Drawing.Imaging.ImageFormat.Jpeg); //img.Save(Path.Combine(tbxExportImagesPath.Text, $"{i}.jpg"), System.Drawing.Imaging.ImageFormat.Jpeg);
...@@ -114,10 +192,15 @@ namespace Njust.Pdf.Analysis.Jobs ...@@ -114,10 +192,15 @@ namespace Njust.Pdf.Analysis.Jobs
{ } { }
} }
#endregion #endregion
item.Status = int.MaxValue;
item.AddOnlyProperties(o => o.Status);
conn.UpdateOnly(item);
succeed++;
} }
catch (Exception ex) catch (Exception ex)
{ {
TaskContext.Message = $"处理消息异常:共处理{succeed}/{total},Ex:{ex.Message}"; TaskContext.Message = $"处理消息异常:共处理{succeed}/{total},Kvid:{item.Kvid},Ex:{ex.Message}";
item.Status += 1; item.Status += 1;
item.AddOnlyProperties(o => o.Status); item.AddOnlyProperties(o => o.Status);
conn.UpdateOnly(item); conn.UpdateOnly(item);
......
...@@ -44,6 +44,28 @@ namespace Njust.Pdf.Analysis.Tranforms ...@@ -44,6 +44,28 @@ namespace Njust.Pdf.Analysis.Tranforms
#endregion #endregion
[RequiresAnyRole(SystemRoles.Everyone)] [RequiresAnyRole(SystemRoles.Everyone)]
// Looks up a single Analyse record by its Kvid and hands it to the
// "Template.AnalysisPDF" view. Responds with a NotFound error when no Kvid
// is supplied or when no record with that Kvid exists.
[Route("/Analysis/PreCheck/{Kvid}")]
public class AnalysePreCheck : RestfulExecution<Analyse>
{
    /// <summary>
    /// Identifier of the Analyse record to pre-check (bound from the route).
    /// </summary>
    public Guid Kvid { get; set; }

    /// <summary>
    /// Loads the requested record and wraps it in an <c>HttpResult</c> for the PDF analysis view.
    /// </summary>
    /// <param name="req">The incoming request (not used by this method).</param>
    /// <param name="res">The outgoing response (not used by this method).</param>
    /// <returns>
    /// A NotFound error when <see cref="Kvid"/> is empty or the record is missing;
    /// otherwise an <c>HttpResult</c> carrying the record and the view name.
    /// </returns>
    public override object OnExecution(IRequest req, IResponse res)
    {
        // An empty Guid means the caller did not provide an identifier.
        if (Guid.Empty == Kvid)
        {
            return HttpError.NotFound("Need Kvid!");
        }

        // NOTE(review): the connection is not disposed here; presumably its lifetime
        // is managed elsewhere - confirm against KiviiContext.
        var db = KiviiContext.GetOpenedDbConnection<Analyse>();
        var record = db.SingleById<Analyse>(Kvid);

        if (record != null)
        {
            var result = new HttpResult();
            result.Response = record;
            result.View = "Template.AnalysisPDF";
            return result;
        }

        return HttpError.NotFound("This Item is Unannounced!");
    }
}
[RequiresAnyRole(SystemRoles.Everyone)]
public class AnalyseImport : RestfulExecution<Analyse> public class AnalyseImport : RestfulExecution<Analyse>
{ {
public string DomainName { get; set; } public string DomainName { get; set; }
...@@ -141,8 +163,6 @@ namespace Njust.Pdf.Analysis.Tranforms ...@@ -141,8 +163,6 @@ namespace Njust.Pdf.Analysis.Tranforms
exist.AddOnlyProperties(o => o.AnlysisUserName); exist.AddOnlyProperties(o => o.AnlysisUserName);
exist.IsAnlysis = true; exist.IsAnlysis = true;
exist.AddOnlyProperties(o => o.IsAnlysis); exist.AddOnlyProperties(o => o.IsAnlysis);
exist.Status = 50;//解析了全文 后状态为50
exist.AddOnlyProperties(o => o.Status);
conn.UpdateOnly(exist); conn.UpdateOnly(exist);
exist.RemoveAllOnlyProperties(); exist.RemoveAllOnlyProperties();
rtns.Results.Add(exist); rtns.Results.Add(exist);
...@@ -248,7 +268,7 @@ namespace Njust.Pdf.Analysis.Tranforms ...@@ -248,7 +268,7 @@ namespace Njust.Pdf.Analysis.Tranforms
if (!item.Category.IsNullOrEmpty()) item.AddOnlyProperties(o => o.Category); if (!item.Category.IsNullOrEmpty()) item.AddOnlyProperties(o => o.Category);
if (!item.Source.IsNullOrEmpty()) item.AddOnlyProperties(o => o.Source); if (!item.Source.IsNullOrEmpty()) item.AddOnlyProperties(o => o.Source);
if (item.PublishTime != null) item.AddOnlyProperties(o => o.PublishTime); if (!item.PublishTime.IsNullOrEmpty()) item.AddOnlyProperties(o => o.PublishTime);
if (!item.Country.IsNullOrEmpty()) item.AddOnlyProperties(o => o.Country); if (!item.Country.IsNullOrEmpty()) item.AddOnlyProperties(o => o.Country);
if (!item.Fund.IsNullOrEmpty()) item.AddOnlyProperties(o => o.Fund); if (!item.Fund.IsNullOrEmpty()) item.AddOnlyProperties(o => o.Fund);
if (!item.Abstract.IsNullOrEmpty()) item.AddOnlyProperties(o => o.Abstract); if (!item.Abstract.IsNullOrEmpty()) item.AddOnlyProperties(o => o.Abstract);
...@@ -259,8 +279,6 @@ namespace Njust.Pdf.Analysis.Tranforms ...@@ -259,8 +279,6 @@ namespace Njust.Pdf.Analysis.Tranforms
item.IsChecked = true; item.IsChecked = true;
item.AddOnlyProperties(o => o.IsChecked); item.AddOnlyProperties(o => o.IsChecked);
if (!item.Language.IsNullOrEmpty()) item.AddOnlyProperties(o => o.Language); if (!item.Language.IsNullOrEmpty()) item.AddOnlyProperties(o => o.Language);
item.Status = 100;//解析了全文 后状态为50
item.AddOnlyProperties(o => o.Status);
conn.UpdateOnly(item); conn.UpdateOnly(item);
item.RemoveAllOnlyProperties(); item.RemoveAllOnlyProperties();
rtns.Results.Add(item); rtns.Results.Add(item);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment